1 Star 0 Fork 8

awen/PDF工具合集2.6

forked from 黄朝勋/PDF工具合集 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
PDFmake.py 7.09 KB
一键复制 编辑 原始数据 按行查看 历史
黄朝勋 提交于 2022-12-02 14:51 . 升级
# -*- mode: python3.8.5 ; coding: utf-8 -*-
# Software: Windows 11 ; PyCharm 2022.1 ; python:3.8.5
# date:2022.10.19
# update time: 2022.11.16
# name:PDF工具合集2.6
# make:黄朝勋
# 安装依赖库
# pip install PyMuPDF==1.19.0 -i https://mirrors.aliyun.com/pypi/simple
# pip install PyWin32 -i https://mirrors.aliyun.com/pypi/simple
# pip install pyinstaller -i https://mirrors.aliyun.com/pypi/simple
# pip install pdf2docx -i https://mirrors.aliyun.com/pypi/simple
# pip install ttkbootstrap -i https://mirrors.aliyun.com/pypi/simple
# pip install PyPDF2 -i https://mirrors.aliyun.com/pypi/simple
# pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
# pip install paddlepaddle==2.3.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
# pip install "paddleocr>=2.6.0.3" -i https://pypi.tuna.tsinghua.edu.cn/simple
# 生成可执行文件命令:pyinstaller -D -w -i favicon.ico PDFmake.py
# pyinstaller PDFmake.spec
import base64
import os
import tkinter
from tkinter import filedialog
from tkinter import messagebox
from tkinter import ttk
import cv2
import fitz
from PIL import Image
from PIL import ImageEnhance
from PyPDF2 import PdfReader, PdfWriter
from paddleocr import PPStructure
from paddleocr import PaddleOCR
from recovery_to_doc import sorted_layout_boxes, convert_info_docx
from pdf2docx import Converter
from ttkbootstrap import Style
from win32com.client import constants, gencache
from icon import img # 引用 icon.py 的图片
# Decode the base64-encoded icon data imported from icon.py and write it to
# picture.ico so the main window can load it with iconbitmap(). Using `with`
# guarantees the file handle is closed even if decoding or writing fails.
with open("picture.ico", "wb") as picture:
    picture.write(base64.b64decode(img))

# Application-wide ttkbootstrap style; the main window is taken from it.
style = Style(theme='minty')
class desk:
    """Main window of the toolkit: a grid of buttons, one per conversion tool."""

    def __init__(self):
        """Configure the top-level window (title, size, colour, icon)."""
        # The window object is taken from the module-level ttkbootstrap style.
        self.init_window = style.master
        self.init_window.title('PDF工具合集2.6(黄朝勋)')
        self.init_window.geometry('450x350+450+200')
        self.init_window["background"] = "#fafbf0"
        self.init_window.attributes("-topmost", True)
        self.init_window.iconbitmap('picture.ico')
        self.init_window.attributes("-toolwindow", False)

    def Button(self, row, column, text, func):
        """Create one outlined button and place it in the window grid.

        Args:
            row: Grid row of the button.
            column: Grid column of the button.
            text: Caption shown on the button.
            func: Zero-argument callable invoked when the button is clicked.
        """
        button = ttk.Button(
            self.init_window,
            text=text,
            command=func,
            style='success.Outline.TButton',
        )
        button.grid(row=row, column=column, padx=53, pady=18)

    def run(self):
        """Create all tool buttons and enter the Tk event loop (blocks)."""
        # (row, column, caption, name of the Change method to run)
        tools = (
            (1, 1, 'PDF转图片工具', 'PdfToImg'),
            (1, 2, 'Word转PDF工具', 'WordToPdf'),
            (2, 1, 'PDF转Word工具', 'PdfToWord'),
            (2, 2, 'PDF文件压缩工具', 'PdfToLow'),
            (3, 1, 'PDF文件拆分工具', 'PdfToMore'),
            (3, 2, '照片磨皮修复工具', 'PhotoToImg'),
            (4, 1, '图片文字转word', 'PhotoToWord'),
            (4, 2, 'PDF转可编辑word', 'PdfToRWord'),
        )
        for row, column, caption, method in tools:
            # Bind `method` as a default argument so each button keeps its own
            # value instead of the last one of the loop (late-binding closure).
            self.Button(row, column, caption,
                        lambda method=method: self.conver(method))
        # Empty label placed at the bottom-right corner of the window.
        # NOTE(review): its purpose is not visible here — confirm it is needed.
        self.lab = tkinter.Label()
        self.lab.place(rely=1.0, relx=1.0, x=0, y=0)
        self.init_window.mainloop()

    def conver(self, type):
        """Ask the user for a file, run one conversion on it, show the result.

        Args:
            type: Name of the ``Change`` method to invoke, e.g. ``'PdfToImg'``.
                The parameter name shadows the builtin but is kept so existing
                callers keep working.
        """
        selectFile = filedialog.askopenfilename()
        # The dialog yields an empty value when the user cancels; previously
        # a conversion was still attempted on that empty path.
        if not selectFile:
            return
        convert = getattr(Change(selectFile), type)
        try:
            outcome = convert()
        except Exception as exc:
            # This is the GUI boundary: surface the failure in the dialog
            # instead of letting it vanish inside the Tk callback machinery.
            outcome = '{}: {}'.format(exc.__class__.__name__, exc)
        self.message(outcome)

    def message(self, text):
        """Show ``text`` in an information dialog titled '结果'."""
        messagebox.showinfo('结果', text)
class Change:
    """Conversion operations applied to a single user-selected file.

    Each public method reads ``self.filename``, derives its output path from
    the input path, and returns a message intended for display to the user.
    """

    def __init__(self, filename):
        """Remember the path of the file every conversion will operate on."""
        self.filename = filename

    @staticmethod
    def _collect_texts(result):
        """Return the recognized text strings from an OCR result, in order.

        Assumes ``result`` is a list with one entry per page, each entry a
        list of ``[box, (text, confidence)]`` lines — the shape implied by
        the ``result[0]`` / ``line[1][0]`` indexing this code has always used.
        NOTE(review): confirm against the installed PaddleOCR version.
        Pages that are ``None`` or empty (nothing recognized) are skipped.
        """
        texts = []
        for page in result or []:
            if not page:
                continue
            texts.extend(line[1][0] for line in page)
        return texts

    # PDF -> images
    def PdfToImg(self):
        """Render every page of the PDF to ``<filename><page index>.png``.

        Returns:
            The success message, or the caught exception object when anything
            fails (the caller displays whichever it receives).
        """
        try:
            self.pdf = fitz.open(self.filename)
            try:
                # len(doc) is the page count and does not depend on the
                # deprecated camelCase ``pageCount`` attribute.
                self.pageCount = len(self.pdf)
                # Scale by 2 in both directions; built once, reused per page.
                trans = fitz.Matrix(2.0, 2.0).prerotate(0)
                for pg in range(self.pageCount):
                    pm = self.pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(self.filename + '%s.png' % pg)
            finally:
                # Release the document handle even if rendering fails.
                self.pdf.close()
            return '转换图片完成'
        except Exception as e:
            return e

    # Word -> PDF
    def WordToPdf(self):
        """Export the Word document to ``<filename>.pdf`` through Word's COM API.

        Returns:
            The success message.
        """
        # Word runs as a separate process with its own working directory, so
        # hand it an absolute, OS-normalized path.
        source = os.path.abspath(self.filename)
        pdfname = source + '.pdf'
        word = gencache.EnsureDispatch('Word.Application')
        try:
            doc = word.Documents.Open(source, ReadOnly=1)
            doc.ExportAsFixedFormat(
                pdfname,
                constants.wdExportFormatPDF,
                Item=constants.wdExportDocumentWithMarkup,
                CreateBookmarks=constants.wdExportCreateHeadingBookmarks,
            )
        finally:
            # Always shut the Word instance down; otherwise a failed
            # open/export leaves the Word process running.
            word.Quit(constants.wdDoNotSaveChanges)
        return '转换PDF完成'

    # PDF -> Word
    def PdfToWord(self):
        """Convert the PDF to ``<filename>.docx`` with pdf2docx.

        Returns:
            The success message.
        """
        Wordname = self.filename + '.docx'
        cv = Converter(self.filename)
        try:
            cv.convert(Wordname, start=0, end=None)
        finally:
            # Close the converter even when the conversion raises.
            cv.close()
        return '转换word完成'

    # PDF compression
    def PdfToLow(self):
        """Write a copy of the PDF with compressed content streams.

        The output is written to ``<filename>.pdf``.

        Returns:
            The success message.
        """
        reader = PdfReader(self.filename)
        writer = PdfWriter()
        for page in reader.pages:
            page.compress_content_streams()
            writer.add_page(page)
        with open(self.filename + '.pdf', 'wb') as f:
            writer.write(f)
        return 'PDF文件压缩完成'

    # PDF splitting
    def PdfToMore(self):
        """Split the PDF into one file per page: ``<filename>-<index>.pdf``.

        Uses ``reader.pages`` / ``add_page`` like ``PdfToLow`` does, instead
        of the camelCase ``getNumPages`` / ``getPage`` / ``addPage`` calls.

        Returns:
            The success message.
        """
        reader = PdfReader(self.filename)
        for index, page in enumerate(reader.pages):
            pdf_writer = PdfWriter()
            pdf_writer.add_page(page)
            with open('{}-{}.pdf'.format(self.filename, index), 'wb') as fh:
                pdf_writer.write(fh)
        return '将PDF文件中的每页生成单独一个PDF文件完成'

    # Photo smoothing / retouching
    def PhotoToImg(self):
        """Smooth, sharpen and contrast-boost a photo; save ``<filename>.jpg``.

        An intermediate ``img_1.jpg`` is written to the current working
        directory, as before.

        Returns:
            The success message.

        Raises:
            ValueError: If the image cannot be read.
        """
        img = cv2.imread(self.filename)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            raise ValueError('无法读取图片文件: {}'.format(self.filename))
        work_img = cv2.bilateralFilter(img, 31, 100, 150)
        # Blend 30% original with 70% filtered image.
        result_img = cv2.addWeighted(img, 0.3, work_img, 0.7, 0)
        cv2.imwrite("img_1.jpg", result_img)
        with Image.open("img_1.jpg") as image:
            # Sharpness adjustment
            image_sharped = ImageEnhance.Sharpness(image).enhance(1.5)
            # Contrast adjustment
            image_con = ImageEnhance.Contrast(image_sharped).enhance(1.15)
            image_con.save(self.filename + '.jpg')
        return '照片修复完成'

    # Image content -> Word
    def PhotoToWord(self):
        """Run PP-Structure on the image and export the result as a Word file.

        Returns:
            The success message.

        Raises:
            ValueError: If the image cannot be read.
        """
        img_path = self.filename
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            raise ValueError('无法读取图片文件: {}'.format(img_path))
        table_engine = PPStructure(layout=False, recovery=True)
        # Raw string: same value as before, without the invalid "\D" escape.
        save_folder = img_path + r'\Desktop'
        result = table_engine(img)
        w = img.shape[1]
        res = sorted_layout_boxes(result, w)
        # NOTE(review): the folder ("<image path>\Desktop") and the name
        # (dirname truncated at the first '.') are passed through unchanged
        # from the original code; they look unusual — confirm against
        # convert_info_docx before changing them.
        convert_info_docx(img, res, save_folder,
                          os.path.dirname(save_folder).split('.')[0])
        return '图片内容转word完成'

    # PDF -> editable Word
    def PdfToRWord(self):
        """OCR the file and append the recognized text to ``<filename>.doc``.

        Text from every page of the OCR result is written (previously only
        the first page was), concatenated without separators.

        Returns:
            The success message.
        """
        ocr = PaddleOCR(use_angle_cls=True, lang="ch")
        result = ocr.ocr(self.filename, cls=True)
        txts = self._collect_texts(result)
        # NOTE(review): append mode is kept from the original, so converting
        # the same file twice duplicates the text — confirm this is intended.
        with open(self.filename + '.doc', mode="a", encoding="utf-8") as log:
            log.write(''.join(txts))
        return '转换成可编辑word完成'
# Script entry point: build the main window and hand control to its event loop.
if __name__ == '__main__':
    desk().run()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/wenjunzhong/pdfmake.git
git@gitee.com:wenjunzhong/pdfmake.git
wenjunzhong
pdfmake
PDF工具合集2.6
master

搜索帮助