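# NOTE: the following three lines are web-page residue from the code-hosting
# site (fork-sync banner); they are not part of the program.
# 代码拉取完成,页面将自动刷新
# 同步操作将从 黄朝勋/PDF工具合集 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
# 确定后同步将在后台操作,完成时将刷新页面,请耐心等待。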
# -*- mode: python3.8.5 ; coding: utf-8 -*-
# Software: Windows 11 ; PyCharm 2022.1 ; python:3.8.5
# date:2022.10.19
# update time: 2022.11.16
# name:PDF工具合集2.6
# make:黄朝勋
# 安装依赖库
# pip install PyMuPDf==1.19.0 -i https://mirrors.aliyun.com/pypi/simple
# pip install PyWin32 -i https://mirrors.aliyun.com/pypi/simple
# pip install pyinstaller -i https://mirrors.aliyun.com/pypi/simple
# pip install pdf2docx -i https://mirrors.aliyun.com/pypi/simple
# pip install ttkbootstrap -i https://mirrors.aliyun.com/pypi/simple
# pip install PyPDF2 -i https://mirrors.aliyun.com/pypi/simple
# pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
# pip install paddlepaddle==2.3.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
# pip install "paddleocr>=2.6.0.3" -i https://pypi.tuna.tsinghua.edu.cn/simple
# 生成可执行文件命令:pyinstaller -D -w -i favicon.ico PDFmake.py
# pyinstaller PDFmake.spec
import base64
import os
import tkinter
from tkinter import filedialog
from tkinter import messagebox
from tkinter import ttk
import cv2
import fitz
from PIL import Image
from PIL import ImageEnhance
from PyPDF2 import PdfReader, PdfWriter
from paddleocr import PPStructure
from paddleocr import PaddleOCR
from recovery_to_doc import sorted_layout_boxes, convert_info_docx
from pdf2docx import Converter
from ttkbootstrap import Style
from win32com.client import constants, gencache
from icon import img # 引用 icon.py 的图片
# Decode the base64-embedded icon (imported from icon.py as ``img``) and write
# it to picture.ico in the current working directory; the main window later
# loads that file via iconbitmap().
# A context manager guarantees the handle is closed even if decoding or the
# write fails.
with open("picture.ico", "wb") as picture:
    picture.write(base64.b64decode(img))

# Module-level ttkbootstrap style; its master window is used as the main window.
style = Style(theme='minty')
class desk:
    """Main window of the PDF toolbox: a grid of buttons, one per conversion."""

    def __init__(self):
        """Configure the root window owned by the module-level ``style``."""
        self.init_window = style.master
        self.init_window.title('PDF工具合集2.6(黄朝勋)')
        self.init_window.geometry('450x350+450+200')
        self.init_window["background"] = "#fafbf0"
        self.init_window.attributes("-topmost", True)
        self.init_window.iconbitmap('picture.ico')
        self.init_window.attributes("-toolwindow", False)

    def Button(self, row, column, text, func):
        """Create an outlined button in the grid cell (row, column).

        Args:
            row: Grid row of the button.
            column: Grid column of the button.
            text: Caption shown on the button.
            func: Zero-argument callable invoked when the button is clicked.
        """
        button = ttk.Button(self.init_window, text=text, command=func,
                            style='success.Outline.TButton')
        button.grid(row=row, column=column, padx=53, pady=18)

    def run(self):
        """Create all tool buttons and enter the Tk main loop (blocks)."""
        # (row, column, caption, name of the Change method to run)
        tools = (
            (1, 1, 'PDF转图片工具', 'PdfToImg'),
            (1, 2, 'Word转PDF工具', 'WordToPdf'),
            (2, 1, 'PDF转Word工具', 'PdfToWord'),
            (2, 2, 'PDF文件压缩工具', 'PdfToLow'),
            (3, 1, 'PDF文件拆分工具', 'PdfToMore'),
            (3, 2, '照片磨皮修复工具', 'PhotoToImg'),
            (4, 1, '图片文字转word', 'PhotoToWord'),
            (4, 2, 'PDF转可编辑word', 'PdfToRWord'),
        )
        for row, column, caption, method in tools:
            # Bind ``method`` as a default argument so each button keeps its
            # own value instead of the loop's last one.
            self.Button(row, column, caption,
                        lambda method=method: self.conver(method))
        self.lab = tkinter.Label()
        self.lab.place(rely=1.0, relx=1.0, x=0, y=0)
        self.init_window.mainloop()

    def conver(self, type):
        """Ask the user for a file, run one conversion on it, show the result.

        Args:
            type: Name of the ``Change`` method to call (e.g. ``'PdfToImg'``).
                The parameter name shadows the builtin but is kept so existing
                callers are unaffected.
        """
        selectFile = filedialog.askopenfilename()
        if not selectFile:
            # The dialog was cancelled; there is nothing to convert.
            return
        action = getattr(Change(selectFile), type)
        try:
            outcome = action()
        except Exception as exc:
            # UI boundary: report the failure in the message box instead of
            # letting it disappear into Tk's callback error handler.
            outcome = f'{exc.__class__.__name__}: {exc}'
        self.message(outcome)

    def message(self, text):
        """Show ``text`` in an information message box."""
        messagebox.showinfo('结果', text)
class Change:
    """File conversions offered by the toolbox, all working on one input path.

    Most methods write their output to a path derived by appending a suffix to
    the input path, and every method returns a short status message for the UI.
    """

    def __init__(self, filename):
        """Remember the path of the file to convert.

        Args:
            filename: Path of the input file, as returned by the file dialog.
        """
        self.filename = filename

    # PDF -> images
    def PdfToImg(self):
        """Render every page of the PDF to a PNG at 2x zoom.

        Page N (zero-based) is saved as ``<filename>N.png``.

        Returns:
            The success message, or the caught exception object if anything
            fails.
        """
        try:
            self.pdf = fitz.open(self.filename)
            try:
                # len(document) is the page count; it avoids the deprecated
                # camelCase ``pageCount`` attribute.
                self.pageCount = len(self.pdf)
                trans = fitz.Matrix(2.0, 2.0).prerotate(0)
                for pg in range(self.pageCount):
                    pm = self.pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(self.filename + '%s.png' % pg)
            finally:
                # Release the file handle even when rendering fails.
                self.pdf.close()
            return '转换图片完成'
        except Exception as e:
            return e

    # Word -> PDF
    def WordToPdf(self):
        """Export the Word document to ``<filename>.pdf`` through Word's COM API.

        Returns:
            The success message.
        """
        pdfname = self.filename + '.pdf'
        word = gencache.EnsureDispatch('Word.Application')
        try:
            doc = word.Documents.Open(self.filename, ReadOnly=1)
            doc.ExportAsFixedFormat(
                pdfname,
                constants.wdExportFormatPDF,
                Item=constants.wdExportDocumentWithMarkup,
                CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        finally:
            # Always quit Word, otherwise a failed export leaves a hidden
            # Word process running.
            word.Quit(constants.wdDoNotSaveChanges)
        # The original message read 'DPF', a typo for 'PDF'.
        return '转换PDF完成'

    # PDF -> Word
    def PdfToWord(self):
        """Convert the whole PDF to ``<filename>.docx`` with pdf2docx.

        Returns:
            The success message.
        """
        Wordname = self.filename + '.docx'
        cv = Converter(self.filename)
        try:
            cv.convert(Wordname, start=0, end=None)
        finally:
            # Close the converter even if the conversion raises.
            cv.close()
        return '转换word完成'

    # PDF compression
    def PdfToLow(self):
        """Write a copy of the PDF with compressed content streams.

        The output goes to ``<filename>.pdf``.

        Returns:
            The success message.
        """
        reader = PdfReader(self.filename)
        writer = PdfWriter()
        for page in reader.pages:
            page.compress_content_streams()
            writer.add_page(page)
        with open(self.filename + '.pdf', 'wb') as f:
            writer.write(f)
        return 'PDF文件压缩完成'

    # PDF split
    def PdfToMore(self):
        """Split the PDF into one single-page PDF per page.

        Page i (zero-based) is written to ``<filename>-i.pdf``.

        Returns:
            The success message.
        """
        reader = PdfReader(self.filename)
        # Use ``reader.pages`` / ``add_page`` like PdfToLow does; the camelCase
        # getNumPages/getPage/addPage calls are the legacy PyPDF2 API.
        for i, page in enumerate(reader.pages):
            pdf_writer = PdfWriter()
            pdf_writer.add_page(page)
            with open(self.filename + '-{}.pdf'.format(i), 'wb') as fh:
                pdf_writer.write(fh)
        return '将PDF文件中的每页生成单独一个PDF文件完成'

    # Photo smoothing / retouching
    def PhotoToImg(self):
        """Smooth the photo, then sharpen and boost contrast.

        The result is saved as ``<filename>.jpg``. An intermediate file
        ``img_1.jpg`` is written to the current working directory and is not
        removed afterwards.

        Returns:
            The success message.

        Raises:
            ValueError: If OpenCV cannot read the input image.
        """
        path = self.filename
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise ValueError('无法读取图片: %s' % path)
        work_img = cv2.bilateralFilter(img, 31, 100, 150)
        # Blend 30% of the original with 70% of the filtered image.
        result_img = cv2.addWeighted(img, 0.3, work_img, 0.7, 0)
        cv2.imwrite("img_1.jpg", result_img)
        image = Image.open("img_1.jpg")
        # Sharpness adjustment.
        image_sharped = ImageEnhance.Sharpness(image).enhance(1.5)
        # Contrast adjustment.
        image_con = ImageEnhance.Contrast(image_sharped).enhance(1.15)
        image_con.save(self.filename + '.jpg')
        return '照片修复完成'

    # Image content -> Word
    def PhotoToWord(self):
        """Run PP-Structure on the image and hand the result to the docx writer.

        Returns:
            The success message.

        Raises:
            ValueError: If OpenCV cannot read the input image.
        """
        table_engine = PPStructure(layout=False, recovery=True)
        img_path = self.filename
        # Escaped backslash: same runtime value as the original '\Desktop'
        # literal, without the invalid escape sequence.
        save_folder = img_path + '\\Desktop'
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise ValueError('无法读取图片: %s' % img_path)
        result = table_engine(img)
        width = img.shape[1]
        res = sorted_layout_boxes(result, width)
        # splitext removes only the final extension; the previous
        # ``split('.')[0]`` truncated the path at the first dot, which breaks
        # when a directory name contains a dot.
        # NOTE(review): how save_folder and this name are combined into the
        # output path is decided inside convert_info_docx - confirm there.
        img_name = os.path.splitext(os.path.dirname(save_folder))[0]
        convert_info_docx(img, res, save_folder, img_name)
        return '图片内容转word完成'

    # PDF -> editable Word (OCR text)
    def PdfToRWord(self):
        """OCR the input and append the recognized text to ``<filename>.doc``.

        The recognized strings are written back to back, with no separators,
        as UTF-8 text.

        Returns:
            The success message.
        """
        ocr = PaddleOCR(use_angle_cls=True, lang="ch")
        result = ocr.ocr(self.filename, cls=True)
        # Only the first entry of the OCR result is used.
        # NOTE(review): for multi-page input this may cover only the first
        # page - confirm against the installed PaddleOCR version.
        # ``or []`` guards against a None entry - presumably returned when no
        # text is detected; TODO confirm.
        first_entry = result[0] or []
        txts = [line[1][0] for line in first_entry]
        # Open the output only after OCR has succeeded, and let the context
        # manager close it even if the write fails.
        with open(self.filename + '.doc', mode="a", encoding="utf-8") as log:
            log.write(''.join(txts))
        return '转换成可编辑word完成'
if __name__ == '__main__':
    # Script entry point: build the main window, then hand control to Tk.
    main_window = desk()
    main_window.run()
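# NOTE: the following two lines are web-page residue from the code-hosting
# site (content-moderation notice); they are not part of the program.
# 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。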