用python将漫画书PDF的扫描页分成左右两页
2024-10-12 本文已影响0人
孙庚辛
在 Linux 系统中运行(需要先安装 PyPDF2、Pillow、pdf2image,以及 pdf2image 依赖的 poppler-utils):
import os
from PyPDF2 import PdfReader, PdfWriter
from PIL import Image
from pdf2image import convert_from_path
def split_pdf(input_folder, output_folder):
    """Split each double-page scan in every PDF of a folder into single pages.

    For every PDF file found directly inside ``input_folder``, each page is
    rendered to an image, cut in half at the vertical midline, and the two
    halves are appended to a new PDF with the right half first and the left
    half second. The combined PDF is written to ``output_folder`` under the
    source file's name.

    The individual halves are also left behind in ``output_folder`` as
    ``<stem>_left_<n>.pdf`` and ``<stem>_right_<n>.pdf``, where ``n`` is the
    1-based page number.

    Args:
        input_folder: Directory containing the scanned PDF files.
        output_folder: Directory that receives the results; created if it
            does not exist yet.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        # Match the extension case-insensitively so "BOOK.PDF" is not skipped.
        if not filename.lower().endswith('.pdf'):
            continue

        source_path = os.path.join(input_folder, filename)
        stem = filename[:-4]  # file name without the 4-character ".pdf" suffix
        page_count = len(PdfReader(source_path).pages)
        writer = PdfWriter()

        # first_page/last_page are 1-based, so the range has to run up to and
        # including page_count; stopping at page_count - 1 loses the last page.
        for page_num in range(1, page_count + 1):
            page_image = convert_from_path(
                source_path, first_page=page_num, last_page=page_num
            )[0]

            # Cut the scan in half at the vertical midline.
            width, height = page_image.size
            middle = width // 2
            left_image = page_image.crop((0, 0, middle, height))
            right_image = page_image.crop((middle, 0, width, height))

            # Save each half as a one-page PDF so it can be re-read and merged.
            left_image_path = os.path.join(
                output_folder, f'{stem}_left_{page_num}.pdf'
            )
            right_image_path = os.path.join(
                output_folder, f'{stem}_right_{page_num}.pdf'
            )
            left_image.save(left_image_path, 'PDF')
            right_image.save(right_image_path, 'PDF')

            # The right half goes before the left half in the output.
            writer.add_page(PdfReader(right_image_path).pages[0])
            writer.add_page(PdfReader(left_image_path).pages[0])

        output_pdf_path = os.path.join(output_folder, filename)
        with open(output_pdf_path, 'wb') as f:
            writer.write(f)
# Source folder with the scanned PDFs, and the folder that receives the
# split results.
input_folder, output_folder = 'guanlangaoshou', 'guanlanSinglePage'
split_pdf(input_folder=input_folder, output_folder=output_folder)
这个程序把分开的单页也保存下来了。要清除这些单页文件,可以再执行下面的程序把它们删除,只保留拼接后的最终结果:
import os
def delete_files_with_keywords(folder_path):
    """Delete the per-page "left"/"right" half PDFs inside ``folder_path``.

    A file is removed only when its name has the form
    ``<stem>_left_<n>.pdf`` or ``<stem>_right_<n>.pdf`` with ``n`` made of
    digits. Checking merely whether the name contains "left" or "right"
    would also delete unrelated files such as ``copyright.pdf``.

    One line is printed per deleted file. A file that cannot be removed is
    reported and skipped, so the remaining files are still processed.

    Args:
        folder_path: Directory to clean up.
    """
    half_markers = ('_left', '_right')

    for filename in os.listdir(folder_path):
        stem, extension = os.path.splitext(filename)
        # Split "<stem>_left_<n>" into "<stem>_left" and "<n>".
        prefix, _, page_number = stem.rpartition('_')
        is_half_page = (
            extension == '.pdf'
            and page_number.isdigit()
            and prefix.endswith(half_markers)
        )
        if not is_half_page:
            continue

        file_path = os.path.join(folder_path, filename)
        try:
            os.remove(file_path)
        except OSError as e:
            print(f"Error deleting {file_path}: {e}")
        else:
            print(f"Deleted: {file_path}")
# Folder that holds the split results to clean up.
folder_path = 'D:\\BaiduNetdiskDownload\\guanlanSinglePage'
# Remove the per-page files from that folder.
delete_files_with_keywords(folder_path=folder_path)