加快pdf.mergePage

时间:2019-08-22 08:42:29

标签: python python-3.x performance pypdf2

目标是能够快速遍历文件并给每个文件盖上标记(水印)。我的代码可以运行,但每次循环大约要花费 4 秒,而我要处理的文件超过 1 万个。

简要解释一下它的作用:它遍历一个文件夹,将所有 pdf 文件名提取到一个列表中,然后遍历整个列表来修改每个文件。另外还有一个写 excel(csv)的操作,用来在我改动文件之前记录正在处理的文件名及其修改日期。然后创建一个空白的 pdf 文件,把图像放到我想要的位置,最后将两个 pdf 文件合并在一起。

有没有比我目前的做法更快的方法?

import csv
import traceback
from datetime import datetime
from PIL import Image
import PIL
import time



def _write_stamp_pdf(stamp_path, barcode_file, basewidth):
    """Render the barcode image onto a PDF saved at *stamp_path*."""
    c = canvas.Canvas(stamp_path)
    # Draw the image at x=10, y=80, `basewidth` wide and seven times as tall.
    c.drawImage(barcode_file, 10, 80, basewidth, basewidth * 7)
    c.save()


def perform_action(rootDir, filelist):
    """Stamp every not-yet-stamped file in *filelist* with the barcode image.

    For each file name that does not start with "New" the function:

    1. appends ``[name, modification time]`` to the CSV opened as the
       module-level ``file``;
    2. merges page 0 of ``watermark.pdf`` onto every page of the input;
    3. writes the result next to the input as ``"New" + name`` and deletes
       the input.

    A failure while stamping one file is printed with its traceback and the
    loop moves on to the next file.

    Args:
        rootDir: Accepted for caller compatibility but not used. File names
            are resolved against the module-level ``dirName`` set by the
            ``os.walk`` loop, because that is the directory the names in
            *filelist* belong to (the original mixed ``rootDir`` and
            ``dirName``, which broke for subdirectories).
        filelist: File names (not paths) located in ``dirName``.

    Returns:
        None.
    """
    barcode_file = r"...path1"
    stamp_path = 'watermark.pdf'
    basewidth = 20

    # Names starting with "New" are outputs of an earlier run; skipping them
    # keeps files from being "double-stamped".
    pending = [fname for fname in filelist if not fname.startswith("New")]
    if not pending:
        return

    # The stamp is identical for every page of every file, so render it once
    # per call. The original re-rendered it for each page, overwriting
    # watermark.pdf while that same file was open for reading.
    try:
        _write_stamp_pdf(stamp_path, barcode_file, basewidth)
    except Exception:
        # Without a stamp nothing can be merged; report once and give up on
        # this batch instead of failing separately for every file.
        traceback.print_exc()
        return

    wr = csv.writer(file)
    # Successfully stamped files in this call. Replaces the undefined
    # `dirCount`, which raised NameError on the first file.
    completed = 0

    for fname in pending:
        loopstart = time.time()
        print('Files completed: %s' % completed)

        source_path = os.path.join(dirName, fname)

        # Log the file and its original modification time before touching it.
        ts = os.path.getmtime(source_path)
        line = [fname, datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')]
        wr.writerow(line)
        print("Time to write to excelfile1:    ", time.time() - loopstart)

        try:
            # Both streams stay open until the output has been written, so
            # the readers can still fetch objects from them during write().
            with open(stamp_path, "rb") as stamp_stream:
                stamp_page = PdfFileReader(stamp_stream).getPage(0)

                with open(source_path, "rb") as input_stream:
                    input_file = PdfFileReader(input_stream)
                    output_file = PdfFileWriter()

                    # Merge the stamp onto each input page and queue it.
                    for page_number in range(input_file.getNumPages()):
                        input_page = input_file.getPage(page_number)
                        input_page.mergePage(stamp_page)
                        output_file.addPage(input_page)

                    print("Time to create and merge files:          ", time.time()-loopstart)

                    fname2 = "New" + fname
                    with open(os.path.join(dirName, fname2), "wb") as outputStream:
                        output_file.write(outputStream)

            # Remove the old version only after the input handle is closed
            # and the stamped copy has been written.
            os.remove(source_path)
            completed += 1

            print("Full Loop time:         ", time.time()-loopstart)
        except Exception:
            traceback.print_exc()

# Top-level folder whose tree is scanned for files to stamp.
rootDir = r'path'

# Reference point for the elapsed-time report printed after each directory.
start_time = time.time()

# `file` is the CSV log that perform_action writes to through the module
# namespace; `file2` is opened (and truncated) alongside it.
with open(r'....path2','w', newline='') as file, open(r'....path3','w', newline='') as file2:
    # `dirName` is likewise read by perform_action as a module-level name,
    # so the loop variable names must stay as they are.
    for dirName, subdirList, fileList in os.walk(rootDir):
        perform_action(rootDir, fileList)
        elapsed = time.time() - start_time
        print("Time for the program:", elapsed)

0 个答案:

没有答案
相关问题