目标是快速遍历文件并给每个文件加盖标记(水印)。目前的代码可以正常工作,但每次循环(处理一个文件)大约要花费4秒,而我要处理的文件超过1万个。
简要说明它的作用:它遍历一个文件夹,把所有pdf文件名提取到一个列表中,然后遍历该列表,逐个修改文件。此外还有一个写入excel(csv)记录文件的操作,用于在我修改文件之前记录处理进度以及该文件的修改日期。程序会创建一个空白的pdf文件,把图像放置在我想要的位置,然后把这两个pdf文件合并在一起。
有没有比我目前的做法更快的方法?
import csv
import traceback
from datetime import datetime
from PIL import Image
import PIL
import time
def _create_watermark(barcode_file, watermark_path, basewidth=20):
    """Render the barcode image into a PDF whose first page is used as the stamp."""
    c = canvas.Canvas(watermark_path)
    # Draw the image at x=10, y=80; the height is fixed at 7x the width.
    c.drawImage(barcode_file, 10, 80, basewidth, basewidth * 7)
    c.save()


def perform_action(rootDir, filelist):
    """Stamp every not-yet-stamped file in ``filelist`` with the barcode image.

    For each file whose name does not start with "New":

    1. its name and last-modified time (formatted from the UTC timestamp) are
       appended as a row to the CSV log,
    2. every page is merged with the first page of the watermark PDF,
    3. the result is written next to the original as ``"New" + fname`` and
       the original file is removed.

    The watermark PDF is rendered at most once per call, right before the
    first file is stamped. The stamp does not depend on the input file, so
    regenerating it for every page only costs time, and rewriting the file
    while it is open for reading is unsafe.

    Args:
        rootDir: Directory that contains the files named in ``filelist``.
        filelist: File names (not full paths) to process.

    Notes:
        The CSV log is written through the module-level ``file`` object, which
        the caller must have opened for writing before calling this function.
        A failure while stamping one file is printed with its traceback and
        the loop continues with the next file. A failure while reading the
        file's modification time is not caught.
    """
    barcode_file = r"...path1"
    watermark_path = 'watermark.pdf'

    # Files already prefixed with "New" were stamped earlier; skip them so
    # nothing gets "double-stamped".
    pending = [fname for fname in filelist if not fname.startswith("New")]
    if not pending:
        return

    # One CSV writer for the whole batch instead of one per file.
    wr = csv.writer(file)
    watermark_ready = False
    completed = 0  # files successfully stamped during this call

    for fname in pending:
        loopstart = time.time()
        print('Files completed: %s' % completed)

        source_path = os.path.join(rootDir, fname)
        ts = os.path.getmtime(source_path)
        wr.writerow([fname, datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')])
        print("Time to write to excelfile1: ", time.time() - loopstart)

        try:
            if not watermark_ready:
                # Built lazily inside the try so that a rendering problem is
                # reported per file, like any other stamping failure.
                _create_watermark(barcode_file, watermark_path)
                watermark_ready = True

            # Both streams must stay open until the output is written: the
            # PDF reader pulls page data from them on demand.
            with open(watermark_path, "rb") as watermark_stream, \
                    open(source_path, "rb") as input_stream:
                stamp_page = PdfFileReader(watermark_stream).getPage(0)
                reader = PdfFileReader(input_stream)
                output_file = PdfFileWriter()

                # Merge the stamp into every page of the input document.
                for page_number in range(reader.getNumPages()):
                    input_page = reader.getPage(page_number)
                    input_page.mergePage(stamp_page)
                    output_file.addPage(input_page)
                print("Time to create and merge files: ", time.time() - loopstart)

                with open(os.path.join(rootDir, "New" + fname), "wb") as outputStream:
                    output_file.write(outputStream)

            # Remove the original only after its stream is closed and the
            # stamped copy has been written.
            os.remove(source_path)
            completed += 1
            print("Full Loop time: ", time.time() - loopstart)
        except Exception:
            traceback.print_exc()
# Driver: walk the tree under rootDir and stamp the files of every directory.
rootDir = r'path'
start_time = time.time()

# `file` is looked up as a module-level global by perform_action (CSV log),
# so the name must stay as it is.
# `file2` is opened (and therefore truncated) but never written to in the
# visible code; it is kept so that side effect does not change.
with open(r'....path2', 'w', newline='') as file, \
        open(r'....path3', 'w', newline='') as file2:
    for dirName, subdirList, fileList in os.walk(rootDir):
        # Pass the directory currently being visited, not the walk root:
        # fileList holds names relative to dirName, so using rootDir breaks
        # the modification-time lookup for files inside subdirectories.
        perform_action(dirName, fileList)

print("Time for the program:", (time.time() - start_time))