如何在Windows中修复此UnicodeDecodeError?

时间:2019-05-07 16:09:09

标签: python-3.x scons nuitka

我正在运行nuitka并收到UnicodeDecodeError。

python -m nuitka --standalone --plugin-enable=tk-inter MyProgram.py

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc6 in position 74: invalid continuation byte
scons: [MyProgram.build\module.babel.core.obj] UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc6 in position 79: invalid continuation byte

MyProgram.py

import datetime
import glob
import mysql.connector
import openpyxl
import os
import PyPDF4
import sys
import tkcalendar
import tkinter.filedialog, tkinter.messagebox, tkinter.ttk

# Version of this client. At startup it is compared with max(VERSAO) read from
# terceirizados.VERSIONAMENTO; the GUI is only opened when the two are equal.
version = 0.03

def verify():
    """Check which people listed in a spreadsheet are mentioned in a folder's PDFs.

    Callback of the "VERIFICAR" button. It relies on the module-level objects
    ``base`` (date entry), ``pbar`` (progress bar), ``cur`` and ``dbconn``
    (database cursor / connection).

    Steps:
      1. Turn the date typed in ``base`` (parsed as ``dd/mm/YYYY``) into the
         first day of that month (``YYYY-mm-01``).
      2. Ask for an ``.xlsx`` file and for a folder containing PDF files.
      3. Read cell A1 plus, from row 3 down to the first row whose column 1 is
         empty, columns 5, 6, 2 and 1 of the active sheet.
      4. For every PDF in the folder, extract its text and, for every row read,
         write the row to a new workbook, ``replace into`` the
         ``terceirizados.TERCEIRIZADOS`` table, and mark "SIM"/"NÃO" depending
         on whether the name from column 1 occurs in the PDF text.
      5. Save the new workbook in the chosen folder and open it.

    Returns ``None``. If either dialog is cancelled the function returns
    without touching the database. ``dbconn`` is closed at the end of a
    completed run.
    """
    comp = datetime.datetime.strptime(base.get(), "%d/%m/%Y").strftime("%Y-%m-01")

    tkinter.messagebox.showinfo("Planilha Excel", "Selecione a planilha")
    xlsx = tkinter.filedialog.askopenfilename(initialdir = "M:\\", title = "Selecione a planilha", filetypes = [("Excel files", "*.xlsx")])
    if not xlsx:
        # Dialog cancelled: nothing to load.
        return
    pbar["value"] = 5

    tkinter.messagebox.showinfo("Pasta", "Selecione a pasta contendo os arquivos pdf")
    directory = tkinter.filedialog.askdirectory(initialdir = "M:\\", title = "Selecione a pasta contendo os arquivos pdf")
    if not directory:
        # Dialog cancelled: undo the progress shown so far and give up.
        pbar["value"] = 0
        return
    pbar["value"] = 10

    # Read-only workbooks keep the file open until close() is called, so make
    # sure the handle is released even if reading fails.
    wbterc = openpyxl.load_workbook(xlsx, read_only = True)
    try:
        sheeterc = wbterc.active
        pbar["value"] = 20
        listerc = []
        # Cell A1 is stored in the first column (CTR) of every record; when it
        # is empty no rows are read at all.
        ctr = sheeterc.cell(row = 1, column = 1).value
        if ctr is not None:
            i = 3
            while True:
                nameterc = sheeterc.cell(row = i, column = 1).value
                pbar["value"] = i + 20
                if nameterc is None:
                    # First row without a value in column 1 ends the data.
                    break
                dep = sheeterc.cell(row = i, column = 5).value
                sb = sheeterc.cell(row = i, column = 6).value
                # Column 2 is stripped of "-" and "." so that int() works later.
                cpf = str(sheeterc.cell(row = i, column = 2).value).replace("-", "").replace(".", "")
                listerc.append((ctr, comp, dep, sb, cpf, nameterc))
                i = i + 1
    finally:
        wbterc.close()

    columns = ("CTR", "COMPETENCIA", "AG_LOTACAO", "SB_LOTACAO", "CPF_TERC", "NOME_TERC")
    # Column names are fixed constants; the row values go through placeholders.
    stmt = "replace into terceirizados.TERCEIRIZADOS (" + ", ".join(columns) + ") values (%s, %s, %s, %s, %s, %s)"

    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.title = "TERCEIRIZADOS"
    worksheet.cell(row = 1, column = 1).value = directory
    for column, header in enumerate(columns, start = 1):
        worksheet.cell(row = 2, column = column).value = header

    # NOTE(review): indexfile is never advanced and indexname is never reset,
    # so with several PDFs every file shares column 7 (its header shows only
    # the last file name) and each file's rows are appended below the previous
    # file's. Kept as in the original; confirm the intended layout.
    indexfile = 7
    indexname = 3
    os.chdir(directory)
    for pdfname in glob.glob("*.pdf"):
        pdf = PyPDF4.PdfFileReader(directory + "/" + pdfname)
        worksheet.cell(row = 2, column = indexfile).value = pdfname
        # Full text of the document, all pages concatenated.
        data = "".join(pdf.getPage(page).extractText() for page in range(pdf.getNumPages()))
        for tupterc in listerc:
            worksheet.cell(row = indexname, column = 1).value = tupterc[0]
            worksheet.cell(row = indexname, column = 2).value = tupterc[1]
            worksheet.cell(row = indexname, column = 3).value = tupterc[2]
            worksheet.cell(row = indexname, column = 4).value = tupterc[3]
            worksheet.cell(row = indexname, column = 5).value = int(tupterc[4])
            worksheet.cell(row = indexname, column = 6).value = tupterc[5]
            cur.execute(stmt, tupterc)
            found = tupterc[5] in data
            worksheet.cell(row = indexname, column = indexfile).value = "SIM" if found else "NÃO"
            pbar["value"] = indexname + 100
            indexname = indexname + 1
            dbconn.commit()
    # NOTE(review): closing the shared connection here means a second click on
    # the button will fail at cur.execute(); kept as in the original.
    dbconn.close()
    pbar["value"] = 100

    destfilename = directory + "/testeverificacaoterceirizados_" + datetime.datetime.now().strftime("%d-%m-%y") + ".xlsx"
    workbook.save(filename = destfilename)
    tkinter.messagebox.showinfo("Fim", "Relatório resultante salvo em " + directory)
    os.startfile(destfilename)

# Start-up: connect to the database, read the highest registered version and
# only open the GUI when it equals this program's `version`.
#
# NOTE(security): host, user and password are hard-coded in the source (and
# therefore in any compiled/distributed binary). Consider loading them from a
# configuration file or the environment instead.
dbconn = mysql.connector.connect(
    host = "pxl0hosp0164.dispositivos.bb.com.br", user = "terceirizados", password = "7417pd2", port = 3306
)
cur = dbconn.cursor()
cur.execute("Select max(V.VERSAO) from terceirizados.VERSIONAMENTO V")
v = cur.fetchone()

# max() over an empty table yields NULL (None); treat that as "outdated"
# instead of letting float(None) raise TypeError.
if v is not None and v[0] is not None and float(v[0]) == version:
    window = tkinter.Tk()
    window.title("MyProgram - v" + str(version))
    window.geometry("350x200")
    label = tkinter.Label(window, text = "Defina a competência: ")
    label.place(relx = 0.185, rely = 0.3, anchor = "center")
    # Date picker read by verify() through base.get().
    base = tkcalendar.DateEntry(window)
    base.place(relx = 0.5, rely = 0.3, anchor = "center")
    button = tkinter.Button(window, text = "VERIFICAR", command = verify)
    button.place(relx = 0.5, rely = 0.5, anchor = "center")
    # Progress bar updated by verify().
    pbar = tkinter.ttk.Progressbar(window, length = 100)
    pbar.place(relx = 0.5, rely = 0.7, anchor = "center")
    pbar["value"] = 0
    window.mainloop()
else:
    dbconn.close()
    # Without an explicit root, showinfo() makes Tk create and display an
    # empty default window; create one ourselves and keep it hidden.
    root = tkinter.Tk()
    root.withdraw()
    tkinter.messagebox.showinfo("Atualização", "Programa desatualizado")
    root.destroy()

如何绕过它?

1 个答案:

答案 0 :(得分:0)

您的源文件中带有重音符:

  • NÃO
  • Defina a competência
  • Atualização

确保整个工具链(nuitka / scons / ...)都希望输入文件采用UTF-8格式,并且还要确保源文件MyProgram.py也以UTF-8编码存储。

后一个条件似乎并不满足,因此工具链在解码以某种本地ASCII编码(巴西葡萄牙语?代码页860?)存储的字符时出错了。