Python:从标签(gmail)以PDF格式下载所有电子邮件

时间:2019-03-12 20:44:58

标签: python python-3.x pdf download gmail-api

我想从gmail以pdf格式下载100多封电子邮件。通过gmail中的打印选项逐封手动下载太耗时了。

下面的python脚本可以检索所选标签中的电子邮件。如何将这些电子邮件转换为pdf?

# source  = https://developers.google.com/gmail/api/quickstart/python?authuser=2

from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request



# OAuth scope requested from the user: read-only Gmail access. The cached
# token.pickle stores credentials obtained for these scopes.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


def main():
    """List every message under label ``Label_53`` and fetch each one raw.

    Credentials are loaded from ``token.pickle`` when that file exists;
    otherwise (or when they are no longer valid) they are refreshed or
    obtained through the local-server OAuth flow using
    ``credentials.json``, then written back to ``token.pickle``.

    The fetched messages are not processed any further here: each raw
    response is only assigned to a local variable.
    """
    creds = None

    # NOTE: pickle.load executes whatever the file contains; only use a
    # token.pickle that this script wrote itself.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server()
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('gmail', 'v1', credentials=creds)

    # Collect the message references page by page. A single loop handles the
    # first page and every following one, and .get() tolerates a page that
    # carries no 'messages' key instead of raising KeyError.
    all_message_in_label = []
    page_token = None
    while True:
        response = service.users().messages().list(userId="me", labelIds="Label_53", q=None, pageToken=page_token, maxResults=None, includeSpamTrash=None).execute()
        all_message_in_label.extend(response.get('messages', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    if not all_message_in_label:
        print('No email LM found.')
    else:
        # get message from Id listed in all_message_in_label
        for message_ref in all_message_in_label:
            message = service.users().messages().get(userId="me", id=message_ref["id"], format="raw", metadataHeaders=None).execute()


if __name__ == '__main__':
    main()

2 个答案:

答案 0 :(得分:1)

我做了一些关于您的问题的挖掘工作,发现一些有用的链接:

要将邮件转换为.eml格式,可以参考this链接。

要将.eml转换为.pdf,可以参考以下链接:

eml2pdf是一个python github项目,可将eml文件转换为pdf,但我不确定它是否有效。您可以检查一下是否可行。

eml-to-pdf是另一个github项目,看起来不那么有效。它是用JavaScript编写的。

pyPdf,可用于生成pdf文件。尽管这样做可能需要转换电子邮件并自行格式化。

有关消息对象格式的更多信息,您可以参考gmail api python docs get方法。

here是一篇博客文章,使用另一种方法来满足您的需求,尽管我不确定它是否仍然有效。

我希望它会有所帮助。祝你好运。

答案 1 :(得分:1)

我尝试了Ali Nuri Seker回答中的建议,但是这些建议无效:
-eml2pdf:在Windows上不起作用
-eml-to-pdf:Mime类型错误
-pyPdf:构建整个设计所需的工作过多
-gmail-to-pdf:某些电子邮件的代码错误(参见github上提到的错误)

有效的方法(与Ali Nuri Seker相同的一般想法):

  1. 使用email.generator.Generator将电子邮件另存为.eml
  2. 使用eml-to-pdf-converter(不是基于python,而是开源的GUI程序)将.eml文件转换为pdf(基本上,只需把包含.eml文件的文件夹拖放进去,单击一个按钮,即可得到pdf。它甚至可以处理子文件夹!)

更详细的脚本可以在here找到


这是第一个脚本“将电子邮件另存为.eml”:

# source  = https://developers.google.com/gmail/api/quickstart/python?authuser=2

# In brief:
# this script will connect to your gmail account and download as .eml file your email from a specified label. 
# then you can convert the .eml files to pdf :  https://github.com/nickrussler/eml-to-pdf-converter

# set up
#  1) save this script in a folder
#  2) save the script "get labels id.py" in the same folder
#  3) go to this link https://developers.google.com/gmail/api/quickstart/python and click on "Enable the gmail API", then click on "Download client configuration" and save this .json file in the same folder as this script
#  4) The Gmail API uses label IDs, not label names, so run the script "get labels id.py" and copy the ID of the label you need (the first time, you will be asked for permission on a consent screen; accept it with the account you want to download emails from)
#  5) copy your label id below in custom var 
#  6) run this script and your emails will be saved as .eml file in a subfolder "emails as eml"

# connect to gmail api 
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# decode response from Gmail api and save a email
import base64
import email

#for working dir and path for saving file
import os

# CUSTOM VAR
# Gmail label id (not the label name) whose messages will be downloaded.
labelid = "Label_18"  # change your label id

# Work from the script's own directory so the relative paths used later
# (token.pickle, credentials.json) resolve next to the script.
# https://stackoverflow.com/a/1432949/3154274
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)
print("working dir set to ", dname)

# Create the folder the .eml files are saved into. os.path.join picks the
# right separator for the platform; the previous dname + "\emails as eml"
# relied on an invalid "\e" escape and only produced a subfolder on Windows.
emailfolder = os.path.join(dname, "emails as eml")
os.makedirs(emailfolder, exist_ok=True)


# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def main():
    """Save every message carrying label ``labelid`` as an ``.eml`` file.

    Credentials are loaded from ``token.pickle`` when that file exists;
    otherwise (or when they are no longer valid) they are refreshed or
    obtained through the local-server OAuth flow using
    ``credentials.json``, then written back to ``token.pickle``.

    Each message is fetched in ``raw`` format, base64url-decoded, parsed and
    written to ``<emailfolder>/<message id>.eml``. A message that cannot be
    decoded or written is reported and skipped; the remaining messages are
    still processed.
    """
    # Imported explicitly: a plain ``import email`` does not guarantee that
    # the ``email.generator`` submodule has been loaded.
    from email.generator import BytesGenerator

    # Create the credentials the first time and save them in token.pickle.
    # NOTE: pickle.load executes whatever the file contains; only use a
    # token.pickle that this script wrote itself.
    creds = None
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server()
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    # create the service
    service = build('gmail', 'v1', credentials=creds)

    # Get the *list* of all emails in the label, following nextPageToken
    # until the last page.
    # Resources for *list* email by labels:
    # https://developers.google.com/resources/api-libraries/documentation/gmail/v1/python/latest/index.html
    # https://developers.google.com/resources/api-libraries/documentation/gmail/v1/python/latest/gmail_v1.users.messages.html#list
    #
    # .get() tolerates a page without a 'messages' key instead of raising
    # KeyError.
    all_message_in_label = []
    page_token = None
    while True:
        response = service.users().messages().list(userId="me", labelIds=labelid, q=None, pageToken=page_token, maxResults=None, includeSpamTrash=None).execute()
        all_message_in_label.extend(response.get('messages', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    # Entries of all_message_in_label look like:
    #   {'id': '169735e289ba7310', 'threadId': '169735e289ba7310'}
    if not all_message_in_label:
        print('No email LM found.')
        return

    # For each id in all_message_in_label we *get* the message.
    # Resources for *get* email:
    # https://developers.google.com/resources/api-libraries/documentation/gmail/v1/python/latest/gmail_v1.users.messages.html#get
    # https://developers.google.com/gmail/api/v1/reference/users/messages/get
    for message_ref in all_message_in_label:
        message_id = message_ref["id"]
        message = service.users().messages().get(userId="me", id=message_id, format="raw", metadataHeaders=None).execute()

        try:
            raw_bytes = base64.urlsafe_b64decode(message['raw'].encode('ASCII'))

            # Stay in bytes from decode to disk: decoding to str and writing
            # in text mode fails on messages whose bytes are not valid UTF-8
            # or cannot be encoded in the platform's default encoding.
            mime_msg = email.message_from_bytes(raw_bytes)

            # save the message as a .eml file
            outfile_name = os.path.join(emailfolder, f'{message_id}.eml')
            with open(outfile_name, 'wb') as outfile:
                BytesGenerator(outfile).flatten(mime_msg)
        except (KeyError, ValueError, OSError) as err:
            # KeyError: no 'raw' field; ValueError: invalid base64/ASCII;
            # OSError: the file could not be written.
            print("error in message ", message.get("snippet", message_id), "-", err)
        else:
            print("mail saved: ", message_id)


if __name__ == '__main__':
    main()

这是第二个脚本“get labels id.py”(参见第一个脚本中“set up”部分的第4步)。

# source  = https://developers.google.com/gmail/api/quickstart/python?authuser=2

from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# The cached token.pickle holds credentials obtained for these scopes;
# delete that file after changing them.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


def main():
    """Print the name and id of every Gmail label of the authorised user.

    Credentials are read from ``token.pickle`` when that file exists. When
    they are missing or not valid they are refreshed, or obtained through
    the local-server OAuth flow using ``credentials.json``, and then written
    back to ``token.pickle``.
    """
    token_path = 'token.pickle'

    credentials = None
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token_file:
            credentials = pickle.load(token_file)

    needs_login = not credentials or not credentials.valid
    if needs_login:
        can_refresh = credentials and credentials.expired and credentials.refresh_token
        if can_refresh:
            credentials.refresh(Request())
        else:
            credentials = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES).run_local_server()
        # Cache the credentials so the next run can skip the consent screen.
        with open(token_path, 'wb') as token_file:
            pickle.dump(credentials, token_file)

    gmail = build('gmail', 'v1', credentials=credentials)

    # Get list of all labels
    #  https://developers.google.com/resources/api-libraries/documentation/gmail/v1/python/latest/index.html
    listing = gmail.users().labels().list(userId='me').execute()
    labels = listing.get('labels', [])

    # Header line first, then one "<name> <id>" line per label (none when
    # the list is empty).
    print('Labels:' if labels else 'No labels found.')
    for entry in labels:
        print(entry['name'] + " "+entry['id'])


if __name__ == '__main__':
    main()
    input("continue")
相关问题