python:以.csv文件格式

时间:2017-01-21 22:58:37

标签: python

#write data in .csv file
def data_save_csv(type,data,id,name,header,since = None):
    #get the date when storage data
    date_storage()
    #create the data storage directory
    csv_parent_directory = os.path.join("dataset","csv",type,glovar.date)

    #write data in .csv
    if type == "group_members":
        csv_file_prefix = "gm"
    elif type == "group_feed":
        csv_file_prefix = "gf"
    elif type == "public_figure_posts":
        csv_file_prefix = "pfp"
    elif "user_" in type:
        # create the data storage directory
        csv_parent_directory = os.path.join("dataset", "csv", "user", type, glovar.date)
        if type == "user_friends":
            csv_file_prefix = "uf"
        elif type == "user_likes":
            csv_file_prefix = "ul"
        elif type == "user_feed":
            csv_file_prefix = "uf"
    # create (mkdir) the csv_parent_directory
    directory_create(csv_parent_directory)

    if since:
        csv_file_name = csv_file_prefix + "_" + since.strftime("%Y%m%d-%H%M%S") + "_" + time_storage() + id + "_" +name + ".csv"
    else:
        csv_file_name = csv_file_prefix + "_"  + time_storage() + "_" + id + "_" +name + ".csv"
     csv_file_directory = os.path.join(csv_parent_directory,csv_file_name)

    if type == "user_feed":
        feed = data
        for item in feed:
            # parse the feed data from group_download.py
            print("id=" + item['id'] + ",")
            print("permalink_url=" + item['permalink_url'] + ",")
            print("created_time=" + item['created_time'] + ",")
            print("updated_time=" + item['updated_time'] + ",")
            print("name=" + item['from']['name'] + ",")
            print("from_id=" + item['from']['id'] + ",")
            print("message=" + item['message'] + ",")
            print("link=" + item['link'] + ",")
            print("likes_total_count=" + str(item['likes']['summary']['total_count']) + ",")
            print("comments_total_count=" + str(item['comments']['summary']['total_count']) + ",")

    with open(csv_file_directory,'w',newline='', encoding='utf-8') as csvfile:

        writer = csv.writer(csvfile,delimiter=',',quotechar='"',quoting=csv.QUOTE_MINIMAL)

        #csv header
        writer.writerow(header)

        #if data is group members(group_manage.py)
        if type == "group_members" or "user_friends" or "user_likes":
            row = []
            for i in range(len(data)):
                for k in data[i].keys():
                    if isinstance(data[i][k],bool):
                        data[i][k] = str(data[i][k])
                    row.append(data[i][k])

            writer.writerow(row)
            row = []
        #if data is group feed(group_download.py)
        elif type == "group_feed" or "public_figure_posts" or "user_feed":
            feed = data
            for item in feed:
                #parse the feed data from group_download.py
                row = [item['id'],item['permalink_url'],item['created_time'],item['updated_time'],item['from']['name'],item['from']['id'],item['message'],item['link'],item['likes']['summary']['total_count'],item['comments']['summary']['total_count']]
                writer.writerow(row)

    csvfile.close()

写一个python程序在.csv文件中写入数据,当类型是“user_feed”时, 我打印数据项目:

id=110286969468305_112459422584393,
permalink_url=https://www.facebook.com/110286969468305/posts/112459422584393,
created_time=2016-12-18T12:44:52+0000,
updated_time=2016-12-18T12:47:10+0000,
name=Dewi Nurfitri Oktaviani,
from_id=10202749157833181,
message=Hi, nice to meet you,
link=,
likes_total_count=0,
comments_total_count=1,

他们是对的,但是当在.csv文件中写入数据时,我发现数据序列与头部顺序不匹配,头部是:

header = ["POST ID", "Permalink", "Create time", "Updated time", "Author", "Author ID", "Message", "Link", "Likes", "Comments"]

你可以在这个方法中看到“data_save_csv”,

elif type == "group_feed" or "public_figure_posts" or "user_feed":
    feed = data
    for item in feed:
         #parse the feed data from group_download.py
         row = [item['id'],item['permalink_url'],item['created_time'],item['updated_time'],item['from']['name'],item['from']['id'],item['message'],item['link'],item['likes']['summary']['total_count'],item['comments']['summary']['total_count']]
         writer.writerow(row)

你可以看到数据项的序列与头部的序列相同,但是当我打开csv文件时,我发现头部项目的顺序是正确的,但数据项的顺序是无序的,没有“id”数据,其他项目顺序的顺序不正确。

你可以帮我吗?

Data Structure

1 个答案:

答案 0 :(得分:0)

问题1:这一行

if type == "group_members" or "user_friends" or "user_likes":

没有做你想做的事。表达式始终计算为True。可能的替代品:

if type == "group_members" or type == "user_friends" or type == "user_likes":

if type in ("group_members", "user_friends", "user_likes", ):

if type in {"group_members", "user_friends", "user_likes", }:

和这一行

elif type == "group_feed" or "public_figure_posts" or "user_feed":

有同样的问题。您应该修复这两行并重试。