我有一个字典:
d[class name]=(list of files)
例如:
d[earn]=(6,7,4)
其中6.txt、7.txt和4.txt是属于“earn”类的文件
现在,我需要创建另一个字典d2,以便:
d2[earn]=(12,3,2,17)
,其中12、3、2分别是单词“earn”在6.txt、7.txt和4.txt中出现的次数,17是这些次数的总和。
这是我的代码:
import collections
import sys
import os
import re
from collections import Counter
from glob import glob

# Redirect everything print() writes from here on into dictionary.txt.
sys.stdout = open('dictionary.txt', 'w')

folderpath = 'd:/individual-articles'
counter = Counter()

# Build d: class name -> list of file names belonging to that class.
# Each line of topics.txt is split on '~'; the first field becomes the
# file name (with ".txt" appended) and every remaining non-empty field is
# used as a class-name key.
d = collections.defaultdict(list)
with open('topics.txt') as f:
    for line in f:
        value, *keys = line.strip().split('~')
        # filter(None, ...) drops empty fields such as those produced by
        # a trailing or doubled '~'.
        for key in filter(None, keys):
            d[key].append(value + ".txt")

# Every .txt file directly inside the articles folder.
filepaths = glob(os.path.join(folderpath, '*.txt'))
def words_generator(fileobj):
    """Yield each whitespace-separated word from an iterable of lines.

    Args:
        fileobj: Any iterable of strings, e.g. an open text file.

    Yields:
        The words of every line, in order of appearance.
    """
    for line in fileobj:
        # str.split() with no argument splits on runs of whitespace and
        # never produces empty strings.
        yield from line.split()
# word -> {"total": occurrences across all files,
#          <file path>: occurrences in that file, ...}
word_count_dict = {}
for file in filepaths:
    # The context manager closes each file once it has been tallied;
    # a bare open() here would leave every handle open.
    with open(file, "r") as f:
        for word in words_generator(f):
            per_word = word_count_dict.setdefault(word, {"total": 0})
            per_word[file] = per_word.get(file, 0) + 1
            per_word["total"] += 1

# For every file, count how many distinct words occur in it.
for per_word in word_count_dict.values():
    for filename in per_word:
        if filename == 'total':
            continue
        # Counter.update() iterates its argument, so passing the path
        # string would tally its individual characters; increment the
        # entry for the whole file name instead.
        counter[filename] += 1

# Emit each word with its overall frequency.
for word, counts in word_count_dict.items():
    print(word, counts['total'])
我需要打印d2,但我的代码不起作用。
答案 0 :(得分:0)
我认为下面这段代码应该能实现你想要的效果:
from collections import defaultdict

# d2 maps each key of d to the per-file counts of that key, followed by
# the sum of those counts as the last element.
d2 = defaultdict(list)
for word, files in d.items():
    # Go over each file name in the list associated with the key `word` in d.
    for fname in files:
        # NOTE(review): fname is opened relative to the current working
        # directory - confirm that is where the files live.
        with open(fname) as f:
            # Counts substring occurrences of `word` in the file's text.
            # Use f.read().split().count(word) instead to count only
            # whole whitespace-separated words.
            d2[word].append(f.read().count(word))
    # All files for this key are done: append the sum of the counts.
    d2[word].append(sum(d2[word]))
# print() call form: the surrounding script uses Python 3 syntax, where the
# Python 2 `print d2` statement is a SyntaxError.
print(d2)