我正在处理非常大的文件系统。我的任务是用一些给定的参数清理系统。程序碎片下面可以给出一个想法。
import DirectoryWalker
extentions_to_delete = list([".rar",".doc",".URL",".js",".EXE",".mht",".css",".txt", ".cache", ".xml"])
extentions_to_copy = list([".jpg",".BMP",".GIF",".jpeg",".gif",".bmp",".png",".JPG"])
dw = DirectoryWalker.DirectoryWalker("/media/08247451247443AA/home/crap/")
def copy_advice(key, files):
for ext in extentions_to_copy:
if(ext == key):
print str(len(files)) + " Files of type " + key + " should be coppied to the target folder."
for file in files:
copy_to = "/media/08247451247443AA/home/crap-pics/"
moved = dw.move_file_to(file, copy_to, True)
if not moved:
print file + " : not moved"
walks = dw.get_all_file_types()
for key in DirectoryWalker.Walk.store.keys():
files = DirectoryWalker.Walk.store[key]
copy_advice(key, files)
在DirectoryWalker
下面编写代码。 Walk是一个简单的类,它有一个store
对象。
def get_all_file_types(self):
extentions = []
for dirpath,dirnames,filenames in os.walk(self.dir_name):
for file in filenames:
extentions.append(Walk(dirpath +"/"+ file))
return extentions
def move_file_to(self, file_path, copy_to, rename_if_exists= False):
file_name = os.path.split(file_path)[1]
target_file_name = copy_to + file_name;
coppied = False
if not os.path.isfile(target_file_name):
coppied = True
try:
os.rename(file_path, target_file_name)
except OSError:
coppied = False
print "Oops! Unable to rename : " + file_path + " to target : " + target_file_name
if rename_if_exists:
coppied = True
file_name = "new_"+ file_name
try:
os.rename(file_path, target_file_name)
except OSError:
coppied = False
print "Oops! Unable to rename : " + file_path + " to target : " + target_file_name
return coppied
Walk
班
class Walk:
store = dict([])
def __init__(self, filename):
self.file_ext = os.path.splitext(filename)[-1]
self.file_name = filename
if not (Walk.store.has_key(self.file_ext)):
Walk.store[self.file_ext] = list()
Walk.store[self.file_ext].append(self.file_name)
但是当程序执行时,它只会移动近10400个文件。但手动计算表明,文件系统中应该有13400个文件。请告诉我,我做错了什么?
更新解决方案
经过仔细调查后,我得出的结果是目标文件系统中存在许多不明确的文件名,并且这些文件丢失了。
答案 0 :(得分:2)
要回答您的问题,为什么不从一个更简单的代码开始进行测试?
import os
all_files = []
for root, dirs, files in os.walk('/media/08247451247443AA/home/crap/'):
all_files.extend(files)
print len(all_files)
作为旁注,您可以使用defaultdict?
替换Walk类答案 1 :(得分:1)
经过仔细调查后,我得出的结果是目标文件系统中存在许多不明确的文件名,而且这些文件丢失了。