python查找并删除相同文件-UNIQ File-script版本
2016-04-17 20:22
609 查看
今天用wxPython做了一个GUI程序,实现查找指定目录内的相同文件,主要原理是计算文件的md5值(计算前先找出文件大小相同的文件,然后计算这些文件的md5值,而不是所有文件都计算,大大减少了md5的计算量),加入了多线程功能。
以下是其脚本版本(无需安装wxPython)
UNIQFile-script.py
以下是其脚本版本(无需安装wxPython)
UNIQFile-script.py
# -*- coding: gbk -*-
'''
Author:@DoNotSpyOnMe
Blog: http://www.cnblogs.com/aaronhoo

Find, and optionally remove, duplicated files below a directory.

Files are first grouped by size; an MD5 digest is computed only for files
that share their size with at least one other file, which keeps the amount
of hashing small.  The code runs unchanged on Python 2.7 and Python 3.
'''
import hashlib
import os
import threading  # not used by the script version; kept from the original


def getFileSize(filePath):
    '''Return the size of filePath in bytes.'''
    return os.path.getsize(filePath)


def CalcMD5(filepath):
    '''Return the MD5 hex digest of a file, reading it in one go.

    Suitable for ordinary files only: the whole content is held in memory.
    '''
    with open(filepath, 'rb') as f:
        md5obj = hashlib.md5()
        md5obj.update(f.read())
        return md5obj.hexdigest()


def GetFileMd5(filename):
    '''Return the MD5 hex digest of a file, reading it in 8 KiB chunks.

    Reading in chunks keeps memory use flat for large files.  Returns None
    when filename is not a regular file.
    '''
    if not os.path.isfile(filename):
        return None
    myhash = hashlib.md5()
    # "with" guarantees the handle is closed even if a read fails.
    with open(filename, 'rb') as f:
        while True:
            chunk = f.read(8 * 1024)
            if not chunk:
                break
            myhash.update(chunk)
    return myhash.hexdigest()


def GetAllFiles(directory):
    '''Return the paths of all files below directory, shortest path first.

    The ordering matters to removeSameFile: the first file of every
    duplicate group (the one with the shortest path) is the one kept.
    '''
    files = []
    for dirpath, dirnames, filenames in os.walk(directory):
        for name in filenames:
            # os.path.join instead of a hard-coded '\\' so the script also
            # works where the separator is not a backslash.
            files.append(os.path.join(dirpath, name))
    files.sort(key=len)  # sort by the length of the path
    return files


def _groupByKey(files, keyFunc):
    '''Group files by keyFunc(file); keep only groups with 2+ members.

    Files for which keyFunc returns None are skipped, so files that could
    not be hashed are never reported as duplicates of each other.
    Returns a dict mapping key -> list of paths (input order preserved).
    '''
    groups = {}
    for f in files:
        key = keyFunc(f)
        if key is None:
            continue
        groups.setdefault(key, []).append(f)
    return dict((k, v) for k, v in groups.items() if len(v) > 1)


def _joinGroups(groups):
    '''Convert {key: [paths]} to the legacy {key: 'p1;p2;...'} form.'''
    return dict((k, ';'.join(v)) for k, v in groups.items())


def _md5WithProgress(f):
    '''Announce the file being hashed and return its MD5 digest.'''
    print('calculating the md5 value of file %s' % f)
    return GetFileMd5(f)


def _findDuplicates(sizeGroups):
    '''Return {md5: [paths]} for the duplicates inside same-size groups.'''
    duplicates = {}
    for sameSize in sizeGroups.values():
        duplicates.update(_groupByKey(sameSize, _md5WithProgress))
    return duplicates


def _formatGroups(duplicates):
    '''Render one "md5 <digest>: <path>;<path>..." line per group.'''
    return ''.join('md5 %s: %s\n' % (md5, ';'.join(paths))
                   for md5, paths in duplicates.items())


def findSameSizeFiles(files):
    '''Return {size: 'path1;path2;...'} for sizes shared by 2+ files.

    The ';'-joined string is kept for backward compatibility; it is
    ambiguous when a path itself contains ';', which is why the rest of
    this script works on lists internally.
    '''
    return _joinGroups(_groupByKey(files, getFileSize))


def findSameMD5Files(files):
    '''Return {md5: 'path1;path2;...'} for digests shared by 2+ files.

    Prints a progress line for every file that gets hashed.
    '''
    return _joinGroups(_groupByKey(files, _md5WithProgress))


def removeSameFile(mydir):
    '''Delete duplicated files below mydir, keeping one file per group.

    The kept file is the first of its group in GetAllFiles order (shortest
    path).  A report is printed; any exception is printed and ends the
    operation early, after which the report gathered so far is shown.
    '''
    msg = ''
    msgUniq = 'Result:No file is removed since they are all uniq.'
    try:
        files = GetAllFiles(mydir)
        print('%s files found in directory %s\n' % (len(files), mydir))
        sizeGroups = _groupByKey(files, getFileSize)
        if not sizeGroups:
            print(msgUniq)
            return
        duplicates = _findDuplicates(sizeGroups)
        if not duplicates:
            msg = msgUniq
            return
        # list the duplicated files first:
        msg = 'Duplicated files:\n' + _formatGroups(duplicates) + '\n'
        # then remove the duplicated files:
        removeCount = 0
        for paths in duplicates.values():
            for f in paths[1:]:  # paths[0] is the copy that stays
                msg = msg + 'Removing file: %s' % f + '\n'
                os.remove(f)
                removeCount = removeCount + 1
        msg = msg + '%s files are removed.\n' % removeCount
    except Exception as e:
        # Top-level boundary of an interactive tool: report and carry on.
        print(e)
    finally:
        print(msg + '\n' + 'Operation finished.')


def listSameFile(mydir):
    '''Print the groups of duplicated files below mydir; remove nothing.'''
    msg = ''
    msgUniq = 'Result:All files are uniq.'
    try:
        files = GetAllFiles(mydir)
        print('%s files found in directory %s\n' % (len(files), mydir))
        sizeGroups = _groupByKey(files, getFileSize)
        if not sizeGroups:
            print(msgUniq)
            return
        duplicates = _findDuplicates(sizeGroups)
        if not duplicates:
            msg = msgUniq
        else:
            msg = 'Duplicated files:\n' + _formatGroups(duplicates)
    except Exception as e:
        # Top-level boundary of an interactive tool: report and carry on.
        print(e)
    finally:
        print(msg + '\n' + 'Operation finished.')


def main():
    '''Interactive loop: ask for an action and a directory until 'q'.'''
    try:
        readLine = raw_input  # Python 2
    except NameError:
        readLine = input  # Python 3
    print('This program is designed for clearing the duplicated files and saving memory space.Select a directory and we will find or remove the duplicated files.')
    print('All rights are reserved by @DoNotSpyOnMe')
    print('\n')
    print("You have three options:")
    print("'f' for finding the duplicated files in the directory that you're required to enter later,or")
    print("'r' for finding and the removing the duplicated file,or")
    print("'q' to quit")
    while True:
        option = readLine('Please enter your option:\n').strip().lower()
        while option not in ('f', 'r', 'q'):
            # Normalize on every prompt so 'F' is accepted on a retry too.
            option = readLine('Please enter your option:\n').strip().lower()
        if option == 'q':
            return
        # The path is used exactly as typed: lower-casing it would break
        # case-sensitive file systems, and requiring a backslash would
        # reject POSIX and relative paths.
        mydir = readLine('Please enter the direcotry containing files:\n')
        while not os.path.isdir(mydir):
            mydir = readLine('Please enter a valid direcotry containing files:\n')
        if option == 'f':
            listSameFile(mydir)
        else:
            removeSameFile(mydir)
        print('')


if __name__ == "__main__":
    main()
相关文章推荐
- Python在图像处理方向的基本配置
- python查找并删除相同文件-UNIQ File-wxPython版本
- python模拟登陆知乎并爬取数据
- Python 遍历网页代码抓取文字和图片
- 使用virtualenv构建python虚拟环境
- ipython和pip安装
- Python语句和控制流
- Python 爬虫学习2
- python文件读写
- python之函数
- sklearn.pipeline.Pipeline类的用法
- Python第一章-基础知识
- python入门(五)
- python 按照cpu的使用率对top中的进程排序(排序表格)
- Python学习笔记2-flask-sqlalchemy 简单笔记
- python随机数计算并输出
- 我的第一次Python爬虫——获取自己博客园的所有文章
- 哈工大语言云(LTP)本地安装使用及Python调用
- Python浅拷贝和和深拷贝
- Python之我见