您的位置:首页 > 编程语言 > Python开发

Python : 计算大文件MD5值

2012-12-28 16:40 393 查看
buffer = 8192 这个取值来自网络,但不清楚它是如何通过实践得出的。

from hashlib import md5
import time
import os

def calMD5(str):
    """Return the hexadecimal MD5 digest of an in-memory string.

    Args:
        str: The data to hash. Byte strings are hashed as-is; text
            (unicode) strings are encoded to UTF-8 before hashing.

    Returns:
        The digest as a string of hexadecimal digits.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept
    # unchanged so existing keyword callers keep working.
    data = str
    if isinstance(data, type(u"")):
        # The hash object only consumes bytes. Encoding explicitly makes
        # non-ASCII text hashable instead of raising inside update().
        data = data.encode("utf-8")

    m = md5()
    m.update(data)
    return m.hexdigest()

def calMD5ForFile(file):
    """Return the hexadecimal MD5 digest of the file at path ``file``.

    Files of 1000 MiB or more are handed to ``calMD5ForBigFile`` after a
    notice is printed; smaller files are read with a single ``read()``
    call and hashed in one step.

    Args:
        file: Path of the file to hash.

    Returns:
        The digest as a string of hexadecimal digits.
    """
    # Same cut-off as "size / (1024*1024) >= 1000", written as an integer
    # comparison so it does not depend on how "/" divides integers.
    big_file_threshold = 1000 * 1024 * 1024

    if os.stat(file).st_size >= big_file_threshold:
        print("File size > 1000, move to big file...")
        return calMD5ForBigFile(file)

    m = md5()
    # "with" closes the handle even if the read raises.
    with open(file, 'rb') as f:
        m.update(f.read())
    return m.hexdigest()

def calMD5ForFolder(dir, MD5File):
    """Write an MD5 manifest for every file found below directory ``dir``.

    The tree is traversed with ``os.walk``. For each file one line is
    written to ``MD5File``: the path relative to ``dir``, a single space,
    the file's MD5 hex digest, and a newline.

    Args:
        dir: Root directory to scan.
        MD5File: Path of the manifest file to create (opened with mode
            'w', so an existing file is overwritten).
    """
    manifest_path = os.path.normcase(os.path.abspath(MD5File))

    # "with" guarantees the manifest is closed even if hashing a file fails.
    with open(MD5File, 'w') as outfile:
        for root, subdirs, files in os.walk(dir):
            for name in files:
                filefullpath = os.path.join(root, name)

                # When the manifest lives inside the scanned tree, skip it:
                # it is still being written, so its digest would be stale.
                candidate = os.path.normcase(os.path.abspath(filefullpath))
                if candidate == manifest_path:
                    continue

                filerelpath = os.path.relpath(filefullpath, dir)
                # Named "digest" so the imported md5 constructor is not
                # shadowed inside this function.
                digest = calMD5ForFile(filefullpath)
                outfile.write(filerelpath + ' ' + digest + "\n")

def calMD5ForBigFile(file, chunk_size=8192):
    """Return the hexadecimal MD5 digest of ``file``, reading it in chunks.

    Only one chunk is held at a time, so the whole file is never loaded
    in a single read.

    Args:
        file: Path of the file to hash.
        chunk_size: Number of bytes requested per read. Defaults to 8192,
            the value the original author reported as faster than 2048.

    Returns:
        The digest as a string of hexadecimal digits.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        # read(0) returns no data, which would silently yield the digest
        # of an empty file instead of the real one.
        raise ValueError("chunk_size must be a positive integer")

    m = md5()
    # "with" closes the handle even if a read raises part-way through.
    with open(file, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns
        # an empty byte string, i.e. end of file.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            m.update(chunk)
    return m.hexdigest()

if __name__ == "__main__":
    # Ad-hoc benchmark: hash each sample file once with the whole-file
    # reader and once with the chunked reader, printing the digest and
    # the elapsed wall-clock seconds after each run.
    sample_paths = (
        "E:\\OS\\ubuntu-11.04-desktop-i386.iso",
        "E:\\OS\\ubuntu-12.04-desktop-amd64.iso",
        "D:\\Virtual Machines\\Ubuntu 64-bit\\Ubuntu 64-bit-s001.vmdk",
    )

    for sample in sample_paths:
        started = time.time()
        print(calMD5ForFile(sample))
        print(time.time() - started)

        started = time.time()
        print(calMD5ForBigFile(sample))
        # Elapsed time, a space and an extra newline: leaves a blank
        # line between the results of consecutive files.
        print("%s \n" % (time.time() - started))

#output
#8b1085bed498b82ef1485ef19074c281
#2.57500004768
#8b1085bed498b82ef1485ef19074c281
#3.34100008011
#
#128f0c16f4734c420b0185a492d92e52
#2.632999897
#128f0c16f4734c420b0185a492d92e52
#3.39100003242
#
#File size > 1000, move to big file...
#ec1fa4dc1b32569e9da7b4744548a9ef
#5.40100002289
#ec1fa4dc1b32569e9da7b4744548a9ef
#5.42100000381
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: