您的位置:首页 > 编程语言 > Python开发

【zz】Python2 批量下载网站文件

2014-03-11 13:11 561 查看
#! encoding=utf-8

import urllib2
import re
import os

def Download(url,output):
    """Fetch ``url`` and save the response body to the file ``output``.

    Progress messages are printed before and after the transfer.

    Args:
        url: Address passed to ``urllib2.urlopen``.
        output: Path of the file to create or overwrite (opened in ``"wb"`` mode).

    Raises:
        Whatever ``urllib2.urlopen`` or the file operations raise; nothing is
        caught here, but the response and the file are always closed.
    """
    # A single parenthesised argument prints identically under the Python 2
    # print statement.
    print("downloading..."+url)
    response = urllib2.urlopen(url)
    try:
        with open(output,"wb") as resourceFile:
            # Copy in fixed-size chunks so a large file is never held in
            # memory all at once.
            while True:
                chunk = response.read(64 * 1024)
                if not chunk:
                    break
                resourceFile.write(chunk)
    finally:
        response.close()
    print("downloaded")

def _ensure_directory(path):
    """Create ``path`` (with parents) unless it already exists as a directory."""
    if not path or os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Tolerate the error only if the directory exists after all;
        # anything else (permissions, a file in the way) is a real failure.
        if not os.path.isdir(path):
            raise


def _safe_relative_path(link):
    """Turn a page-relative link into a relative path that stays under the output dir."""
    # Leading slashes would make os.path.join discard the output directory.
    relative = os.path.normpath(link.lstrip("/"))
    escapes = relative == os.pardir or relative.startswith(os.pardir + os.sep)
    if escapes or os.path.isabs(relative):
        # The link climbs above the page directory: keep only the file name.
        relative = os.path.basename(relative)
    return relative


def Action(url,ext = "pdf",output = "."):
    """Download every file with extension ``ext`` that the page at ``url`` links to.

    The page is fetched, every double-quoted string ending in ``.<ext>`` is
    treated as a link, and each link is handed to ``Download``. Links without
    a scheme or host keep their directory structure below ``output``; links
    that name a host are saved directly in ``output`` under their file name.

    Args:
        url: Address of the page to scan.
        ext: File extension to look for, with or without the leading dot.
            It is matched literally, not as a regular expression.
        output: Directory that receives the downloaded files.

    Raises:
        Errors from ``urllib2`` or from file/directory creation propagate to
        the caller; the first failing download stops the run.
    """
    # Python 2 standard library; imported here so the function does not depend
    # on the file's import block.
    import urlparse

    #1.domain
    # Work on the path component only, so "http://host" (no trailing slash)
    # or a "/" inside the query string cannot produce a broken base URL.
    parts = urlparse.urlsplit(url)
    basePath = parts.path[:parts.path.rfind("/") + 1] or "/"
    domain = urlparse.urlunsplit((parts.scheme, parts.netloc, basePath, "", ""))
    print(domain)

    #2.content
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        content = response.read()
    finally:
        response.close()

    #3.resource
    # Require a literal ".<ext>" right before the closing quote.
    pattern = re.compile(r'"([^"]+\.' + re.escape(ext.lstrip(".")) + r')"')
    links = pattern.findall(content)
    print("file num: "+str(len(links)))

    #4.download
    for one in links:
        target = urlparse.urlsplit(one)
        # urljoin resolves relative, root-relative and "//host/..." links and
        # leaves links that already carry a scheme pointing at the same place.
        fileUrl = urlparse.urljoin(domain, one)
        if not (target.scheme or target.netloc):
            fileOutput = os.path.join(output, _safe_relative_path(one))
            _ensure_directory(os.path.dirname(fileOutput))
            print(fileUrl)
            print(fileOutput)
        else:
            partIndex = one.rfind('/')
            print(one)
            print(".........")
            print(one[partIndex:])
            fileOutput = os.path.join(output, one[partIndex + 1:])
            _ensure_directory(output)
            print(fileOutput)
        Download(fileUrl,fileOutput)

if __name__=='__main__':
    # Script entry point: fetch every "jpg" file linked from the page below
    # into the current directory (Action's default output).
    # Another page that can be used as a target:
    #   http://compgeom.cs.uiuc.edu/~jeffe/teaching/algorithms/
    print("download")
    Action("http://tech.qq.com/","jpg")


View Code

本文转载自:http://blog.csdn.net/infoworld/article/details/9337619
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: