您的位置:首页 > 编程语言 > Python开发

python批量下载色影无忌和蜂鸟的图片 爬虫小应用

2017-06-16 13:40 549 查看
代码里有些冗余信息:之前用它们测试过正则表达式,所以没有移走,不过不影响使用。
# -*- coding:utf-8 -*-
import os
import re
import sys
import time
import urllib

try:  # Python 3
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlopen, urlretrieve
    from urlparse import urljoin

def getAllUrl(entry=None):
    """Print every absolute http/https link found in a page's anchors.

    Each ``<a href="...">`` target that starts with ``http:`` or
    ``https:`` is printed on its own line, followed by a final ``done``.

    Args:
        entry: URL of the page to scan.  When omitted, the first
            command-line argument (``sys.argv[1]``) is used.

    Raises:
        SystemExit: if ``entry`` is omitted and no command-line argument
            was supplied.
    """
    if entry is None:
        if len(sys.argv) < 2:
            raise SystemExit("usage: %s <page-url>" % sys.argv[0])
        entry = sys.argv[1]

    connection = urlopen(entry)
    try:
        page = connection.read()
    finally:
        # Close the connection even when the read fails.
        connection.close()

    # Python 3 returns bytes.  latin-1 maps every byte to one character, so
    # decoding can never fail and the ASCII markup matches exactly as the
    # raw bytes would.
    if not isinstance(page, str):
        page = page.decode("latin-1")

    pattern = re.compile(r'<a href="(.+?)".+?>')
    # Accept https as well as http; a bare "http:" filter drops secure links.
    web_site_pattern = re.compile(r'https?:.+?')
    for url in pattern.findall(page):
        if web_site_pattern.match(url):
            print(url)

    print("done")

def download_pic(url=None, directory="C:/Tools/source", delay=1):
    """Download every ``.jpg`` referenced by ``<img src="...">`` on a page.

    Each picture URL and the local file name chosen for it are printed as
    the download proceeds; ``Done`` is printed at the end.

    Args:
        url: URL of the page to scan.  When omitted, the first
            command-line argument (``sys.argv[1]``) is used.
        directory: Folder the pictures are written to; created (including
            missing parents) when it does not exist.
        delay: Seconds to pause after fetching each picture.

    Raises:
        SystemExit: if ``url`` is omitted and no command-line argument
            was supplied.
    """
    if url is None:
        if len(sys.argv) < 2:
            raise SystemExit("usage: %s <page-url>" % sys.argv[0])
        url = sys.argv[1]

    print("Waiting to get data")
    connection = urlopen(url)
    try:
        data = connection.read()
    finally:
        connection.close()

    # Python 3 returns bytes.  latin-1 maps every byte to one character, so
    # decoding can never fail and the ASCII markup matches exactly as the
    # raw bytes would.
    if not isinstance(data, str):
        data = data.decode("latin-1")

    download_pic_pattern = re.compile(r'<img src="(.+?\.jpg)".+?/>')
    name_pattern = re.compile(r'/(\w+?\.jpg)')

    # makedirs, not mkdir: the parent folder may be missing as well.
    if not os.path.isdir(directory):
        os.makedirs(directory)

    for index, src in enumerate(download_pic_pattern.findall(data), 1):
        # Resolve relative image paths against the page; absolute URLs
        # pass through urljoin unchanged.
        picture_url = urljoin(url, src)
        print(picture_url)

        # File names containing non-word characters (e.g. "my-pic.jpg")
        # do not match name_pattern; fall back to a numbered name instead
        # of failing on an empty match list.
        names = name_pattern.findall(picture_url)
        name = names[0] if names else "%d.jpg" % index
        print(name)
        local_path = os.path.join(directory, name)

        jpeg_connection = urlopen(picture_url)
        try:
            jpeg = jpeg_connection.read()
        finally:
            jpeg_connection.close()

        time.sleep(delay)
        print("waiting")
        with open(local_path, "wb") as f:
            f.write(jpeg)

    print("Done")

def download_pic_2(url=None, local_path="C:/Tools/a.jpg"):
    """Download a single resource straight to a local file.

    Args:
        url: URL of the resource to fetch.  When omitted, the first
            command-line argument (``sys.argv[1]``) is used.
        local_path: Destination file path.

    Raises:
        SystemExit: if ``url`` is omitted and no command-line argument
            was supplied.
    """
    if url is None:
        if len(sys.argv) < 2:
            raise SystemExit("usage: %s <url>" % sys.argv[0])
        url = sys.argv[1]

    urlretrieve(url, local_path)
    print("Done")

def regulation():
    """Print the matches of several sample regular expressions.

    A scratchpad for trying out regex syntax: each sample pattern is run
    with ``findall`` against a fixed string and the resulting list is
    printed.  The last sample extracts the ``src`` attribute from an
    ``<img>`` tag and also prints each extracted URL on its own line.
    """
    str1 = "abc123*GBK1024abc*defb1kc12*addd"
    samples = (
        r'abc',             # literal text
        r'a.c',             # "." matches any single character
        r'abc\*',           # escaped "*" matches a literal asterisk
        r'[abc]12',         # character class
        r'\d\*',            # a digit followed by a literal asterisk
        r'a[^\d]',          # negated class: "a" plus one non-digit
        r'a[^\d]*',         # "a" plus a run of non-digits
        r'[a-zA-Z]+(\d+)',  # with a group, findall returns only the group
    )
    for expression in samples:
        # print(...) with one argument produces the same output on
        # Python 2 and Python 3.
        print(re.compile(expression).findall(str1))

    str2 = "dadfae ef <img atl=\"500\" src=\"www.qq.com/1.jpg\" width=\"700\"> asdfe aa<ima"

    p9 = re.compile(r'<img .+ src="(.+)" .+>')
    urls = p9.findall(str2)
    print(urls)
    for url in urls:
        print(url)

if __name__ == "__main__":
    # Script entry point: download the pictures from the page whose URL is
    # given as the first command-line argument.  To try one of the other
    # functions in this file (download_pic_2, regulation, getAllUrl), call
    # it here instead.
    download_pic()
#######后来改动了代码,改用 beautifulsoup,能够更大范围地下载图片,代码如下: http://www.30daydo.com/article/56
                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: