您的位置：首页 > 编程语言 > Python开发

Python 爬虫下载网站图片

2015-05-20 16:27 841 查看

#coding:utf-8
import os
import urllib
import re    #正则表达式模块

def getHtml(url):
    """Fetch the resource at ``url`` and return its body.

    Uses the Python 2 ``urllib.urlopen`` API that this file imports.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` yields for the opened URL.
    """
    page = urllib.urlopen(url)  # open the URL
    try:
        return page.read()  # read everything the URL serves
    finally:
        # Always release the connection, even when read() raises.
        page.close()

def getImgs(html):
    """Extract .jpg image URLs from a page's HTML text.

    Args:
        html: Page source to search.

    Returns:
        A list of the captured ``src`` values in order of appearance;
        an empty list when nothing matches.
    """
    # Capture the value of a src attribute that ends in ".jpg" and is
    # immediately followed by a pic_ext attribute.
    reg = r'src="(.+?\.jpg)" pic_ext'
    imgre = re.compile(reg)
    return imgre.findall(html)

def download(urllist, filepath="/home/swz/imgdownload/"):
    """Download every URL in ``urllist`` into ``filepath`` as 1.jpg, 2.jpg, ...

    Args:
        urllist: Iterable of image URLs to fetch.
        filepath: Directory that receives the files. It is created when it
            does not exist yet and reused when it does.

    Returns:
        None.
    """
    # os.mkdir raises if the directory is already there, which made every
    # run after the first one crash; only create it when it is missing.
    if not os.path.isdir(filepath):
        os.makedirs(filepath)

    # Files are named by their 1-based position in urllist.
    for x, imgurl in enumerate(urllist, 1):
        # Python 2 API: fetch the remote data into the given local file.
        urllib.urlretrieve(imgurl, os.path.join(filepath, '%s.jpg' % x))

if __name__ == "__main__":
    # Fetch the page, collect its .jpg URLs, then save each image locally.
    html = getHtml("http://tieba.baidu.com/p/2460150866")
    urllist = getImgs(html)
    download(urllist)

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航