您的位置:首页 > 理论基础 > 计算机网络

Python 获取网络图片并下载到本地(由网络源码改编)

2016-08-09 15:36 453 查看
#coding=utf-8
import re,time
import urllib,urllib2
import os

def getImg(url):
    """Download every matching image on a gallery page into its own folder.

    The folder is named after the page's file name without its extension
    (``.../a/5511.html`` -> ``5511``) and is created through ``open_path``.
    Images are stored as ``1.jpg``, ``2.jpg``, ... numbered in the order in
    which each distinct image URL first appears on the page.

    :param url: address of the gallery page to scan.
    """
    # The former url[-9:-5] slice only worked for four-character ids: a
    # shorter id pulled in the preceding "/" (which makes os.path.join throw
    # away the base folder) and a longer id was truncated, so different
    # galleries could land in the same folder. Use the whole file stem.
    page_file = url.rstrip('/').rsplit('/', 1)[-1]
    path_name = os.path.splitext(page_file)[0]
    pathl = open_path(path_name)

    # Close the connection explicitly instead of leaking it.
    response = urllib.urlopen(url)
    try:
        html_page = response.read()
    finally:
        response.close()
    time.sleep(2)

    # NOTE(review): \S+ is greedy, so markup without whitespace between two
    # image tags could be captured as one long match -- confirm against the
    # real page markup before tightening the pattern.
    imgre = re.compile(r'http://\S+\/uploads\/\S+[0-9][0-9]{1,2}\.jpg')

    # De-duplicate while keeping first-appearance order, so the numbering of
    # the saved files is the same on every run (a plain set() is unordered).
    seen = set()
    unique_imgs = []
    for found in imgre.findall(html_page):
        if found not in seen:
            seen.add(found)
            unique_imgs.append(found)
    time.sleep(3)

    for index, imgurl in enumerate(unique_imgs, 1):
        local = os.path.join(pathl, str(index) + '.jpg')
        print(path_name + '文件夹---第' + str(index) + '张')
        urllib.urlretrieve(imgurl, local)
        # Pause between downloads (presumably to go easy on the server).
        time.sleep(1)
def get_href(page):
    """Return the distinct gallery links found on a listing page.

    :param page: URL of the listing page to fetch.
    :return: list of unique ``http://.../a/<digits>.html`` URLs, in the order
        in which each one first appears in the page source.
    """
    # Close the connection explicitly instead of leaking it.
    response = urllib2.urlopen(page)
    try:
        html_page = response.read()
    finally:
        response.close()

    link_pattern = re.compile(r'http://\S+/a/\d+\.html')

    # De-duplicate while keeping page order; list(set(...)) returned the
    # links in an arbitrary order that could differ between runs.
    seen = set()
    news_links = []
    for link in link_pattern.findall(html_page):
        if link not in seen:
            seen.add(link)
            news_links.append(link)
    return news_links
def open_path(path_name, base_dir="e:/img/"):
    """Ensure a sub-folder exists and return its path with a trailing slash.

    :param path_name: name of the sub-folder to create.
    :param base_dir: folder under which ``path_name`` is created; defaults to
        the location that used to be hard-coded.
    :return: ``os.path.join(base_dir, path_name)`` followed by ``"/"``.
    :raises OSError: if the folder cannot be created, or the path already
        exists but is not a directory.
    """
    # Local import: errno is not part of the file's top-level imports.
    import errno

    new_path = os.path.join(base_dir, path_name)
    # Create first and tolerate "already exists" rather than checking isdir()
    # beforehand, which leaves a window between the check and the creation.
    try:
        os.makedirs(new_path)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(new_path):
            raise
    return new_path + "/"
def main():
    """Walk the listing pages and download every gallery linked from them.

    Only listing page 1 is visited, because the page range is ``1 <= n < 2``.
    """
    page_template = 'http://meizitu.com/a/list_1_%d.html'
    for page_no in range(1, 2):
        listing_url = page_template % page_no
        # Wait before requesting the listing page.
        time.sleep(2)
        for gallery_url in get_href(listing_url):
            getImg(gallery_url)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
相关文章推荐