您的位置:首页 > 产品设计 > UI/UE

爬取百度贴吧的一些图片,使用requests、re、urllib模块

2017-12-06 00:00 531 查看
直接上代码~~~~~开心

图片名字以0、1、2……的序号依次命名

图片保存在本地文件夹里,也就是运行脚本时的当前工作目录(通常就是此.py文件所在的目录)



# -*- coding: utf-8 -*-

# 引入requests模块
import requests
import re
import urllib

# Fetch the raw HTML of a page.
def getHtml(url, timeout=10):
    """Return the body of an HTTP GET request to *url*.

    :param url: address of the page to download.
    :param timeout: seconds to wait for the server before ``requests``
        raises a timeout error; without it a stalled connection would
        block the script indefinitely.
    :returns: the response body exactly as ``requests`` exposes it through
        ``Response.content`` (undecoded).
    """
    response = requests.get(url, timeout=timeout)
    return response.content

# Download the Tieba thread page once; the markup is scanned for images below.
htmls = getHtml('http://tieba.baidu.com/p/5467656444')
# Debug aid: print htmls

# Collect the image URLs in the page, download each one, and return the URLs.
def getImage(htmls):
    """Download every ``http://imgsrc...jpg`` image referenced in *htmls*.

    Files are saved relative to the current working directory and named by
    their position in the page: ``0.jpg``, ``1.jpg``, ...

    :param htmls: page markup to scan for ``src="..."`` attributes.
    :returns: list of the matched image URLs, in page order (empty when the
        page contains no matching image).
    """
    # ``[^"]`` keeps each match inside a single src attribute; a bare ``.*?``
    # could run past the closing quote and swallow markup up to a later
    # ``.jpg"`` elsewhere on the same line.
    image = re.findall(r'src="(http://imgsrc[^"]*?\.jpg)"', htmls, re.I)
    for x, url in enumerate(image):
        urllib.urlretrieve(url, '%s.jpg' % x)
    return image

# Run the download for the fetched page; `image` holds whatever getImage() returns.
image = getImage(htmls)
# Debug aid: print image

图片名字以URL中path的最后一段(即图片原本的文件名)命名:



# -*- coding: utf-8 -*-

# 引入requests模块
import requests
import re
import urllib
import urlparse

def getHtml(url, timeout=10):
    """Return the body of an HTTP GET request to *url*.

    :param url: address of the page to download.
    :param timeout: seconds to wait for the server before ``requests``
        raises a timeout error; without it a stalled connection would
        block the script indefinitely.
    :returns: the response body exactly as ``requests`` exposes it through
        ``Response.content`` (undecoded).
    """
    response = requests.get(url, timeout=timeout)
    return response.content

# Download the Tieba thread page once; the markup is scanned for images below.
htmls = getHtml('http://tieba.baidu.com/p/5467656444')
# Debug aid: print htmls

def getImage(htmls):
    """Download every ``http://imgsrc...jpg`` image referenced in *htmls*.

    Each file is saved relative to the current working directory under the
    last segment of its URL path (the name the server uses for it), and
    that name is printed as the download starts.

    :param htmls: page markup to scan for ``src="..."`` attributes.
    :returns: list of the matched image URLs, in page order (empty when the
        page contains no matching image).
    """
    # ``[^"]`` keeps each match inside a single src attribute; a bare ``.*?``
    # could run past the closing quote and swallow markup up to a later
    # ``.jpg"`` elsewhere on the same line.
    image = re.findall(r'src="(http://imgsrc[^"]*?\.jpg)"', htmls, re.I)
    for url in image:
        # Parse the URL once and reuse the file name for both the progress
        # output and the destination path.
        urlName = urlparse.urlparse(url).path.split('/')[-1]
        print(urlName)
        urllib.urlretrieve(url, urlName)
    return image

# Run the download for the fetched page; `image` holds whatever getImage() returns.
image = getImage(htmls)
# Debug aid: print image
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: