您的位置:首页 > 编程语言 > Python开发

Python爬取知乎话题:"日常穿JK制服是怎样一种体验?"下的所有图片

2019-01-26 11:39 405 查看
版权声明:引用请标示来处,谢谢。 https://blog.csdn.net/YiXiao1997/article/details/86655584
[code]'''
第一次尝试python爬取图片,请多指正
回答者的答案单独存放一个文件夹
'''
from urllib import request
import math
import requests
import json
import re
import os

def getPicture(question_id=29814297, total_answers=569,
               save_dir="C:/Users/23504/Desktop/Python知乎数据/知乎话题:日常穿JK制服是一种怎样的体验?/",
               page_size=20, timeout=10):
    """Download the pictures embedded in the answers of a Zhihu question.

    The answers are fetched page by page from the Zhihu v4 answers API.
    For every answer a sub-folder named ``<folder index>-<author name>`` is
    created under ``save_dir`` and each picture is written into it as
    ``<index inside folder>-<global picture index>.jpg``.

    Failures are handled at the smallest possible scope: a page that cannot
    be fetched or parsed, an answer with missing fields, a folder that
    cannot be created, or a picture that cannot be downloaded is reported
    with ``print`` and skipped, and the crawl continues with the next item.

    Args:
        question_id: Numeric id of the Zhihu question to crawl.
        total_answers: Number of answers to page through. It is not read
            from the API; the default is the count the original author saw
            on the page.
        save_dir: Directory under which the per-answer folders are created.
            Missing parent directories are created as needed.
        page_size: Number of answers requested per API call (``limit``).
        timeout: Timeout in seconds applied to every HTTP request.

    Raises:
        ValueError: If ``page_size`` is not positive.
    """
    if page_size <= 0:
        raise ValueError("page_size must be a positive integer")

    kv = {'user-agent': 'Mozillar/5.0'}
    # ``limit`` is how many answers one call returns, ``offset`` is the index
    # of the first answer of the requested page.
    url_template = (
        "https://www.zhihu.com/api/v4/questions/{qid}/answers?include="
        "data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info"
        "%2Cis_collapsed%2Cannotation_action%2Cannotation_detail"
        "%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit"
        "%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content"
        "%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission"
        "%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info"
        "%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author"
        "%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_labeled"
        "%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url"
        "%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%2A%5D.topics"
        "&limit={limit}&offset={offset}&platform=desktop&sort_by=default"
    )
    # Compiled once because it is applied to every answer.
    img_pattern = re.compile(r'data-original="(.*?)"')

    count = 0     # total number of pictures saved so far
    filcount = 0  # number of answer folders created so far
    for i in range(math.ceil(total_answers / page_size)):
        url = url_template.format(qid=question_id, limit=page_size,
                                  offset=page_size * i)
        try:
            r = requests.get(url, headers=kv, timeout=timeout)
            r.raise_for_status()
            answers = json.loads(r.text)["data"]
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            # ValueError covers a body that is not valid JSON; KeyError and
            # TypeError cover a JSON body without a "data" list.
            print("url链接无效", exc)
            continue

        # Iterate over what the API actually returned: the last page is
        # usually shorter than page_size, so indexing 0..page_size-1 would
        # run past the end of the list.
        for answer in answers:
            try:
                jpgurl = img_pattern.findall(answer["content"])
                name = answer["author"]["name"]
                ID = answer["id"]
                question = answer["question"]["title"]
            except (KeyError, TypeError) as exc:
                print("url链接无效", exc)
                continue

            folder = os.path.join(
                save_dir, str(filcount) + "-" + _safe_dirname(name))
            try:
                # exist_ok lets the script be re-run over an existing tree.
                os.makedirs(folder, exist_ok=True)
            except OSError as exc:
                print("无法创建文件夹", folder, exc)
                continue
            filcount = filcount + 1

            jpgcount = 0  # picture index inside this folder, reset per answer
            # Only every second match is downloaded, exactly as the original
            # index loop did. NOTE(review): presumably each picture's
            # data-original attribute occurs twice in the answer HTML -
            # confirm against a live response.
            for jpg in jpgurl[::2]:
                target = os.path.join(
                    folder, str(jpgcount) + "-" + str(count) + ".jpg")
                try:
                    with request.urlopen(jpg, timeout=timeout) as resp:
                        picture = resp.read()
                    with open(target, 'wb') as file:
                        file.write(picture)
                except Exception as exc:
                    # Deliberately broad: one bad picture (network, HTTP,
                    # malformed URL, disk) must not abort the crawl. Unlike a
                    # bare ``except`` this still lets Ctrl+C through.
                    print("url链接无效", jpg, exc)
                    continue
                print("正在下载第" + str(filcount) + "个回答--回答者昵称:" + name
                      + "--回答者ID:" + str(ID) + "--" + "问题:" + question
                      + "--第" + str(count) + "张图片下载完成")
                jpgcount = jpgcount + 1
                count = count + 1


def _safe_dirname(name):
    """Return ``name`` with characters that are illegal in file names replaced by ``_``."""
    cleaned = re.sub(r'[\\/:*?"<>|]', "_", str(name)).strip()
    # An empty name would produce a folder called just "<index>-".
    return cleaned or "_"
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    getPicture()

 

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: