知乎1
2016-12-29 20:30
99 查看
import requests,re,json,os,random,time
from bs4 import BeautifulSoup
# --- Script setup: load follower records and the HTTP headers to crawl with ---
# followers_info.txt: one comma-separated record per line; the first field is
# the member's Zhihu URL token.
with open('d://zhihu//zimei//followers_info.txt', 'r') as f:
    follower_lines = f.read().strip().split('\n')  # don't shadow the file handle

# headers1.txt: one "Name:Value" HTTP header per line (presumably includes the
# login Cookie — TODO confirm against the file's actual contents).
headers1 = {}
with open('d://headers1.txt', 'r') as h:
    for line in h.read().strip().split('\n'):
        name, v = line.strip().split(':', 1)  # split on first ':' only
        headers1[name] = v

a = [i.split(',') for i in follower_lines]
#urls=['https://www.zhihu.com/people/'+i[0]+'/following/columns' for i in a]
# The mobile User-Agent overrides any UA read from headers1.txt.
headers = {**headers1, "User-Agent": "Opera/9.80 (Android 2.3.3; Linux; Opera Mobi/ADR-1202011015; U; en) Presto/2.9.201 Version/11.50"}
def get_people(j, max_page=3):
    """Fetch the columns followed by one Zhihu member and append each column's
    metadata (one str(dict) per call) to d://zhihu//<column_id>//column_info.txt.

    j        -- one record from followers_info.txt; j[0] is the member's URL token.
    max_page -- number of 20-item pages to request; the default 3 keeps the
                original behaviour (first page plus the page at offset 20).
    """
    base = ('https://www.zhihu.com/api/v4/members/%s/following-columns'
            '?include=data%%5B*%%5D.intro%%2Cfollowers%%2Carticles_count'
            '%%2Cimage_url%%2Cis_following%%2Clast_article.created' % j[0])
    # First page carries no offset; page p starts at item (p - 1) * 20.
    column_urls_info = [base + '&limit=20']
    column_urls_info += [base + '&offset=%d&limit=20' % ((p - 1) * 20)
                         for p in range(2, max_page)]
    s = requests.Session()
    for url in column_urls_info:
        r = s.get(url, headers=headers)
        # Response.json() decodes using the response charset; on errors (rate
        # limiting, bad token) the API may omit 'data', so default to [].
        columns = r.json().get('data', [])
        for column in columns:
            # Build a fresh dict per column: the original reused one dict,
            # which only worked because every key was overwritten each pass.
            column_info = {
                'title': column['title'],
                'image_url': column['image_url'],
                'id': column['id'],
                'followers': column['followers'],
                'intro': column['intro'],
                'articles_count': column['articles_count'],
                'author_name': column['author']['name'],
            }
            out_dir = 'd://zhihu//%s' % column['id']
            os.makedirs(out_dir, exist_ok=True)  # replaces isdir+mkdir race
            with open(out_dir + '//column_info.txt', 'a', errors='replace') as out:
                out.write(str(column_info))
# Crawl every follower record; one member failing must not abort the whole run.
for j in a:
    try:
        get_people(j)
    except Exception as e:
        # The original bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; catch Exception and report which record failed and why.
        print(j, e)
        continue
from bs4 import BeautifulSoup
# --- Script setup (duplicated paste of the block above): load follower
# records and the HTTP headers to crawl with ---
with open('d://zhihu//zimei//followers_info.txt', 'r') as f:
    follower_lines = f.read().strip().split('\n')  # don't shadow the file handle

# headers1.txt: one "Name:Value" HTTP header per line.
headers1 = {}
with open('d://headers1.txt', 'r') as h:
    for line in h.read().strip().split('\n'):
        name, v = line.strip().split(':', 1)  # split on first ':' only
        headers1[name] = v

a = [i.split(',') for i in follower_lines]
#urls=['https://www.zhihu.com/people/'+i[0]+'/following/columns' for i in a]
# The mobile User-Agent overrides any UA read from headers1.txt.
headers = {**headers1, "User-Agent": "Opera/9.80 (Android 2.3.3; Linux; Opera Mobi/ADR-1202011015; U; en) Presto/2.9.201 Version/11.50"}
def get_people(j, max_page=3):
    """Fetch the columns followed by one Zhihu member and append each column's
    metadata (one str(dict) per call) to d://zhihu//<column_id>//column_info.txt.

    (This is a duplicated paste of the definition above; redefinition is
    harmless but redundant.)

    j        -- one record from followers_info.txt; j[0] is the member's URL token.
    max_page -- number of 20-item pages to request; the default 3 keeps the
                original behaviour (first page plus the page at offset 20).
    """
    base = ('https://www.zhihu.com/api/v4/members/%s/following-columns'
            '?include=data%%5B*%%5D.intro%%2Cfollowers%%2Carticles_count'
            '%%2Cimage_url%%2Cis_following%%2Clast_article.created' % j[0])
    # First page carries no offset; page p starts at item (p - 1) * 20.
    column_urls_info = [base + '&limit=20']
    column_urls_info += [base + '&offset=%d&limit=20' % ((p - 1) * 20)
                         for p in range(2, max_page)]
    s = requests.Session()
    for url in column_urls_info:
        r = s.get(url, headers=headers)
        # Response.json() decodes using the response charset; on errors (rate
        # limiting, bad token) the API may omit 'data', so default to [].
        columns = r.json().get('data', [])
        for column in columns:
            # Build a fresh dict per column: the original reused one dict,
            # which only worked because every key was overwritten each pass.
            column_info = {
                'title': column['title'],
                'image_url': column['image_url'],
                'id': column['id'],
                'followers': column['followers'],
                'intro': column['intro'],
                'articles_count': column['articles_count'],
                'author_name': column['author']['name'],
            }
            out_dir = 'd://zhihu//%s' % column['id']
            os.makedirs(out_dir, exist_ok=True)  # replaces isdir+mkdir race
            with open(out_dir + '//column_info.txt', 'a', errors='replace') as out:
                out.write(str(column_info))
# Crawl every follower record (duplicated paste of the loop above); one
# member failing must not abort the whole run.
for j in a:
    try:
        get_people(j)
    except Exception as e:
        # A bare `except:` would also swallow KeyboardInterrupt and SystemExit;
        # catch Exception and report which record failed and why.
        print(j, e)
        continue
相关文章推荐
- 离职员工揭秘:知乎是怎么做内容社区运营的?
- 关于软件测试人员能力模型的建立(from知乎)
- 我用爬虫一天时间“偷了”知乎一百万用户,只为证明PHP是世界上最好的语言
- 知乎(高逼格)android开发offer——get
- winter开源项目『狗日的知乎』简略分析
- 1001位知乎姑娘
- 仿知乎程序(一)DrawerLayout与Toolbar
- 自动化测试来源于知乎
- Android SwipeRefreshLayout官方下拉刷新控件介绍(与知乎Android客户端下拉刷新一样!!)
- 知乎上关于Java Bean的比喻
- 【Android车载系统 News | Tech 4】知乎--车载话题链接
- 在知乎上给评论加入跳转链接--XSS练手
- python Requests 知乎问题图片爬虫
- Android仿知乎图片墙
- 尝试为知乎增加响应式布局
- 百度、腾讯和阿里内部的级别和薪资待遇是什么样的?-转自知乎
- 【爬虫】python requests模拟登录知乎
- 为什么用Java(转载知乎)
- 读书笔记第一篇:知乎高赞回答
- 深度学习入门方法讨论--摘自知乎