您的位置:首页 > 其它

知乎1

2016-12-29 20:30 99 查看
import requests,re,json,os,random,time

from bs4 import BeautifulSoup

# Load the follower records: one comma-separated record per line. Only the
# first field of each record is used later (it is inserted into the
# members/... API URL by get_people).
with open('d://zhihu//zimei//followers_info.txt','r') as followers_file:
    # `f` stays a module-level list of raw lines, as in the original script.
    f=followers_file.read().strip().split('\n')

# Extra request headers, stored on disk as "Name: value" lines.
headers1={}
with open('d://headers1.txt','r') as h:
    for line in h.read().strip().split('\n'):
        line=line.strip()
        if not line:
            # Blank separator lines would otherwise break the unpacking below.
            continue
        name,v=line.split(':',1)
        # Strip both parts: "Name: value" otherwise yields " value", and
        # requests rejects header values that start with whitespace.
        headers1[name.strip()]=v.strip()

# One list of fields per non-blank follower line.
a=[i.split(',') for i in f if i.strip()]

# The file-provided headers, with the User-Agent always overridden.
headers={**headers1,**{"User-Agent": "Opera/9.80 (Android 2.3.3; Linux; Opera Mobi/ADR-1202011015; U; en) Presto/2.9.201 Version/11.50"}}

def get_people(j):
    """Fetch the columns one member follows and save each newly seen column.

    Two pages of the ``following-columns`` API are requested (no offset and
    ``offset=20``, both with ``limit=20``). For every column returned, a
    directory ``d://zhihu//<column id>`` is created if it does not exist yet
    and the column's summary dict is written to ``column_info.txt`` inside
    it. Columns whose directory already exists are skipped.

    Args:
        j: Sequence whose first element is the member identifier placed in
            the API URL.

    Returns:
        None.

    Raises:
        Any exception from the HTTP request, JSON decoding, a missing key in
        the response, or the filesystem operations propagates to the caller.
    """
    base=('https://www.zhihu.com/api/v4/members/{0}/following-columns'
          '?include=data%5B*%5D.intro%2Cfollowers%2Carticles_count'
          '%2Cimage_url%2Cis_following%2Clast_article.created').format(j[0])
    # First page carries no offset parameter; second page starts at 20.
    column_urls_info=[base+'&limit=20',base+'&offset=20&limit=20']

    # Context manager closes the session's pooled connections on exit,
    # including when an exception is raised.
    with requests.Session() as s:
        for url in column_urls_info:
            # Timeout so a stalled connection raises instead of hanging forever.
            r=s.get(url,headers=headers,timeout=30).content.decode('utf-8')
            columns=json.loads(r)['data']

            for column in columns:
                # Fresh dict per column; built before touching the disk so a
                # malformed entry fails without leaving an empty directory.
                column_info={
                    'title':column['title'],
                    'image_url':column['image_url'],
                    'id':column['id'],
                    'followers':column['followers'],
                    'intro':column['intro'],
                    'articles_count':column['articles_count'],
                    'author_name':column['author']['name'],
                }

                column_dir='d://zhihu//%s'%column['id']
                # The directory doubles as the "already saved" marker.
                if not os.path.isdir(column_dir):
                    os.mkdir(column_dir)
                    with open(column_dir+'//column_info.txt','a',errors='replace') as out:
                        out.write(str(column_info))

# Process every follower record; a failure for one member is reported and
# the run moves on to the next record.
for j in a:
    try:
        get_people(j)
    except Exception as exc:
        # `Exception` rather than a bare except, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop the script.
        print(j)
        # Show the cause as well, so failed records can be diagnosed.
        print('  failed: %r'%(exc,))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: