您的位置:首页 > 其它

新浪微博数据挖掘食谱之十五: 爬虫篇 (抓取用户的朋友)

2015-01-11 06:43 330 查看
#!/usr/bin/python
# -*- coding: utf-8 -*-

'''
Created on 2015-1-11
@author: beyondzhou
@name: crawl_friendship_graph.py
'''

# Crawl friendship graph
# Crawl friendship graph
def crawl_friendship_graph():
    """Log in to the Sina Weibo API and crawl the follower graph of a seed user.

    Side effects: authenticates against the Weibo API via ``weibo_login`` and
    stores follower-id documents in MongoDB through ``crawl_weibo_followers``.
    Returns nothing.
    """
    # Local imports keep the project dependencies scoped to this entry point
    from login import weibo_login
    from users import crawl_weibo_followers

    # Access the Sina Weibo API (OAuth handled inside weibo_login)
    weibo_api = weibo_login()

    # Seed user: crawl only the first level, capped at 10 followers
    screen_name = 'beyondzhou8'
    crawl_weibo_followers(weibo_api, screen_name, depth=1, limit=10)

if __name__ == '__main__':
    # Script entry point: kick off the single crawl defined above
    crawl_friendship_graph()

# Crawl a friendship graph breadth-first, persisting each level to MongoDB
def crawl_weibo_followers(weibo_api, screen_name, limit=1000000, depth=2):
    """Breadth-first crawl of *screen_name*'s followers up to *depth* levels.

    For the seed user and every discovered follower, store a document
    ``{'followers': [...ids...]}`` in the ``followers_crawl`` MongoDB
    database under the collection ``'<user_id>-follower_ids'``.

    Args:
        weibo_api: authenticated Sina Weibo API client (project type).
        screen_name: screen name of the seed user.
        limit: max follower ids fetched per user (passed as followers_limit).
        depth: number of breadth-first levels to crawl (1 = seed only).
    """
    from data import save_to_mongo

    # Resolve the ID for screen_name and work with IDs from here on,
    # for consistency in storage
    seed_id = str(weibo_api.users.show.get(screen_name=screen_name)['id'])

    # get_friends_followers_ids returns (friend_ids, follower_ids);
    # friends_limit=0 means we only care about the followers half
    _, next_queue = get_friends_followers_ids(weibo_api, user_id=seed_id,
                                              friends_limit=0,
                                              followers_limit=limit)

    # Store a seed_id => follower_ids mapping in MongoDB
    save_to_mongo({'followers': [_id for _id in next_queue]},
                  'followers_crawl', '{0}-follower_ids'.format(seed_id))

    d = 1
    while d < depth:
        d += 1
        (queue, next_queue) = (next_queue, [])
        for fid in queue:
            # BUG FIX: the original assigned the whole (friends, followers)
            # tuple to follower_ids, which then polluted next_queue with two
            # list objects instead of follower ids. Unpack as at the seed step.
            _, follower_ids = get_friends_followers_ids(weibo_api, user_id=fid,
                                                        friends_limit=0,
                                                        followers_limit=limit)

            # BUG FIX: the original saved next_queue here, which had just been
            # reset to [] above, so every per-fid document was stored empty.
            # Store the follower ids actually fetched for this fid.
            save_to_mongo({'followers': [_id for _id in follower_ids]},
                          'followers_crawl', '{0}-follower_ids'.format(fid))
            next_queue += follower_ids
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: