您的位置:首页 > 编程语言 > Python开发

用 Python requests 库写一个人人网相册爬虫

2016-08-18 22:52 381 查看
担心人人网会黄掉,写个爬虫,把我的相册照片都下载下来。代码如下(基于 Python 2,需要先安装 requests 库):

# -*- coding: utf-8 -*-
import requests
import json
import os

def mkdir(path):
    """Create directory *path* (including parents) unless it already exists.

    Surrounding whitespace and trailing backslashes are removed from
    *path* before it is used.

    Args:
        path: directory path to create.

    Returns:
        "yes" if the directory was created by this call, "no" if the
        path already existed.

    Raises:
        OSError: propagated from ``os.makedirs`` when creation fails.
    """
    path = path.strip().rstrip("\\")
    if os.path.exists(path):
        print(path + u' 目录已存在')
        return "no"
    # Create first and report afterwards, so a failing makedirs can never
    # be preceded by a misleading success message.
    os.makedirs(path)
    print(path + u' 创建成功')
    return "yes"

def login_renren(s):
    """Post the login form to renren.com through session *s*.

    Args:
        s: a ``requests.Session``-like object; only ``s.post`` is used.

    Returns:
        The same session object *s*, whether or not the login succeeded,
        so the caller can keep using it.
    """
    # The values below are placeholders: per the literals themselves, the
    # (encrypted) password and the rkey are meant to be copied from a
    # captured browser login request.
    login_data = {
        'email': '用户名',
        'domain': 'renren.com',
        'origURL': 'http://www.renren.com/home',
        'key_id': '1',
        'captcha_type': 'web_login',
        'password': '密码抓包获得',
        'rkey': 'rkey抓包获得',
    }
    r = s.post(
        "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2016742045262",
        data=login_data,
    )
    # r.text is a text string on both Python 2 and 3, so the substring
    # test does not depend on the bytes/str distinction of r.content.
    if 'true' in r.text:
        print(u'登录人人网成功')
    else:
        # Make a failed login visible instead of silently continuing.
        print(u'登录人人网失败')
    return s

def get_albums(s, user_id='278382090'):
    """Fetch the album-list page and extract the album ids embedded in it.

    The page carries its data as a JavaScript assignment
    (``nx.data.photo = ...``); the text between that marker and the
    following ``nx.data.hasHiddenAlbum =`` marker is parsed as JSON.

    Args:
        s: a ``requests.Session``-like object; only ``s.get`` is used.
        user_id: renren user id whose albums are listed. Defaults to the
            id that was previously hard-coded in the URL.

    Returns:
        A tuple ``(album_ids, s)``: the list of ``albumId`` values and the
        session that was passed in.

    Raises:
        ValueError: if the expected markers are missing from the page or
            the embedded data is not valid JSON.
    """
    url = 'http://photo.renren.com/photo/%s/albumlist/v7?showAll=1#' % user_id
    content = s.get(url).content

    # Search on the raw bytes so this works on Python 2 and 3 alike.
    start_marker = b'nx.data.photo = '
    start = content.find(start_marker)
    end = content.find(b'nx.data.hasHiddenAlbum =')
    if start < 0 or end < start:
        # Without this check a missing marker yields a garbage slice and
        # an opaque JSON error further down.
        raise ValueError(
            'album list data not found in page '
            '(not logged in, or page layout changed?)'
        )

    raw = content[start + len(start_marker):end].strip()
    # Drop the last character -- presumably the ';' that terminates the
    # JavaScript assignment.
    raw = raw[:-1]
    # The embedded object uses single-quoted strings; swap them for double
    # quotes so json can parse it. NOTE(review): this would corrupt values
    # that themselves contain an apostrophe.
    data = json.loads(raw.decode('utf-8').replace("'", '"'))

    album_list = data['albumList']
    album_count = album_list['albumCount']
    print(u'一共有' + str(album_count) + u'个相册')

    album_ids = [album['albumId'] for album in album_list['albumList']]
    return album_ids, s

def download_albums(album_ids, s, user_id='278382090', base_dir='d:\\'):
    """Download every photo of the given albums into per-album folders.

    For each album id the album page is fetched through *s*, the photo
    list embedded in the page is parsed, a directory named after the
    album is created under *base_dir*, and each photo is saved there as
    ``<photoId>.jpg``. An album whose directory already exists is skipped.

    Args:
        album_ids: iterable of album ids (as returned by ``get_albums``).
        s: a ``requests.Session``-like object; only ``s.get`` is used.
        user_id: renren user id owning the albums. Defaults to the id that
            was previously hard-coded in the URL.
        base_dir: directory under which album folders are created.
            Defaults to the previously hard-coded ``d:\\``.

    Raises:
        ValueError: if an album page lacks the expected markers or its
            embedded data is not valid JSON.
    """
    for album_id in album_ids:
        # Visit the album page. %-formatting also copes with numeric ids.
        album_url = 'http://photo.renren.com/photo/%s/album-%s/v7' % (
            user_id, album_id)
        content = s.get(album_url).content
        if b'photoId' in content:
            print(u'进入相册成功')

        # Search on the raw bytes so this works on Python 2 and 3 alike.
        start_marker = b'nx.data.photo = '
        start = content.find(start_marker)
        end = content.find(b'; define.config')
        if start < 0 or end < start:
            # Without this check a missing marker yields a garbage slice
            # and an opaque JSON error further down.
            raise ValueError(
                'photo data not found in album page %s '
                '(not logged in, or page layout changed?)' % album_url
            )

        raw = content[start + len(start_marker):end].strip()
        # Trim 13 leading and 2 trailing characters -- presumably an outer
        # wrapper around the album object; TODO confirm against a live page.
        raw = raw[13:-2]
        # The embedded object uses single-quoted strings; swap them for
        # double quotes so json can parse it.
        data = json.loads(raw.decode('utf-8').replace("'", '"'))
        photos = data['photoList']
        album_name = data['albumName']

        # Define and create the album directory.
        album_path = os.path.join(base_dir, album_name)
        if mkdir(album_path) != 'yes':
            print(u'相册已经下载')
            continue

        for photo in photos:
            image_name = photo['photoId']
            photo_url = photo['url']
            image = requests.get(photo_url)
            image_path = os.path.join(album_path, '%s.jpg' % image_name)
            # "with" guarantees the file is closed even if the write fails.
            with open(image_path, 'wb') as f:
                f.write(image.content)
            print(u'相片%s下载成功' % image_name)

# Main procedure, run only when this file is executed as a script.
if __name__ == '__main__':
    # Create the requests session and log in to renren with it.
    session = login_renren(requests.Session())
    # Fetch the list of album ids.
    album_ids, session = get_albums(session)
    # Download every album.
    download_albums(album_ids, session)


搞定!运行效果如下:





内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: