您的位置:首页 > 编程语言 > Python开发

python人人语音爬虫(登录功能尚未完成,需要先登录再查cookie中的t)

2014-04-25 00:00 706 查看
摘要: 登录功能尚未完成,使用前需要先在浏览器中登录,再查出cookie中的t,并且要保持登录状态。

import urllib, urllib2, cookielib,re,json

def LoginRenren(url, t_cookie):
    """Fetch ``url`` while presenting the Renren session cookie ``t``.

    No login is performed here: the caller supplies the value of the ``t``
    cookie, which has to be looked up in a browser that is already logged in.

    Args:
        url: Address to request.
        t_cookie: Value of the ``t`` cookie, as a string.

    Returns:
        The response body exactly as returned by ``read()``.

    Errors raised by ``urllib2.urlopen`` are not caught and propagate to the
    caller.
    """
    # Header value has the form "t=<value>;".
    cookie_header = "t=" + t_cookie + ";"

    # Install a cookie-aware opener globally so that the urlopen() call below
    # (and any later urllib2.urlopen call in the process) goes through it.
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
    urllib2.install_opener(opener)

    req = urllib2.Request(url)
    req.add_header('Cookie', cookie_header)

    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even if read() fails.
        response.close()

def searchMp3(content):
    """Return every Renren audio URL found in ``content``.

    A URL is recognised when it has the shape
    ``http://fmn.rrimg.com/fmnDDD/audio/DDDDDDDD/DDDD/<word chars>.mp3``
    where each ``D`` is a digit.

    Args:
        content: Text to scan (for example the raw body of an album page).

    Returns:
        A list of the matching URL strings in order of appearance; duplicates
        are kept. The list is empty when nothing matches.
    """
    # The dots in the host name and before the extension are escaped so they
    # match a literal "." only, not an arbitrary character.
    reMp3 = r'http://fmn\.rrimg\.com/fmn\d{3}/audio/\d{8}/\d{4}/\w+\.mp3'
    return re.findall(reMp3, content)

def downloadMp3(mp3Url, num,
                pathTemplate=r"C:\Users\john\Desktop\renrenMp3\%d.mp3"):
    """Download one audio file to a numbered local path.

    Args:
        mp3Url: Address of the file to download.
        num: Integer substituted into ``pathTemplate`` to form the file name.
        pathTemplate: ``%``-format string containing one ``%d`` placeholder
            that yields the destination path. Defaults to the location that
            was previously hard-coded, so existing two-argument calls behave
            as before.

    Returns:
        None. The file is written by ``urllib.urlretrieve``.
    """
    # NOTE(review): the target directory is presumably expected to exist
    # already; nothing here creates it.
    localPath = pathTemplate % num
    urllib.urlretrieve(mp3Url, localPath)

def main(albumUrl, t_cookie):
    """Collect the audio URLs of an album page by page, then download them.

    Pages are requested from ``<albumUrl>/bypage/ajax`` starting at
    ``curPage=0`` with ``pagenum=40``. Paging stops at the first response
    whose ``photoList`` entry is empty. The collected URLs are printed and
    then downloaded in order, numbered from 1.

    Args:
        albumUrl: Base address of the album (first page).
        t_cookie: Value of the ``t`` cookie, passed on to ``LoginRenren``.

    Returns:
        None.
    """
    mp3Url = []
    pageNum = 0
    while True:
        pageUrl = albumUrl + '/bypage/ajax?curPage=%d&pagenum=40' % pageNum
        jsonContent = LoginRenren(pageUrl, t_cookie)
        # A falsy "photoList" marks the end of the album.
        if not json.loads(jsonContent)["photoList"]:
            break
        # The URLs are pulled from the raw response text, not the parsed JSON.
        mp3Url.extend(searchMp3(jsonContent))
        pageNum += 1

    # Single-argument parenthesised form: prints the same list under the
    # Python 2 print statement.
    print(mp3Url)

    for num, eachMp3 in enumerate(mp3Url, 1):
        downloadMp3(eachMp3, num)

if __name__ == '__main__':
    # Sample album address and ``t`` cookie value used when run as a script.
    # To be prompted for them instead, read each one interactively with
    # raw_input("albumUrl=") and raw_input("t_cookie=").
    albumUrl, t_cookie = (
        'http://photo.renren.com/photo/465457202/album-868663788',
        '134cc936f2785fa03902fe3185e517f64',
    )
    main(albumUrl, t_cookie)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息