# 您的位置:首页 > 编程语言 > Python开发

# 使用python模拟登陆百度

# 2013-10-18 13:51 651 查看
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Function:   Used to demonstrate how to use Python code to emulate login baidu main page: http://www.baidu.com/
Note:       Before trying to understand the following code, please first read the related articles:
(1)【整理】关于抓取网页,分析网页内容,模拟登陆网站的逻辑/流程和注意事项
 http://www.crifan.com/summary_about_flow_process_of_fetch_webpage_simulate_login_website_and_some_notice/ 
(2) 【教程】手把手教你如何利用工具(IE9的F12)去分析模拟登陆网站(百度首页)的内部逻辑过程
 http://www.crifan.com/use_ie9_f12_to_analysis_the_internal_logical_process_of_login_baidu_main_page_website/ 
(3) 【教程】模拟登陆网站 之 Python版
 http://www.crifan.com/emulate_login_website_using_python 
Version:    2012-11-06
Author:     Crifan
"""

import re;
import cookielib;
import urllib;
import urllib2;
import optparse;

#------------------------------------------------------------------------------
# check all cookies in cookiesDict is exist in cookieJar or not
def checkAllCookiesExist(cookieNameList, cookieJar):
    """Return True when every name in cookieNameList has a cookie in cookieJar.

    cookieNameList -- iterable of cookie names that must be present.
    cookieJar      -- iterable of cookie objects; only their ``name``
                      attribute is read.

    An empty cookieNameList yields True, since no required cookie is missing.
    """
    # Collect the names once so each required name is a constant-time lookup.
    presentNames = set(cookie.name for cookie in cookieJar)
    return all(eachCookieName in presentNames for eachCookieName in cookieNameList)

#------------------------------------------------------------------------------
# just for print delimiter
def printDelimiter():
    """Print a line of 80 dashes to stdout as a visual separator."""
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('-' * 80)

#------------------------------------------------------------------------------
# main function to emulate login baidu
def emulateLoginBaidu():
    """Emulate logging in to the Baidu main page and report the outcome.

    The account is read from the command line (``-u``/``--username`` and
    ``-p``/``--password``; both default to an empty string). The function
    then:

    1. installs a global urllib2 opener backed by a CookieJar, so cookies
       set by one response are sent with the following requests;
    2. requests http://www.baidu.com/ and prints the cookies now in the jar;
    3. requests the passport ``getapi`` page and extracts the login token
       from the returned text;
    4. POSTs the login form and checks that the cookies BDUSS, PTOKEN,
       STOKEN and SAVEUSERID are all present in the jar.

    Progress and the final result are printed to stdout. Nothing is returned.
    If the token cannot be extracted, the fetched page is printed and the
    login request is not sent.
    """
    print("Function: Used to demostrate how to use Python code to emulate login baidu main page: http://www.baidu.com/")
    print("Usage: emulate_login_baidu_python.py -u yourBaiduUsername -p yourBaiduPassword")
    printDelimiter()

    # parse input parameters
    parser = optparse.OptionParser()
    parser.add_option("-u", "--username", action="store", type="string", default='', dest="username", help="Your Baidu Username")
    parser.add_option("-p", "--password", action="store", type="string", default='', dest="password", help="Your Baidu password")
    (options, _args) = parser.parse_args()
    # Read the two options explicitly rather than exec()-ing every attribute
    # of `options` (dunders and methods included) into the local namespace.
    username = options.username
    password = options.password

    printDelimiter()
    print("[preparation] using cookieJar & HTTPCookieProcessor to automatically handle cookies")
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    # Installed globally so the plain urllib2.urlopen() calls below share the jar.
    urllib2.install_opener(opener)

    printDelimiter()
    print("[step1] to get cookie BAIDUID")
    baiduMainUrl = "http://www.baidu.com/"
    # The body is not needed; the request is made only to populate the jar.
    urllib2.urlopen(baiduMainUrl).close()
    for index, cookie in enumerate(cj):
        print("[ %s ] %s" % (index, cookie))

    printDelimiter()
    print("[step2] to get token value")
    getapiUrl = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true"
    getapiResp = urllib2.urlopen(getapiUrl)
    try:
        getapiRespHtml = getapiResp.read()
    finally:
        getapiResp.close()
    # The page is expected to contain a line such as:
    #   bdPass.api.params.login_token='5ab690978812b0e7fbbe1bfc267b90b3';
    foundTokenVal = re.search(r"bdPass\.api\.params\.login_token='(?P<tokenVal>\w+)';", getapiRespHtml)
    if not foundTokenVal:
        print("Fail to extract token value from html= %s" % getapiRespHtml)
        return

    tokenVal = foundTokenVal.group("tokenVal")
    print("tokenVal= %s" % tokenVal)

    printDelimiter()
    print("[step3] emulate login baidu")
    staticpage = "http://www.baidu.com/cache/user/html/jump.html"
    baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login"
    # Form fields deliberately left out of the request:
    # ppui_logintime, codestring, u, safeflg, verifycode.
    postDict = {
        'charset'       : "utf-8",
        'token'         : tokenVal,
        'isPhone'       : "false",
        'index'         : "0",
        'staticpage'    : staticpage,
        'loginType'     : "1",
        'tpl'           : "mn",
        'callback'      : "parent.bdPass.api.login._postCallback",
        'username'      : username,
        'password'      : password,
        'mem_pass'      : "on",
    }
    # urlencode percent-encodes the values, e.g. the staticpage URL becomes
    # http%3A%2F%2Fwww.baidu.com%2Fcache%2Fuser%2Fhtml%2Fjump.html
    postData = urllib.urlencode(postDict)
    # Passing a data argument makes the request a POST.
    req = urllib2.Request(baiduMainLoginUrl, postData)
    # in most cases the content-type of a POST request is application/x-www-form-urlencoded
    req.add_header('Content-Type', "application/x-www-form-urlencoded")
    # Only the cookies set by the response matter, so the body is not read.
    urllib2.urlopen(req).close()

    # The login is judged successful when all of these cookies were set.
    cookiesToCheck = ['BDUSS', 'PTOKEN', 'STOKEN', 'SAVEUSERID']
    loginBaiduOK = checkAllCookiesExist(cookiesToCheck, cj)
    if loginBaiduOK:
        print("+++ Emulate login baidu is OK, ^_^")
    else:
        print("--- Failed to emulate login baidu !")

# Run the login emulation only when executed as a script, not when imported.
if __name__ == "__main__":
    emulateLoginBaidu()
# 内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
# 标签: