您的位置:首页 > 编程语言 > Python开发

Python日常学习笔记4--模拟登录知乎

2017-10-31 13:47 633 查看
from urllib import request, parse
from html.parser import HTMLParser
import json
from idna import unicode

class MyHTMLParser(HTMLParser):
    """HTML parser that extracts the hidden ``_xsrf`` form token from a page.

    Feed the page markup with ``feed()`` and read the token from the ``xsrf``
    property afterwards. The token is taken from an
    ``<input type="hidden" name="_xsrf" value="...">`` element; attributes are
    matched by name, so their order in the markup does not matter, and both
    ``<input ...>`` and self-closing ``<input ... />`` forms are recognised.
    If several matching inputs are present, the last one wins.
    """

    def __init__(self):
        super().__init__()
        # Random token the site requires for login. 0 is the "not found"
        # sentinel and is kept for backward compatibility with callers that
        # test the property for truthiness.
        self.__xsrf = 0

    def _capture_xsrf(self, tag, attrs):
        """Store the token if ``tag``/``attrs`` describe the hidden _xsrf input."""
        if tag != 'input':
            return
        # HTMLParser passes attrs as (name, value) pairs with lower-cased
        # names; a dict lets us look them up by name instead of by position.
        attr_map = dict(attrs)
        if attr_map.get('type') != 'hidden' or attr_map.get('name') != '_xsrf':
            return
        value = attr_map.get('value')
        if value is not None:
            self.__xsrf = value

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing tags such as ``<input ... />``."""
        self._capture_xsrf(tag, attrs)

    def handle_starttag(self, tag, attrs):
        """Handle ordinary start tags such as ``<input ...>``."""
        self._capture_xsrf(tag, attrs)

    @property
    def xsrf(self):
        """The extracted ``_xsrf`` token, or ``0`` if none has been seen."""
        return self.__xsrf

USER_AGENT = ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) '
              'Gecko/20100101 Firefox/56.0')
SIGNIN_URL = 'https://www.zhihu.com/signin'
LOGIN_URL = 'https://www.zhihu.com/login/phone_num'


def main():
    """Fetch the sign-in page, extract the _xsrf token and post a login form.

    Prints the extracted token, then the HTTP status line and the body of the
    login response. Both requests go through one cookie-aware opener so that
    cookies set by the sign-in page are sent back with the login POST.

    NOTE: captcha recognition is not implemented; a third-party package could
    be used to recognise the captcha.
    """
    # Local import keeps this script block self-contained.
    from http.cookiejar import CookieJar

    # One opener for both requests: plain urlopen() keeps no cookies between
    # calls, so the login POST would otherwise arrive without the session.
    opener = request.build_opener(request.HTTPCookieProcessor(CookieJar()))

    myparser = MyHTMLParser()
    req = request.Request(SIGNIN_URL)
    req.add_header('User-Agent', USER_AGENT)
    with opener.open(req) as page:
        myparser.feed(page.read().decode('utf-8'))
    print(myparser.xsrf)

    login_data = parse.urlencode([
        ('phone_num', '...'),
        ('password', '...'),
        ('_xsrf', myparser.xsrf),
        ('captcha_type', 'cn'),
    ])
    login_req = request.Request(LOGIN_URL)
    login_req.add_header('User-Agent', USER_AGENT)
    login_req.add_header('X-Xsrftoken', myparser.xsrf)
    login_req.add_header('Connection', 'keep-alive')
    with opener.open(login_req, data=login_data.encode('utf-8')) as f:
        print('%s: %s' % (f.status, f.reason))
        body = f.read().decode('utf-8')

    # Parse the body as JSON so \uXXXX escapes are rendered as readable text;
    # decoding the raw bytes with 'unicode_escape' would mangle any literal
    # non-ASCII bytes. Fall back to the raw text if the body is not JSON.
    try:
        print(json.dumps(json.loads(body), ensure_ascii=False))
    except ValueError:
        print(body)


if __name__ == '__main__':
    main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: