Python日常学习笔记4——模拟登录知乎
2017-10-31 13:47
633 查看
import http.cookiejar
import json
from html.parser import HTMLParser
from urllib import parse, request

try:
    # Unused by this script; retained from the original import list. Guarded so
    # that a missing idna package, or an idna release that does not export
    # this name, cannot abort the script before it starts.
    from idna import unicode
except ImportError:
    unicode = str

USER_AGENT = ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) '
              'Gecko/20100101 Firefox/56.0')
SIGNIN_URL = 'https://www.zhihu.com/signin'
LOGIN_URL = 'https://www.zhihu.com/login/phone_num'


class MyHTMLParser(HTMLParser):
    """Extract the hidden ``_xsrf`` form token from an HTML page.

    Feed the page text with ``feed()`` and read the result from ``xsrf``.
    ``xsrf`` stays ``0`` until an ``<input type="hidden" name="_xsrf" ...>``
    carrying a ``value`` attribute has been seen.
    """

    def __init__(self):
        super().__init__()
        # The login form requires this random token; 0 means "not found yet".
        self.__xsrf = 0

    def _capture_xsrf(self, tag, attrs):
        """Store the token if *tag*/*attrs* describe the hidden _xsrf input."""
        if tag != 'input':
            return
        # Look attributes up by name instead of by position so that attribute
        # order and extra attributes do not matter.
        attr_map = dict(attrs)
        if attr_map.get('type') != 'hidden' or attr_map.get('name') != '_xsrf':
            return
        value = attr_map.get('value')
        if value is not None:
            self.__xsrf = value

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing tags such as ``<input ... />``."""
        self._capture_xsrf(tag, attrs)

    def handle_starttag(self, tag, attrs):
        """Handle plain start tags such as ``<input ...>`` (no trailing slash)."""
        self._capture_xsrf(tag, attrs)

    @property
    def xsrf(self):
        """The captured token string, or ``0`` when none has been found."""
        return self.__xsrf


def _readable_body(raw):
    """Return the response bytes *raw* as human-readable text.

    A JSON body is re-serialised with ``ensure_ascii=False`` so that
    ``\\uXXXX`` escapes are shown as the characters they stand for; any other
    body is returned as decoded text.
    """
    text = raw.decode('utf-8', errors='replace')
    try:
        return json.dumps(json.loads(text), ensure_ascii=False)
    except ValueError:
        # Not JSON: show the text as received.
        return text


def main():
    """Fetch the sign-in page, read its _xsrf token and post the login form.

    Prints the token, then the HTTP status and body of the login response.
    Exits with an error message when the sign-in page carries no token.
    """
    # Plain urlopen() keeps no cookies between calls. Share one cookie jar so
    # cookies set by the sign-in page are sent back with the login request
    # (presumably the server checks the token against a session cookie).
    cookie_jar = http.cookiejar.CookieJar()
    opener = request.build_opener(request.HTTPCookieProcessor(cookie_jar))

    myparser = MyHTMLParser()
    signin_req = request.Request(SIGNIN_URL)
    signin_req.add_header('User-Agent', USER_AGENT)
    with opener.open(signin_req) as page:
        myparser.feed(page.read().decode('utf-8'))
    print(myparser.xsrf)
    if not myparser.xsrf:
        raise SystemExit('no _xsrf token found on the sign-in page; aborting login')

    login_data = parse.urlencode([
        ('phone_num', '...'),
        ('password', '...'),
        ('_xsrf', myparser.xsrf),
        ('captcha_type', 'cn'),
    ])
    login_req = request.Request(LOGIN_URL)
    login_req.add_header('User-Agent', USER_AGENT)
    login_req.add_header('X-Xsrftoken', myparser.xsrf)
    login_req.add_header('Connection', 'keep-alive')
    with opener.open(login_req, data=login_data.encode('utf-8')) as f:
        print('%s: %s' % (f.status, f.reason))
        print(_readable_body(f.read()))
    # Captcha recognition is not implemented; a third-party package could be
    # used to recognise the captcha.


if __name__ == '__main__':
    main()
相关文章推荐
- python 日常笔记--模拟爬取学校的官网
- Python爬虫学习(简单的模拟登陆(一))
- Python之学习笔记(模拟键盘)
- python模拟登陆知乎,得到cookie
- python模拟登陆知乎(手工识别验证码)
- python爬虫学习笔记(2)-爬取知乎
- Python模拟登陆万能法-微博|知乎
- Python爬虫(入门+进阶)学习笔记 1-6 浏览器抓包及headers设置(案例一:爬取知乎)
- Python 爬虫模拟登陆知乎
- 【学习笔记】Python网络编程(三)利用socket模拟ssh协议
- 用python做有趣的事儿——模拟登陆知乎
- python语言学习笔记(三)-----模拟投掷三个骰子,对游戏结果进行统计
- 【python学习笔记】29:模拟OPT和LRU算法
- Python 模拟知乎登陆,保存登陆cookie
- Python爬虫笔记-豆瓣模拟登陆
- Python爬虫学习笔记——豆瓣登陆(一)
- Python 爬虫模拟登陆知乎
- 【学习笔记】Python网络编程(四)完善socket模拟ssh协议
- python 模拟知乎登陆