您的位置:首页 > 理论基础 > 计算机网络

Python使用urllib添加头部实现https请求 同时使用PyExecJS-1.4.0触发JS

2017-05-11 14:53 501 查看
运行环境:Python3.6

运行代码:

#!/usr/bin/env python
# -*-coding:utf-8-*-
import execjs
from html.entities import name2codepoint
from html.parser import HTMLParser
from urllib import request
import time

class ImgParser(HTMLParser):
    """HTML parser that downloads the image referenced by the first ``<img>`` tag.

    Every ``<img>`` start tag increments ``num``. When a ``src`` attribute is
    found, ``addr`` is set to the absolute URL on ``https://e.189.cn``; if that
    tag is the first image seen by this parser, the image is fetched and
    written to ``output_path`` (presumably a captcha, judging by the file name).
    """

    # Image counter, offset by one: it starts at 1, so the first <img> tag
    # bumps it to 2, which is the value that triggers the download.
    num = 1

    # Destination of the downloaded image. Raw string so the backslashes of
    # the Windows path are never interpreted as escape sequences.
    output_path = r'G:\yzl_python_pycharm\LLB\defcaptcha.png'

    def __init__(self):
        """Initialise the underlying HTMLParser and the parser's own state."""
        super().__init__()
        self.processing = None
        # Absolute URL of the most recently seen <img src=...>.
        self.addr = ''

    def handle_starttag(self, tag, attrs):
        """Track ``<img>`` tags and download the first one that has a ``src``.

        Args:
            tag: Lower-cased tag name supplied by HTMLParser.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        # Only <img> tags are of interest.
        if tag != 'img':
            return
        self.num += 1
        print(self.num)
        for key, value in attrs:
            # HTMLParser reports a valueless attribute as None; skip it
            # instead of failing on the string concatenation below.
            if key == 'src' and value:
                self.addr = 'https://e.189.cn' + value
                # Only the first image on the page is downloaded.
                if self.num == 2:
                    self.getImage()

    def getImage(self):
        """Download ``self.addr`` (plus a ``rnd`` timestamp) to ``output_path``.

        The timestamp is produced by running a small JavaScript snippet through
        PyExecJS. The request carries browser-like User-Agent, Connection and
        Referer headers.
        """
        ctx = execjs.compile("""
            function add() {
                return new Date().getTime();
            }
        """)
        add_ = str(ctx.call("add"))
        url = self.addr + '&rnd=' + add_
        req_rnd = request.Request(url)
        req_rnd.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0')
        req_rnd.add_header('Connection', 'keep-alive')
        req_rnd.add_header('Referer', 'https://e.189.cn/register/mobile/step1.do')
        with request.urlopen(req_rnd) as u:
            data = u.read()
        print(url)
        # The context manager closes the file even if the write fails.
        with open(self.output_path, 'wb') as f:
            f.write(data)

# Fetch the registration page with browser-like request headers, then hand the
# decoded HTML to the parser, which reacts to the <img> tags it encounters.
parser = ImgParser()
req = request.Request(
    'https://e.189.cn/register/mobile/step1.do',
    headers={
        'Host': 'e.189.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Cookie': 'JSESSIONID=aaaKzX9Z_0juP_pF3SMVv',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    },
)
with request.urlopen(req) as f:
    data = f.read()
# The response body is decoded as UTF-8 before parsing.
parser.feed(data.decode('utf-8'))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: