您的位置:首页 > 理论基础 > 计算机网络

用 Python 处理 cookie 简单很多;Java 版本使用的 HttpClient 是 4.3.3。

2014-06-05 18:38 274 查看
模拟登录流程:

1 请求host_url

2 从host_url中解析出 隐藏表单 的值 添加到POST_DATA中

3 添加账户,密码到POST_DATA中

4 编码后,发送POST请求
要点1:Java 下,HttpClient 必须是单例(所有请求共用同一个实例),否则登录得到的 cookie 无法带到后续请求中
要点2:post的url可能跟登录界面的url不同。post_url可以从host_url的返回结果中得到(具体情况自行分析)

5 通过firefox,chrome等相关插件验证登录完成

6 测试需要登录的采集任务

# --*-- coding:utf-8 --*--
import re
import cookielib
import urllib2
import urllib

# Placeholder credentials; replace them with a real account before running.
username = 'your account'
pwd = 'your pwd'

# Login page URL and the URL the login form is posted to. They are the same
# for this site, but the two can differ on other sites.
hosturl = 'https://passport.csdn.net/account/login'
posturl = 'https://passport.csdn.net/account/login'

# Install a global opener backed by a cookie jar so that every later
# urllib2.urlopen() call goes through the same cookie-aware opener.
cj = cookielib.LWPCookieJar()
cookie_support = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
urllib2.install_opener(opener)

# Download the login page; the hidden form fields are parsed out of `html`.
host_page = urllib2.urlopen(hosturl)
try:
    html = host_page.read()
finally:
    # Release the connection even when read() fails.
    host_page.close()

def getgroup_1(res, input):
    """Return capture group 1 of the first match of ``res`` in ``input``.

    res   -- regular expression source containing at least one group
    input -- text to search

    Returns the text of group 1, or None when the pattern does not match.
    """
    pat = re.compile(res)
    m = pat.search(input)
    if m:
        return m.group(1)
    return None

res_lt = 'name="lt" value="(.*?)"'
lt = getgroup_1(res_lt, html)

res_exe = 'name="execution" value="(.*?)"'
exe = getgroup_1(res_exe, html)

print 'hidden post data', lt, exe

headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'}

post_data = {'_eventId': 'submit', 'execution': exe, 'lt': lt, 'username': username, 'password': pwd}
post_data = urllib.urlencode(post_data)

request = urllib2.Request(posturl, post_data, headers)
print request
response = urllib2.urlopen(request)
txt = response.read()
print txt


附带 Java 代码。要点:所有请求必须使用同一个 HttpClient 实例。
public class LoginTest {

private static String getGroup_1(String res, String input){
Pattern p = Pattern.compile(res);
Matcher m = p.matcher(input);
while(m.find()){
return m.group(1);
}
return null;
}

public static void main(String[] args) {//登录csdn
String uri = "https://passport.csdn.net/account/login";
String html = HttpUtil.DownHtml(uri);

//        <input type="hidden" name="lt" value="LT-207426-moK0sGnfCa9aqijJKeLYhFDYiEe2id" />
//         <input type="hidden" name="execution" value="e1s1" />
//        <input type="hidden" name="_eventId" value="submit" />

String lt = getGroup_1("name=\"lt\" value=\"(.*?)\"", html);
String execution = getGroup_1("name=\"execution\" value=\"(.*?)\"", html);
System.out.println(lt + "\t" + execution);

//构建cookie
Map<String, String> params = new HashMap<String,String>();
params.put("_eventId", "submit");
params.put("execution", execution);
params.put("lt", lt);
params.put("password", "******");
params.put("username", "******");

HttpUtil.Post(uri, params);

System.out.println(System.currentTimeMillis());

}


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.Header;
import org.apache.http.HeaderIterator;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;

/**
 * Small HTTP helper built on Apache HttpClient. Every request is executed by
 * the single client instance held in this class.
 */
public class HttpUtil {

    // Key point: one client instance shared by every request.
    private static final CloseableHttpClient httpclient = HttpClients.createDefault();

    /**
     * Sends {@code params} as a UTF-8 url-encoded form POST to {@code uri} and
     * prints the response headers and body to standard output. I/O failures
     * are printed as a stack trace and not rethrown.
     *
     * @param uri    target URL
     * @param params form field names and values
     */
    public static void Post(String uri, Map<String, String> params) {

        HttpPost httpost = new HttpPost(uri);
        List<NameValuePair> post_data = new ArrayList<NameValuePair>();
        for (Map.Entry<String, String> param : params.entrySet()) {
            post_data.add(new BasicNameValuePair(param.getKey(), param.getValue()));
        }

        httpost.setHeader("User-Agent",
                "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0");

        CloseableHttpResponse response = null;
        try {
            httpost.setEntity(new UrlEncodedFormEntity(post_data, "UTF-8"));
            response = httpclient.execute(httpost);

            HeaderIterator it = response.headerIterator();
            while (it.hasNext()) {
                System.out.println(it.next());
            }
            System.out.println("---------------html---------------");

            // A response may carry no body; EntityUtils rejects a null entity.
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                // UTF-8 is the fallback when the response declares no charset.
                String body = EntityUtils.toString(entity, "UTF-8");
                System.out.println(body);
            }

        } catch (IOException e) {
            // Also covers UnsupportedEncodingException and ClientProtocolException.
            e.printStackTrace();
        } finally {
            closeQuietly(response);
        }
    }

    /**
     * Downloads {@code uri} with a GET request and returns the body decoded as
     * UTF-8, with every line terminated by {@code "\n"}. A status other than
     * 200 is reported on standard error but the body is still returned.
     *
     * @param uri URL to fetch
     * @return the response body (empty when the response has no entity), or
     *         {@code null} when the request failed with an I/O error
     */
    public static String DownHtml(String uri) {

        HttpGet httpget = new HttpGet(uri);

        System.out.println(httpget.getURI());
        System.out.println("Executing request " + httpget.getRequestLine());

        CloseableHttpResponse response = null;
        try {
            response = httpclient.execute(httpget);

            System.out.println(response.getStatusLine().toString());
            System.out.println("------------------------------");
            System.out.println("------------------------------");

            // Check the status code of the response.
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + response.getStatusLine());
            }

            StringBuilder pageBuffer = new StringBuilder();
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(entity.getContent(), "UTF-8"));
                try {
                    String line;
                    while ((line = br.readLine()) != null) {
                        pageBuffer.append(line);
                        pageBuffer.append("\n");
                    }
                } finally {
                    // Closing the reader also closes the entity's content stream.
                    br.close();
                }
            }
            return pageBuffer.toString();

        } catch (IOException e) {
            // Also covers ClientProtocolException.
            e.printStackTrace();
        } finally {
            closeQuietly(response);
        }
        return null;
    }

    /** Closes {@code response} if it is non-null, printing any failure instead of throwing. */
    private static void closeQuietly(CloseableHttpResponse response) {
        if (response == null) {
            return;
        }
        try {
            response.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: