用 Python 处理 cookie 比 Java 简单很多；文中 Java 示例使用的 HttpClient 版本是 4.3.3
2014-06-05 18:38
274 查看
模拟登录流程: 1 请求host_url 2 从host_url中解析出 隐藏表单 的值 添加到POST_DATA中 3 添加账户,密码到POST_DATA中 4 编码后,发送POST请求 要点1:java下,HttpClient必须是单例模式 要点2:post的url可能跟登录界面的url不同。post_url可以从host_url的返回结果中得到(具体情况自行分析) 5 通过firefox,chrome等相关插件验证登录完成 6 测试需要登录的采集任务 # --*-- coding:utf-8 --*-- import re import cookielib import urllib2 import urllib username = 'your account' pwd = 'your pwd' hosturl = 'https://passport.csdn.net/account/login' posturl = 'https://passport.csdn.net/account/login' cj = cookielib.LWPCookieJar() cookie_support = urllib2.HTTPCookieProcessor(cj) opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler) urllib2.install_opener(opener) host_page = urllib2.urlopen(hosturl) html = host_page.read() def getgroup_1(res, input): pat = re.compile(res) m = pat.search(input) if m: return m.group(1) else: return None res_lt = 'name="lt" value="(.*?)"' lt = getgroup_1(res_lt, html) res_exe = 'name="execution" value="(.*?)"' exe = getgroup_1(res_exe, html) print 'hidden post data', lt, exe headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'} post_data = {'_eventId': 'submit', 'execution': exe, 'lt': lt, 'username': username, 'password': pwd} post_data = urllib.urlencode(post_data) request = urllib2.Request(posturl, post_data, headers) print request response = urllib2.urlopen(request) txt = response.read() print txt
附带:java代码。。。。。。。。要点是httpclient必须是同一个实例 public class LoginTest { private static String getGroup_1(String res, String input){ Pattern p = Pattern.compile(res); Matcher m = p.matcher(input); while(m.find()){ return m.group(1); } return null; } public static void main(String[] args) {//登录csdn String uri = "https://passport.csdn.net/account/login"; String html = HttpUtil.DownHtml(uri); // <input type="hidden" name="lt" value="LT-207426-moK0sGnfCa9aqijJKeLYhFDYiEe2id" /> // <input type="hidden" name="execution" value="e1s1" /> // <input type="hidden" name="_eventId" value="submit" /> String lt = getGroup_1("name=\"lt\" value=\"(.*?)\"", html); String execution = getGroup_1("name=\"execution\" value=\"(.*?)\"", html); System.out.println(lt + "\t" + execution); //构建cookie Map<String, String> params = new HashMap<String,String>(); params.put("_eventId", "submit"); params.put("execution", execution); params.put("lt", lt); params.put("password", "******"); params.put("username", "******"); HttpUtil.Post(uri, params); System.out.println(System.currentTimeMillis()); }
import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.http.Header; import org.apache.http.HeaderIterator; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.NameValuePair; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.message.BasicNameValuePair; import org.apache.http.protocol.HTTP; import org.apache.http.util.EntityUtils; public class HttpUtil { private static CloseableHttpClient httpclient = null; //要点:单例模式 static { if (httpclient == null) { httpclient = HttpClients.createDefault(); } } public static void Post(String uri, Map<String, String> params) { HttpPost httpost = new HttpPost(uri); List<NameValuePair> post_data = new ArrayList<NameValuePair>(); Set<String> keySet = params.keySet(); for (String key : keySet) { post_data.add(new BasicNameValuePair(key, params.get(key))); } CloseableHttpResponse response = null; try { httpost.setHeader("User-Agent", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0"); httpost.setEntity(new UrlEncodedFormEntity(post_data, "UTF-8")); response = httpclient.execute(httpost); HeaderIterator it = response.headerIterator(); while (it.hasNext()) { System.out.println(it.next()); } 
System.out.println("---------------html---------------"); HttpEntity entity = response.getEntity(); String body = EntityUtils.toString(entity); System.out.println(body); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (ClientProtocolException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public static String DownHtml(String uri) { HttpGet httpget = new HttpGet(uri); CloseableHttpResponse response = null; System.out.println(httpget.getURI()); System.out.println("Executing request " + httpget.getRequestLine()); try { response = httpclient.execute(httpget); System.out.println(response.getStatusLine().toString()); System.out.println("------------------------------"); // 头信息 HeaderIterator it = response.headerIterator(); StringBuffer buff = new StringBuffer(); while (it.hasNext()) { buff.append(it.next()); // System.out.println(it.next()); } System.out.println("------------------------------"); // 判断访问的状态码 int statusCode = response.getStatusLine().getStatusCode(); if (statusCode != HttpStatus.SC_OK) { System.err .println("Method failed: " + response.getStatusLine()); } HttpEntity entity = response.getEntity(); // String charset = EntityUtils.getContentCharSet(entity); StringBuilder pageBuffer = new StringBuilder(); if (entity != null) { InputStream in = entity.getContent(); BufferedReader br = new BufferedReader(new InputStreamReader( in, "UTF-8")); String line; while ((line = br.readLine()) != null) { pageBuffer.append(line); pageBuffer.append("\n"); } in.close(); br.close(); } return pageBuffer.toString(); } catch (ClientProtocolException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; } }
相关文章推荐
- Mac上python2和python3的版本切换的简单处理方式
- Mac上python2和python3的版本切换的简单处理方式
- java中对cookie的简单处理
- 工欲善其事必先利其器-简单几步打造顺手的python开发工具(windows,Linux多版本)
- python 简单图像处理(14) 灰度图腐蚀和膨胀,开运算、闭运算
- python 的cookie处理操作
- Python的cookie处理
- 比较简单实用的使用正则三种版本的js去空格处理方法
- 工欲善其事必先利其器-简单几步打造顺手的python开发工具(windows,Linux多版本)
- python 简单图像处理(12) 伪彩色增强
- python 简单图像处理(11) 空间域图像锐化(边缘检测)
- Python学习:时间处理工具--dateutil两个简单用法
- 用Python简单处理SQL语句绕过防注入
- python 简单图像处理(4) 旋转
- python 简单图像处理(3) 平移
- python 简单图像处理(13) 二值图腐蚀和膨胀,开运算、闭运算
- Python来完成简单图像处理任务
- python 简单图像处理(2) 镜像
- python 简单图像处理(6) 错切
- python 简单图像处理(16) 图像的细化(骨架抽取)