Java模拟登陆新浪微博抓取数据【转载】
2014-03-04 23:11
519 查看
package com.shiyimm.crawler.weibo; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.script.Invocable; import javax.script.ScriptEngine; import javax.script.ScriptEngineManager; import javax.script.ScriptException; import net.sf.json.JSONObject; import org.apache.commons.codec.binary.Base64; import org.apache.http.NameValuePair; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicNameValuePair; import com.shiyimm.crawler.util.MyUrlUtil; import com.shiyimm.crawler.util.UrlUtil; public class SinaWeibo { private HttpClient client; private String username; //登录帐号(明文) private String password; //登录密码(明文) private String su; //登录帐号(Base64加密) private String sp; //登录密码(各种参数RSA加密后的密文) private long servertime; //初始登录时,服务器返回的时间戳,用以密码加密以及登录用 private String nonce; //初始登录时,服务器返回的一串字符,用以密码加密以及登录用 private String rsakv; //初始登录时,服务器返回的一串字符,用以密码加密以及登录用 private String pubkey; //初始登录时,服务器返回的RSA公钥 private String errInfo; //登录失败时的错误信息 private String location; //登录成功后的跳转连接 private String url; public SinaWeibo(String username,String password){ client = new DefaultHttpClient(); this.username = username; this.password = password; } /** * 初始登录信息<br> * 返回false说明初始失败 * @return */ public boolean preLogin(){ boolean flag = false; try { su = new String(Base64.encodeBase64(URLEncoder.encode(username, "UTF-8").getBytes())); String url = "http://login.sina.com.cn/sso/prelogin.php?entry=weibo&rsakt=mod&checkpin=1&" + "client=ssologin.js(v1.4.5)&_="+getTimestamp(); url += "&su="+su; String content; content = HttpTools.getRequest(client, url); //System.out.println(content); System.out.println("content------------"+content); JSONObject json = JSONObject.fromObject(content); System.out.println(json); servertime = json.getLong("servertime"); nonce = json.getString("nonce"); rsakv = json.getString("rsakv"); pubkey = json.getString("pubkey"); flag = encodePwd(); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block //e.printStackTrace(); } catch (ClientProtocolException e) { // TODO Auto-generated catch block //e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block //e.printStackTrace(); } return flag; } /** * 登录 * @return true:登录成功 */ public boolean login(){ if(preLogin()){ String url = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)"; List<NameValuePair> parms = new ArrayList<NameValuePair>(); parms.add(new BasicNameValuePair("entry", "weibo")); parms.add(new BasicNameValuePair("geteway", "1")); parms.add(new BasicNameValuePair("from", "")); parms.add(new BasicNameValuePair("savestate", "7")); parms.add(new BasicNameValuePair("useticket", "1")); parms.add(new BasicNameValuePair("pagerefer", "http://login.sina.com.cn/sso/logout.php?entry=miniblog&r=http%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl%3D%2F")); parms.add(new BasicNameValuePair("vsnf", "1")); parms.add(new BasicNameValuePair("su", su)); parms.add(new BasicNameValuePair("service", "miniblog")); parms.add(new BasicNameValuePair("servertime", servertime+"")); parms.add(new BasicNameValuePair("nonce", nonce)); parms.add(new BasicNameValuePair("pwencode", "rsa2")); parms.add(new BasicNameValuePair("rsakv", rsakv)); parms.add(new BasicNameValuePair("sp", sp)); parms.add(new BasicNameValuePair("encoding", "UTF-8")); parms.add(new BasicNameValuePair("prelt", "182")); parms.add(new BasicNameValuePair("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack")); parms.add(new BasicNameValuePair("returntype", "META")); try { String content = HttpTools.postRequest(client, url, parms); System.out.println("content----------"+content); String regex = "location\\.replace\\(\"(.+?)\"\\);"; Pattern p = Pattern.compile(regex); Matcher m = p.matcher(content); if(m.find()){ location = m.group(1); if(location.contains("reason=")){ errInfo = location.substring(location.indexOf("reason=")+7); errInfo = URLDecoder.decode(errInfo, "GBK"); }else{ String result = HttpTools.getRequest(client, location); System.out.println("result--------------"+result); return true; } } } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // url = "http://www.weibo.com/hm"; // System.out.println(MyUrlUtil.getResource(url)); } return false; } /** * 密码进行RSA加密<br> * 返回false说明加密失败 * @return */ private boolean encodePwd(){ ScriptEngineManager sem = new ScriptEngineManager(); ScriptEngine se = sem.getEngineByName("javascript"); try { FileReader fr = new FileReader("E:\\encoder.js"); se.eval(fr); Invocable invocableEngine = (Invocable) se; String callbackvalue = (String) invocableEngine.invokeFunction("encodePwd",pubkey,servertime,nonce,password); sp = callbackvalue; return true; } catch (FileNotFoundException e) { // TODO Auto-generated catch block System.out.println("加密脚本encoder.sj未找到"); } catch (ScriptException e) { // TODO Auto-generated catch block //e.printStackTrace(); } catch (NoSuchMethodException e) { // TODO Auto-generated catch block //e.printStackTrace(); } errInfo = "密码加密失败!"; return false; } public String getErrInfo() { return errInfo; } /** * 获取时间戳 * @return */ private long getTimestamp(){ Date now = new Date(); return now.getTime(); } public static void main(String[] args) throws ClientProtocolException, IOException { SinaWeibo weibo = new SinaWeibo("账号", "密码"); if(weibo.login()){ System.out.println("登陆成功!"); String url = "http://www.weibo.com/hm"; // String source = MyUrlUtil.getResource(url); // System.out.println(source); }else{ System.out.println("登录失败!"); } } } <pre class="brush:java; toolbar: true; auto-links: false;">package com.shiyimm.crawler.weibo; import java.io.IOException; import java.util.List; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.util.EntityUtils; public class HttpTools { /** * 正常GET方式HTTP请求 * @param client * @param url * @return * @throws ClientProtocolException * @throws IOException */ public static String getRequest(HttpClient client,String url) throws ClientProtocolException, IOException{ HttpGet get = new HttpGet(url); get.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"); //get.addHeader("Referer", "http://2013.weibo.com/"); HttpResponse response = client.execute(get); HttpEntity entity = response.getEntity(); String content = EntityUtils.toString(entity,"GBK"); //System.out.println(content); /*EntityUtils.consume(entity);*/ return content; } /** * 正常POST方式HTTP请求 * @param client * @param url * @param parms * @return * @throws ClientProtocolException * @throws IOException */ public static String postRequest(HttpClient client,String url,List<NameValuePair> parms) throws ClientProtocolException, IOException{ HttpPost post = new HttpPost(url); post.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"); post.addHeader("Content-Type", "application/x-www-form-urlencoded"); //post.addHeader("Referer", "http://2013.weibo.com/"); UrlEncodedFormEntity postEntity = new UrlEncodedFormEntity(parms, "UTF-8"); post.setEntity(postEntity); HttpResponse response = client.execute(post); HttpEntity entity = response.getEntity(); String content = EntityUtils.toString(entity,"GBK"); /*EntityUtils.consume(entity);*/ return content; } }
相关文章推荐
- Java模拟新浪微博登陆抓取数据
- Java模拟新浪微博登陆抓取数据
- Java模拟新浪微博登陆抓取数据
- 新浪微博模拟登陆+数据抓取(java实现)
- Java模拟新浪微博登陆抓取数据
- 腾讯微博模拟登陆+数据抓取(java实现)
- Android模拟登陆综合教务系统客户端(java)-jsoup解析数据
- Java登录到新浪微博抓取数据
- 【数据抓取】模拟登陆
- Java模拟登陆02【转载】
- Java实现新浪微博模拟登陆
- cURL实现模拟登陆+抓取数据
- java 通过模拟cookies登陆新浪微博
- python requests 模拟登陆网站,抓取数据
- 使用JAVA实现模拟登陆并发送新浪微博(非调用新浪API)
- 分享:Python使用cookielib和urllib2模拟登陆新浪微博并抓取数据
- POST获取网易博客数据(网页抓取,模拟登陆资料学习备份)
- Java模拟登录系统抓取内容【转载】
- Java实现从正方教务系统抓取数据(一)--模拟登录
- 网页抓取,模拟登陆,抓取动态网页内容等过程中,所涉及的Headers信息,Cookie信息,POST数据的处理逻辑