您的位置:首页 > 理论基础 > 计算机网络

apache HttpClient 4.3.4自动登录并抓取中国联通网页用户基本信息和账单数据

2015-03-29 21:38 766 查看
1.新建一个maven项目httpclient

2.登录中国联通并抓取数据

3.使用Get模拟登录,抓取每月账单数据

中国联通有两种登录方式:









上面两图的区别是:一个带验证码,一个不带验证码。下面将先解决不带验证码的登录。

这里有两个难点:一是验证码,二是uvc码。

验证码,这里将其写到本地,然后人工输入,这个还比较好解决.

uvc码,很重要,这个是在cookie里的,httpclient操作cookie的方法网上找了很久都没有找到,后来看其源码才看到.

viewsourceprint?

001
package
com.httpclient.asm.demo;
002
003
import

org.apache.http.HttpEntity;
004
import

org.apache.http.HttpResponse;
005
import

org.apache.http.client.CookieStore;
006
import

org.apache.http.client.HttpClient;
007
import

org.apache.http.client.methods.CloseableHttpResponse;
008
import

org.apache.http.client.methods.HttpGet;
009
import

org.apache.http.client.methods.HttpPost;
010
import

org.apache.http.cookie.Cookie;
011
import

org.apache.http.impl.client.*;
012
import

org.apache.http.util.EntityUtils;
013
014
import

javax.swing.*;
015
import

java.io.BufferedReader;
016
import

java.io.File;
017
import

java.io.FileOutputStream;
018
import

java.io.InputStream;
019
import

java.io.InputStreamReader;
020
021
public
class
LoginChinaUnicomWithCaptcha{
022
023
/**
024
*登录并抓取中国联通数据
025
*带验证码登录
026
*@authorEdson.di
027
*@date2015年3月4日
028
*@version1.0
029
*@throwsIOException
030
*/
031
public

static
void

main(Stringargs[])
throws

Exception{
032
033
Stringname=
"联通手机号"
;

034
Stringpwd=
"手机服务密码"
;

035
036
//生成验证码的链接
037
StringcreateCaptchaUrl=
"http://uac.10010.com/portal/Service/CreateImage"
;
038
HttpClienthttpClient=
new
DefaultHttpClient();
039
040
//这里可自定义所需要的cookie
041
CookieStorecookieStore=
new
BasicCookieStore();
042
043
CloseableHttpClienthttpclient=HttpClients.custom()
044
.setDefaultCookieStore(cookieStore)
045
.build();
046
047
//getcaptcha,获取验证码
048
HttpGetcaptchaHttpGet=
new
HttpGet(createCaptchaUrl);
049
HttpResponsecapthcaResponse=httpClient.execute(captchaHttpGet);
050
051
if

(capthcaResponse.getStatusLine().getStatusCode()==

200
){
052
//将验证码写入本地
053
saveToLocal(capthcaResponse.getEntity(),
"chinaunicom.capthca."

+System.currentTimeMillis()+
".png"
);
054
}
055
056
057
//手工输入验证码并验证
058
HttpResponseverifyResponse=
null
;
059
Stringcapthca=
null
;
060
Stringuvc=
null
;
061
062
do

{
063
//输入验证码,读入键盘输入
064
//1)
065
//InputStreaminputStream=System.in;
066
//BufferedReaderbufferedReader=newBufferedReader(newInputStreamReader(inputStream));
067
//System.out.println("请输入验证码:");
068
//capthca=bufferedReader.readLine();
069
070
capthca=JOptionPane.showInputDialog(
"请输入图片验证码:"
);
071
072
//2)
073
//Scannerscanner=newScanner(System.in);
074
//capthca=scanner.next();
075
//
http://uac.10010.com/portal/Service/CtaIdyChk?callback=jsonp1404716227598&verifyCode=4m3e&verifyType=1
076
StringverifyCaptchaUrl=
"
+capthca+
"&verifyType=1"
;
077
HttpGetverifyCapthcaGet=
new
HttpGet(verifyCaptchaUrl);
078
verifyResponse=httpClient.execute(verifyCapthcaGet);
079
AbstractHttpClientabstractHttpClient=(AbstractHttpClient)httpClient;
080
for

(Cookiecookie:abstractHttpClient.getCookieStore().getCookies()){
081
System.out.println(cookie.getName()+
":"
+cookie.getValue());
082
if

(cookie.getName().equals(
"uacverifykey"
)){
083
uvc=cookie.getValue();
084
}
085
}
086
}
while
(!EntityUtils.toString(verifyResponse.getEntity()).contains(
"true"
));
087
088
//登录
089
Stringloginurl=
"
+name+
"&password="

+pwd+
"&pwdType=01&productType=01&verifyCode="

+capthca+
"&redirectType=03&uvc="

+uvc;
090
HttpGetloginGet=
new
HttpGet(loginurl);
091
CloseableHttpResponseloginResponse=httpclient.execute(loginGet);
092
System.out.print(
"result:"

+EntityUtils.toString(loginResponse.getEntity()));

093
094
//抓取基本信息数据
095
//jsonp1404663560635({resultCode:"7072",redirectURL:"http://www.10010.com",errDesc:"null",msg:'系统忙,请稍后再试。',needvode:"1"});
096
HttpPostbasicHttpGet=
new
HttpPost(
"http://iservice.10010.com/ehallService/static/acctBalance/execute/YH102010005/QUERY_AcctBalance.processData/Result"
);
097
saveToLocal(httpclient.execute(basicHttpGet).getEntity(),
"chinaunicom.basic.html"
);
098
099
}
100
/**
101
*写文件到本地
102
*
103
*@paramhttpEntity
104
*@paramfilename
105
*/
106
public

static
void

saveToLocal(HttpEntityhttpEntity,Stringfilename){
107
108
try

{
109
110
Filedir=
new
File(
"/JEE/sz-588/workspace/maven-httpclient-demo"
);
111
if

(!dir.isDirectory()){
112
dir.mkdir();
113
}
114
115
Filefile=
new
File(dir.getAbsolutePath()+
"/"
+filename);
116
FileOutputStreamfileOutputStream=
new
FileOutputStream(file);
117
InputStreaminputStream=httpEntity.getContent();
118
119
if

(!file.exists()){
120
file.createNewFile();
121
}
122
byte
[]bytes=
new
byte
[
1024
];
123
int

length=
0
;
124
while

((length=inputStream.read(bytes))>
0
){
125
fileOutputStream.write(bytes,
0
,length);

126
}
127
inputStream.close();
128
fileOutputStream.close();
129
}
catch
(Exceptione){
130
e.printStackTrace();
131
}
132
133
}

134
}
生成文件





json格式输出





转自:http://www.zuidaima.com/share/2238465258310656.htm
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: