您的位置:首页 > 编程语言 > Java开发

Java 实现通过 POST 方式提交 JSON 参数

2018-03-04 23:36 447 查看
由于所爬取的网站需要验证码,通过浏览器的开发者工具(F12)以及在线 HTTP POST/GET 接口测试工具发现:请求时加上请求头(header)信息即可跳过验证码校验。

而且该网站只接受 POST 请求,提交的参数也必须是 JSON 格式,否则请求会失败。

现将通过 post 方式提交json参数的方法记录如下:

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Demonstrates submitting JSON parameters through an HTTP POST request and
 * paging through the JSON result list returned by the target service.
 *
 * <p>The request carries a fixed set of headers; according to the original
 * author's notes, sending them lets the request pass without the site's
 * captcha check.</p>
 *
 * @version 1.0
 * @author zxk
 */
public class PostJsonParamsTest {

    /** Timeout applied to the socket, connect and connection-request phases. */
    private static final int RUN_TIME = 10000;

    /** Give up on a page after this many consecutive failed attempts. */
    private static final int MAX_RETRIES = 5;

    /**
     * Crawling ends as soon as the current page equals this value, i.e. only
     * page 1 is fetched. The original author's commented-out alternative was
     * "2713", presumably the bound for a full run.
     */
    private static final String STOP_PAGE = "2";

    /** Status: stop crawling. */
    private static final String STATUS_STOP = "停止";
    /** Status: the current page failed and should be requested again. */
    private static final String STATUS_RETRY = "重跑";
    /** Status: the current page was fetched and parsed successfully. */
    private static final String STATUS_CRAWLED = "爬取";

    /** Page number to request next, kept as a decimal string. */
    private String page;

    /**
     * Configures the POST request and fetches pages until {@link #STOP_PAGE}
     * is reached or a page fails {@link #MAX_RETRIES} times in a row.
     *
     * @param args unused
     * @throws Exception if the page number cannot be parsed or the request
     *         cannot be prepared
     */
    public static void main(String[] args) throws Exception {
        PostJsonParamsTest crawl = new PostJsonParamsTest();

        // Target URL; HttpPost parses it, so no further URI handling is needed.
        String url = "http://www.gzcredit.gov.cn/Service/CreditService.asmx/searchOrgWithPage";
        // Page to start from.
        crawl.setPage("1");

        HttpPost request = new HttpPost(url);

        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(RUN_TIME)
                .setConnectTimeout(RUN_TIME)
                .setConnectionRequestTimeout(RUN_TIME)
                .build();
        request.setConfig(requestConfig);

        // Request headers; per the author's notes these let the request skip the captcha.
        request.addHeader("Accept", "application/json, text/javascript, */*");
        request.addHeader("Accept-Encoding", "gzip, deflate");
        request.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
        request.addHeader("Connection", "keep-alive");
        request.addHeader("Host", "www.gzcredit.gov.cn");
        request.addHeader("Content-Type", "application/json; charset=UTF-8");

        String isStop = "";
        int consecutiveFailures = 0;
        try {
            while (!STATUS_STOP.equals(isStop)) {
                // Rebuild the body on every iteration so that pageNo follows the
                // current page; a body built once would always request page 1.
                String postParams = buildPostParams(crawl.getPage());
                System.out.println(postParams);
                // Encode explicitly as UTF-8 to match the Content-Type header.
                request.setEntity(new StringEntity(postParams, "UTF-8"));

                isStop = crawl.crawlList(request);
                if (STATUS_CRAWLED.equals(isStop)) {
                    consecutiveFailures = 0;
                    crawl.setPage(String.valueOf(Integer.parseInt(crawl.getPage()) + 1));
                } else if (STATUS_RETRY.equals(isStop) && ++consecutiveFailures >= MAX_RETRIES) {
                    // Bound the retries so a persistent failure cannot loop forever.
                    System.err.println("Giving up on page " + crawl.getPage() + " after "
                            + consecutiveFailures + " consecutive failures");
                    isStop = STATUS_STOP;
                }

                if (STOP_PAGE.equals(crawl.getPage())) {
                    break;
                }
            }
        } catch (NumberFormatException e) {
            // Keep the original message but preserve the underlying cause.
            NumberFormatException wrapped = new NumberFormatException("数字格式错误");
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Builds the JSON request body for one result page.
     *
     * @param page page number to request
     * @return the JSON document sent as the POST body
     */
    private static String buildPostParams(String page) {
        // "count" is quoted so that the body is valid JSON.
        return "{\"condition\":{\"qymc\":\"%%%%\",\"cydw\":\"\"},\"pageNo\":" + page
                + ",\"pageSize\":100,\"count\":2709846}";
    }

    /**
     * Executes the prepared request once, parses the response and prints the
     * {@code orgList} array it contains.
     *
     * @param request fully configured request (URL, headers and body)
     * @return "爬取" when a non-empty {@code orgList} was received and printed;
     *         "重跑" when the call failed, returned a non-200 status, or
     *         produced no usable data
     */
    private String crawlList(HttpRequestBase request) {
        // A new client is created per call and released in the finally block.
        CloseableHttpClient httpClient = HttpClients.createDefault();
        HttpEntity httpEntity = null;
        try {
            HttpResponse response = httpClient.execute(request);
            // Capture the entity first so that it is consumed on every exit path.
            httpEntity = response.getEntity();

            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200) {
                return STATUS_RETRY;
            }

            // GBK is only the fallback charset when the response declares none.
            String searchListStr = EntityUtils.toString(httpEntity, "GBK").replaceAll("\\\\米", "米");
            JSONObject envelope = JSONObject.parseObject(searchListStr);
            String allData = envelope == null ? null : (String) envelope.get("d");
            if (allData == null) {
                return STATUS_RETRY;
            }

            // Work around string values that contain unescaped double quotes:
            // structural quotes are first turned into single quotes, all remaining
            // double quotes are dropped, then single quotes become double quotes
            // again. "<br />" markers and tabs are removed and backslashes are
            // replaced by "?".
            // NOTE(review): apostrophes inside values would also become double
            // quotes in this scheme - confirm the data never contains them.
            String s = allData.replaceAll("\\{\"", "{'")
                    .replaceAll("\":\"", "':'")
                    .replaceAll("\",\"", "','")
                    .replaceAll("\":", "':")
                    .replaceAll(",\"", ",'")
                    .replaceAll("\"\\}", "'}")
                    .replaceAll("\"", "")
                    .replaceAll("'", "\"")
                    .replaceAll("<br />", "")
                    .replaceAll("\t", "")
                    .replaceAll("\\\\", "?");
            JSONObject jsonData = JSONObject.parseObject(s);
            JSONArray jsonContent = jsonData == null ? null : jsonData.getJSONArray("orgList");

            if (jsonContent == null || jsonContent.size() < 1) {
                return STATUS_RETRY;
            }
            System.out.println(jsonContent.toJSONString());
            return STATUS_CRAWLED;
        } catch (Exception e) {
            // Any transport or parsing problem is reported and treated as retryable.
            e.printStackTrace();
            return STATUS_RETRY;
        } finally {
            EntityUtils.consumeQuietly(httpEntity);
            try {
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * @return the page number to request next
     */
    private String getPage() {
        return page;
    }

    /**
     * @param page the page number to request next
     */
    private void setPage(String page) {
        this.page = page;
    }

}


最后,在调试程序进入死胡同的时候,感谢同事的帮助!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐