您的位置:首页 > 理论基础 > 计算机网络

Jsoup+HttpClient获取新浪新闻数据

2015-04-29 22:43 483 查看
package com.test;

import java.io.IOException;
import java.net.URI;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Fetches one page of the Sina rolling-news channel listing with Apache
 * HttpClient and prints the status line, the response headers and the
 * response body to standard output.
 *
 * <p>Depends on Apache HttpClient 4.3 or later (the {@code org.apache.http}
 * API: the httpclient and httpcore jars plus their commons-logging and
 * commons-codec dependencies). The older commons-httpclient-3.1 jar does not
 * contain the classes used here.
 *
 * @author tianjun
 */
public class PostTest {

    /** Charset used to decode the body when the response does not declare one. */
    private static final String FALLBACK_CHARSET = "GBK";

    /**
     * Builds the channel-listing URL, issues a GET request for it and dumps
     * the response to standard output.
     *
     * @param args unused
     * @throws Exception if the request URI cannot be built
     */
    public static void main(String[] args) throws Exception {

        // Resulting URL:
        // http://roll.news.sina.com.cn/s/channel.php?col=91&spec=&type=&ch=01&offset_page=0&offset_num=0&num=60&page=1
        URI uri = new URIBuilder()
                .setScheme("http")
                .setHost("roll.news.sina.com.cn")
                .setPath("/s/channel.php")
                .setParameter("col", "91")
                .setParameter("spec", "")
                .setParameter("type", "")
                .setParameter("ch", "01")
                .setParameter("offset_page", "0")
                .setParameter("offset_num", "0")
                .setParameter("num", "60")
                .setParameter("page", "1")
                .build();

        HttpGet httpget = new HttpGet(uri);
        applyBrowserHeaders(httpget);

        System.out.println("executing request " + httpget.getURI());

        // Closing the client also shuts down its connection manager, which
        // releases the connection used by the single request made here.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpResponse response = httpClient.execute(httpget);
            printResponse(response);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Sets the request headers that make the request look like the one a
     * Firefox 37 browser sends when loading the channel page.
     *
     * @param request the request to decorate
     */
    private static void applyBrowserHeaders(HttpGet request) {
        request.setHeader("Accept", "*/*");
        request.setHeader("Accept-Encoding", "gzip, deflate");
        request.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
        request.setHeader("Connection", "keep-alive");
        request.setHeader("Host", "roll.news.sina.com.cn");
        request.setHeader("Referer", "http://roll.news.sina.com.cn/s/channel.php?ch=01");
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0");
        request.setHeader("Content-Type", "text/html;charset=UTF-8");
    }

    /**
     * Prints the status line, every response header, the decoded body and the
     * reported content length of the given response.
     *
     * @param response the response returned by the client
     * @throws IOException if the response body cannot be read
     */
    private static void printResponse(HttpResponse response) throws IOException {
        System.out.println(response.getStatusLine());

        for (Header header : response.getAllHeaders()) {
            System.out.println(header.getName() + ":" + header.getValue());
        }

        HttpEntity entity = response.getEntity();

        System.out.println("------------------------------------");

        if (entity != null) {
            // Decode with the charset declared by the response and fall back
            // to GBK only when none is declared.
            System.out.println(EntityUtils.toString(entity, FALLBACK_CHARSET));

            System.out.println("----------------------------------------");
            // The reported length is negative when it is unknown.
            System.out.println("响应内容长度:" + entity.getContentLength());
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: