您的位置:首页 > 编程语言 > Java开发

Java 读取网页内容并保存

2016-11-05 00:12 447 查看

利用 Java 读取网页内容并保存到本地文件,输入为网页的 URL 链接。

使用到的jar文件:

commons-logging-1.2.jar
httpclient-4.5.1.jar
httpcore-4.4.3.jar

package com.crawler;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

/**
 * Web crawler demo: downloads one web page over HTTP and saves it to a local file.
 *
 * <p>Usage: {@code WebCrawler [url [outputFile]]}. Both arguments are optional;
 * when omitted, the built-in default URL and output path are used.
 *
 * @author Administrator
 * @since 2016-11-04
 */
public class WebCrawler {

    /** Page fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_URL =
            "http://www.zysj.com.cn/lilunshuji/jichulilun/index.html";

    /** File written when no output path is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_FILE = "f:/中医基础理论.html";

    /** Charset name used to decode the response body and to encode the saved file. */
    private static final String PAGE_CHARSET = "UTF-8";

    /** Buffer size, in characters, of the reader that consumes the response body. */
    private static final int READ_BUFFER_SIZE = 8 * 1024;

    /**
     * Fetches a page with an HTTP GET request and saves its body to a file.
     *
     * <p>I/O failures are reported on standard error via a stack trace; nothing is
     * rethrown.
     *
     * @param args optional arguments: {@code args[0]} is the URL to fetch and
     *             {@code args[1]} is the file to write; missing values fall back to
     *             the built-in defaults
     */
    public static void main(String[] args) {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        String outputFile = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_FILE;

        HttpGet httpGet;
        try {
            httpGet = new HttpGet(url);
        } catch (IllegalArgumentException e) {
            // HttpGet(String) rejects strings that are not valid URIs.
            System.err.println("Invalid URL: " + url);
            return;
        }
        // NOTE(review): Content-Type describes a request body, which this GET does not
        // send; an Accept/Accept-Charset header may have been intended. Kept as-is.
        httpGet.addHeader("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");

        // try-with-resources closes the client (and its connections) on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // Execute the request.
            HttpResponse response = httpClient.execute(httpGet);
            HttpEntity httpEntity = response.getEntity();
            if (httpEntity == null) {
                // A response without a body has nothing to save.
                System.err.println("No response body received from " + url);
                return;
            }
            savaFile(outputFile, readBody(httpEntity), PAGE_CHARSET);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole entity body as text, decoding it with {@link #PAGE_CHARSET}.
     *
     * <p>The body is read line by line and every line is terminated with a single
     * {@code '\n'}, so the original line separators are normalized and the result
     * always ends with a newline when the body is non-empty.
     *
     * @param httpEntity the response entity whose content is read and then closed
     * @return the decoded body text
     * @throws IOException if the content stream cannot be obtained or read
     */
    private static String readBody(HttpEntity httpEntity) throws IOException {
        StringBuilder body = new StringBuilder();
        // Closing the reader also closes the underlying content stream.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(httpEntity.getContent(), PAGE_CHARSET),
                READ_BUFFER_SIZE)) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
        }
        return body.toString();
    }

    /**
     * Saves text to a file, replacing any existing content.
     *
     * <p>I/O failures (including an unsupported charset name) are reported on standard
     * error via a stack trace; nothing is rethrown.
     *
     * @param fileName path of the file to write (the original author used absolute paths)
     * @param content  the text to save
     * @param format   name of the charset used to encode the text, e.g. {@code "UTF-8"}
     */
    public static void savaFile(String fileName, String content, String format) {
        File file = new File(fileName);
        // Resources are closed in reverse order: the writer is flushed and closed
        // first, then the underlying stream.
        try (OutputStream out = new FileOutputStream(file);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, format))) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes text to a file using {@link FileWriter}.
     *
     * <p>Despite the method name, the file is opened with {@code append == false},
     * so existing content is overwritten rather than appended to. The text is
     * encoded with whatever charset {@code FileWriter} uses by default; use
     * {@link #savaFile(String, String, String)} to choose the charset explicitly.
     *
     * <p>I/O failures are reported on standard error via a stack trace; nothing is
     * rethrown.
     *
     * @param fileName path of the file to write
     * @param content  the text to write
     */
    public static void appendMethodB(String fileName, String content) {
        // The second constructor argument selects append mode; false means overwrite.
        try (FileWriter writer = new FileWriter(fileName, false)) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  java 网络爬虫