您的位置:首页 > 编程语言 > Java开发

Java 网页文档保存成 TXT

2017-11-27 15:29 239 查看
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;

/**
 * Downloads a chain of chapter pages with jsoup and appends their text to a local file.
 *
 * <p>Starting from {@link #START_URL}, each page's {@code h1} text and the text of its
 * {@code div} with id {@code content} are appended to {@link #OUTPUT_PATH}; the crawl then
 * follows the page's "next chapter" link until no such link is found or a download fails.
 */
public class ExampleUnitTest {

    /** URL of the first chapter page to download. */
    private static final String START_URL = "http://www.23us.cc/html/142/142351/7225315.html";

    /** Prefix prepended to a "next chapter" href to build the URL of the following page. */
    private static final String CHAPTER_BASE_URL = "http://www.23us.cc/html/142/142351/";

    /** File that every extracted line is appended to. */
    private static final String OUTPUT_PATH = "G:\\txt\\1.txt";

    /** Link text that identifies the "next chapter" anchor. */
    private static final String NEXT_CHAPTER_LABEL = "下一章";

    /** Value of the {@code class} attribute of the div that holds the navigation links. */
    private static final String NAV_DIV_CLASS = "link xb";

    /** Connection timeout in milliseconds for each page download. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * JUnit entry point; runs the whole crawl.
     *
     * @throws Exception declared for compatibility with the original signature
     */
    @Test
    public void addition_isCorrect() throws Exception {
        test();
    }

    /** Starts the crawl at {@link #START_URL}. */
    private void test() {
        parse(START_URL);
    }

    /**
     * Downloads the page at {@code serverString}, saves its text, and keeps following the
     * "next chapter" link.
     *
     * <p>Implemented as a loop rather than recursion so that the stack depth does not grow
     * with the number of chapters. Only the first "next chapter" link of a page is followed,
     * so a page that repeats its navigation links is not crawled more than once. The crawl
     * stops when a page cannot be downloaded or has no "next chapter" link.
     *
     * @param serverString URL of the first page to download
     */
    private void parse(String serverString) {
        String pageUrl = serverString;
        while (pageUrl != null) {
            Document document;
            try {
                document = Jsoup.connect(pageUrl).timeout(TIMEOUT_MILLIS).get();
            } catch (IOException e) {
                // Best effort: report the failure and stop instead of dereferencing a null document.
                e.printStackTrace();
                return;
            }

            saveChapter(document);

            String href = findNextChapterHref(document);
            if (href == null) {
                pageUrl = null;
            } else {
                System.out.println(href);
                pageUrl = CHAPTER_BASE_URL + href;
            }
        }
    }

    /**
     * Writes the headings and the body text of one page to the output file.
     *
     * @param document parsed page to extract text from
     */
    private void saveChapter(Document document) {
        // Every h1 element is written to the file and echoed to stdout.
        for (Element heading : document.select("h1")) {
            String title = heading.text();
            method(title);
            System.out.println(title);
        }

        // The div whose id is "content" holds the body; its text is split on single
        // spaces and each piece is written as its own line.
        for (Element div : document.select("div")) {
            if ("content".equals(div.attr("id"))) {
                for (String piece : div.text().split(" ")) {
                    method(piece);
                }
            }
        }
    }

    /**
     * Finds the href of the first "next chapter" anchor inside a navigation div.
     *
     * @param document parsed page to search
     * @return the anchor's {@code href} attribute value, or {@code null} if the page has no such anchor
     */
    private String findNextChapterHref(Document document) {
        for (Element div : document.select("div")) {
            if (!NAV_DIV_CLASS.equals(div.attr("class"))) {
                continue;
            }
            for (Element anchor : div.select("a")) {
                if (NEXT_CHAPTER_LABEL.equals(anchor.text())) {
                    return anchor.attr("href");
                }
            }
        }
        return null;
    }

    /**
     * Appends {@code msg} followed by CRLF to {@link #OUTPUT_PATH}, encoded as UTF-8.
     *
     * <p>I/O failures are printed and otherwise ignored, so one failed write does not abort
     * the crawl.
     *
     * @param msg text to append as one line
     */
    public void method(String msg) {
        File f = new File(OUTPUT_PATH);
        // try-with-resources closes the stream and writer even when write() throws.
        try (FileOutputStream writerStream = new FileOutputStream(f, true);
             BufferedWriter writer =
                     new BufferedWriter(new OutputStreamWriter(writerStream, "UTF-8"))) {
            writer.write(msg + "\r\n");
        } catch (IOException e) {
            // FileNotFoundException and UnsupportedEncodingException are IOExceptions too.
            e.printStackTrace();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: