您的位置:首页 > 其它

下载网页的基本方法

2016-09-09 20:28 78 查看
一、java.net.URL

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

/**
 * Downloads a page with {@link java.net.URL#openStream()} and returns its text.
 */
public class RetrivePage {
    /**
     * Fetches the resource at {@code path} and returns its content as a string.
     *
     * <p>The content is decoded as UTF-8 and returned unchanged, including line
     * terminators. The underlying stream is always closed, even when reading fails.
     *
     * @param path absolute URL of the resource to download
     * @return the decoded content; empty when the resource is empty
     * @throws Exception if the URL is malformed or the resource cannot be read
     */
    public static String downloadPage(String path) throws Exception {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        // try-with-resources closes the reader (and the connection stream beneath it).
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(pageURL.openStream(), "UTF-8"))) {
            // Copy raw characters instead of readLine(), which strips line terminators.
            char[] chunk = new char[8192];
            int count;
            while ((count = reader.read(chunk)) != -1) {
                pageBuffer.append(chunk, 0, count);
            }
        }
        return pageBuffer.toString();
    }

    /**
     * Prints the content of http://www.sina.com to standard output.
     *
     * @param args ignored
     * @throws Exception if the download fails
     */
    public static void main(String args[]) throws Exception {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}


二、Scanner对象

import java.io.InputStreamReader;
import java.net.URL;
import java.util.Scanner;

/**
 * Downloads a page by reading the URL stream through a {@link Scanner}.
 */
public class RetrivePage {
    /**
     * Fetches the resource at {@code path} and returns its content as a string.
     *
     * <p>The content is decoded as UTF-8. The scanner, and with it the underlying
     * stream, is always closed. A read failure that the scanner recorded is rethrown
     * instead of being returned as a silently truncated page.
     *
     * @param path absolute URL of the resource to download
     * @return the decoded content; empty when the resource is empty
     * @throws Exception if the URL is malformed or the resource cannot be read
     */
    public static String downloadPage(String path) throws Exception {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        try (Scanner scanner = new Scanner(new InputStreamReader(pageURL.openStream(), "utf-8"))) {
            // "\z" only matches at end of input, so no character is consumed as a delimiter.
            scanner.useDelimiter("\\z");
            // The scanner may hand the content back in several pieces; concatenate them all.
            while (scanner.hasNext()) {
                pageBuffer.append(scanner.next());
            }
            // Scanner swallows I/O errors and just reports "no more tokens"; surface them.
            java.io.IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
        }
        return pageBuffer.toString();
    }

    /**
     * Prints the content of http://www.sina.com to standard output.
     *
     * @param args ignored
     * @throws Exception if the download fails
     */
    public static void main(String args[]) throws Exception {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}


三、套接字

import java.io.*;
import java.net.Socket;

/**
 * Downloads a page by speaking HTTP/1.0 directly over a TCP socket.
 */
public class RetrivePage {
    /**
     * Sends a GET request for {@code /column.html} to {@code blog.csdn.net:80} and
     * prints the raw response (status line, headers and body) to standard output.
     *
     * @param args ignored
     * @throws Exception if the connection cannot be opened or the exchange fails
     */
    public static void main(String args[]) throws Exception {
        String host = "blog.csdn.net";
        String file = "/column.html";
        int port = 80;
        // try-with-resources closes the socket, and with it both of its streams.
        try (Socket s = new Socket(host, port)) {
            OutputStream out = s.getOutputStream();
            PrintWriter outw = new PrintWriter(out, false);
            // Request line is "METHOD SP request-target SP version"; the space after GET is required.
            outw.print("GET " + file + " HTTP/1.0\r\n");
            // Host lets a server that hosts several sites on one address pick the right one.
            outw.print("Host: " + host + "\r\n");
            outw.print("Accept:text/plain,text/html,text/*\r\n");
            // Blank line terminates the header section.
            outw.print("\r\n");
            outw.flush();
            // PrintWriter never throws on write failure; it only records it.
            if (outw.checkError()) {
                throw new IOException("Failed to send HTTP request to " + host + ":" + port);
            }
            InputStream in = s.getInputStream();
            InputStreamReader inr = new InputStreamReader(in);
            BufferedReader bufferedReader = new BufferedReader(inr);
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}


四、HttpClient

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
/**
 * Downloads a page with Apache HttpClient.
 */
public class RetrivePage {
    /**
     * Issues a GET request for http://www.sina.com and prints the response body,
     * decoded as UTF-8, to standard output.
     *
     * @param args ignored
     * @throws Exception if the request fails or the body cannot be read
     */
    public static void main(String args[]) throws Exception {
        HttpClient httpClient = new DefaultHttpClient();
        try {
            HttpGet httpGet = new HttpGet("http://www.sina.com");
            HttpResponse response = httpClient.execute(httpGet);
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                // toString reads the entity to the end, so no separate consume() call is needed.
                System.out.println(EntityUtils.toString(entity, "utf-8"));
            }
        } finally {
            // Release pooled connections even when the request or the read throws.
            httpClient.getConnectionManager().shutdown();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: