下载网页的基本方法
2016-09-09 20:28
78 查看
一、java.net.URL
二、Scanner对象
三、套接字
四、HttpClient
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Downloads a page through {@link URL#openStream()} and returns it as text.
 */
public class RetrivePage {

    /**
     * Fetches the resource identified by {@code path} and returns its content.
     *
     * <p>The content is copied character by character, so line terminators are
     * preserved exactly as the server sent them.
     *
     * @param path absolute URL of the resource, e.g. {@code http://www.sina.com}
     * @return the decoded content; an empty string when the resource is empty
     * @throws IOException if {@code path} is not a valid URL or the resource
     *                     cannot be opened or read
     */
    public static String downloadPage(String path) throws IOException {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        // NOTE(review): the bytes are decoded as UTF-8 regardless of what the
        // server declares; confirm this suits the pages being fetched.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
            char[] chunk = new char[4096];
            int count;
            // Copy raw characters instead of using readLine(), which strips the
            // line terminators and glues adjacent lines together.
            while ((count = reader.read(chunk)) != -1) {
                pageBuffer.append(chunk, 0, count);
            }
        }
        return pageBuffer.toString();
    }

    /**
     * Prints the content of {@code http://www.sina.com} to standard output.
     *
     * @param args ignored
     * @throws IOException if the download fails
     */
    public static void main(String args[]) throws IOException {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}
二、Scanner对象
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

/**
 * Downloads a page by reading the URL stream through a {@link Scanner}.
 */
public class RetrivePage {

    /**
     * Fetches the resource identified by {@code path} and returns its content,
     * decoded as UTF-8.
     *
     * @param path absolute URL of the resource, e.g. {@code http://www.sina.com}
     * @return the decoded content; an empty string when the resource is empty
     * @throws IOException if {@code path} is not a valid URL, the resource
     *                     cannot be opened, or reading it fails part-way
     */
    public static String downloadPage(String path) throws IOException {
        URL pageURL = new URL(path);
        StringBuilder pageBuffer = new StringBuilder();
        // Closing the scanner also closes the underlying reader and stream.
        try (Scanner scanner = new Scanner(
                new InputStreamReader(pageURL.openStream(), StandardCharsets.UTF_8))) {
            // "\z" matches only at the end of input, so each token is as much
            // of the page as the scanner can deliver in one piece.
            scanner.useDelimiter("\\z");
            while (scanner.hasNext()) {
                pageBuffer.append(scanner.next());
            }
            // Scanner hides read errors and simply reports "no more input";
            // rethrow so a broken connection is not mistaken for a short page.
            IOException readFailure = scanner.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        }
        return pageBuffer.toString();
    }

    /**
     * Prints the content of {@code http://www.sina.com} to standard output.
     *
     * @param args ignored
     * @throws IOException if the download fails
     */
    public static void main(String args[]) throws IOException {
        System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
    }
}
三、套接字
import java.io.*;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

/**
 * Downloads a page by speaking HTTP/1.0 directly over a TCP socket.
 */
public class RetrivePage {

    /**
     * Sends a GET request for {@code /column.html} to {@code blog.csdn.net:80}
     * and prints the raw response (status line, headers and body) line by line
     * to standard output.
     *
     * @param args ignored
     * @throws IOException if the connection cannot be opened, the request
     *                     cannot be written, or the response cannot be read
     */
    public static void main(String args[]) throws IOException {
        String host = "blog.csdn.net";
        String file = "/column.html";
        int port = 80;

        // Closing the socket also closes both of its streams.
        try (Socket s = new Socket(host, port)) {
            PrintWriter outw = new PrintWriter(
                    new OutputStreamWriter(s.getOutputStream(), StandardCharsets.US_ASCII), false);
            // Request line is "METHOD SP Request-URI SP HTTP-Version"; the
            // space after GET is mandatory.
            outw.print("GET " + file + " HTTP/1.0\r\n");
            // Servers hosting several sites on one address use Host to pick
            // the site, even for HTTP/1.0 requests.
            outw.print("Host: " + host + "\r\n");
            outw.print("Accept:text/plain,text/html,text/*\r\n");
            outw.print("\r\n");
            // PrintWriter never throws on write; checkError() flushes and
            // reports whether any write failed.
            if (outw.checkError()) {
                throw new IOException("Failed to send request to " + host + ":" + port);
            }

            // NOTE(review): the response is decoded as UTF-8 without looking
            // at the Content-Type header; confirm that matches the server.
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(s.getInputStream(), StandardCharsets.UTF_8));
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}
四、HttpClient
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * Downloads a page with Apache HttpClient.
 */
public class RetrivePage {

    /**
     * Issues a GET request for {@code http://www.sina.com} and, when the
     * response carries an entity, prints it to standard output as text
     * (passing {@code "utf-8"} as the charset argument).
     *
     * @param args ignored
     * @throws Exception if the request or reading the response fails
     */
    public static void main(String args[]) throws Exception {
        HttpClient httpClient = new DefaultHttpClient();
        try {
            HttpGet httpGet = new HttpGet("http://www.sina.com");
            HttpResponse response = httpClient.execute(httpGet);
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                try {
                    System.out.println(EntityUtils.toString(entity, "utf-8"));
                } finally {
                    // Consume the entity even if decoding or printing fails.
                    EntityUtils.consume(entity);
                }
            }
        } finally {
            // Shut the connection manager down on every path, including when
            // execute() throws.
            httpClient.getConnectionManager().shutdown();
        }
    }
}
相关文章推荐
- 从网页上下载控件时制作CAB包的方法
- 下载网页中远程图片的方法
- How to Download Embedded Flash Files using your Browser 从浏览器下载网页内嵌的Flash文件的方法
- ASP下载图片而不是在网页中打开的解决方法
- JQuery扩展插件Validate 1 基本使用方法并打包下载
- 提高网页下载速度的两种方法
- 将网页内容用php程序下载下来的方法
- c#下载网页源码的两种方法
- c#下载网页源码的多种方法
- 实现网页密码验证的两个基本方法
- 转载:从网页上下载控件时制作CAB包的方法
- 网页Flash下载方法大全[组图]
- 【转】从网页上下载控件时制作CAB包的方法
- [Tip: chm]解决方法:下载的Chm文档只有目录没有网页
- Python 下载网页的几种方法
- 下载网页中远程图片的方法
- python 破除网页限制的基本方法
- 网页嵌套的视频下载方法
- 打开网页时弹出迅雷下载,无法浏览网页的解决方法
- 【转】只能在同一IE窗口打开网页的基本解决方法