您的位置:首页 > 编程语言 > Java开发

Java获取网页源码

2013-03-08 09:36 155 查看
01 package gogo.cool;

02

03 import java.io.BufferedReader;

04 import java.io.IOException;

05 import java.io.InputStreamReader;

06 import java.net.HttpURLConnection;

07 import java.net.URL;

08

/**
 * Small demo that downloads a web page over HTTP and prints its source text.
 */
public class test1 {

    /**
     * Fetches a fixed page and prints the decoded source to standard output.
     *
     * @param a command-line arguments; not used
     * @throws IOException declared for compatibility with existing callers;
     *         {@link #getHTML(String, String)} itself reports failures
     *         without throwing
     */
    public static void main(String[] a) throws IOException {
        String url = "http://www.baidu.com";

        // Decoding with the gb2312 charset declared inside the page itself
        // produced garbled text, so gbk is used instead.
        System.out.println(getHTML(url, "gbk"));
    }

    /**
     * Downloads the resource at {@code pageURL} and returns its body decoded
     * with {@code encoding}. Every line read is terminated with
     * {@code "\r\n"} in the result, regardless of the line terminator used
     * by the server.
     *
     * <p>This method is best-effort: any failure (malformed URL, non-HTTP
     * URL, unsupported or {@code null} encoding, I/O error) is reported via
     * a stack trace on standard error, and whatever text was read before the
     * failure (possibly the empty string) is returned. It never throws.
     *
     * <p>No connect or read timeout is configured here, so the call may
     * block for as long as the underlying connection does.
     *
     * @param pageURL  absolute HTTP URL of the page to fetch
     * @param encoding name of the charset used to decode the response body
     * @return the decoded page text, or the portion read before a failure
     */
    public static String getHTML(String pageURL, String encoding) {
        StringBuilder pageHTML = new StringBuilder();
        HttpURLConnection connection = null;

        try {
            URL url = new URL(pageURL);
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestProperty("User-Agent", "MSIE 7.0");

            // try-with-resources closes the reader (and the response stream
            // underneath it) even when decoding or reading fails part-way.
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), encoding))) {
                String line;
                while ((line = br.readLine()) != null) {
                    pageHTML.append(line).append("\r\n");
                }
            }
        } catch (IOException | RuntimeException e) {
            // Deliberate best-effort boundary: callers rely on getting a
            // string back instead of an exception, so report and fall through.
            e.printStackTrace();
        } finally {
            // Release the connection on both the success and failure paths.
            if (connection != null) {
                connection.disconnect();
            }
        }

        return pageHTML.toString();
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: