使用Jsoup解析从网络上获取到的html源码
2015-08-07 10:24
591 查看
首先贴上地址: http://www.17g.com/guild
如何查看一个网页的html源代码:右击鼠标——查看源代码,在要解析的时候点击审查元素,就可以看到html结构,方便解析。如下图
此时鼠标点击到的源码,其对应的网页区域会以蓝色高亮显示,现在就可以根据自己的需求进行解析了
注:此篇文章和上一篇文章是属于同一个project,为简便起见分开描述
如何查看一个网页的html源代码:右击鼠标——查看源代码,在要解析的时候点击审查元素,就可以看到html结构,方便解析。如下图
此时鼠标点击到的源码,其对应的网页区域会以蓝色高亮显示,现在就可以根据自己的需求进行解析了
package com.example.logintest;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import android.app.Activity;
import android.os.AsyncTask;
import android.os.Bundle;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.Button;
import android.widget.ListView;

/**
 * Activity that, when its button is clicked, downloads the HTML of the guild
 * page on a background task, extracts one {@link DefineClass} per
 * {@code <li>} of the element with id {@code g-ul-list}, and shows the
 * result in a {@link ListView} through a {@link GuildAdapter}.
 */
public class SecondActivity extends Activity {

    /** Address of the page whose HTML is downloaded and parsed. */
    private static final String GUILD_URL = "http://www.17g.com/guild";

    /** Connect and read timeout for the HTTP request, in milliseconds. */
    private static final int TIMEOUT_MS = 8000;

    private Button listbutton;
    private ListView listview;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.second_user);
        listbutton = (Button) findViewById(R.id.list);
        listview = (ListView) findViewById(R.id.datalist);
        listbutton.setOnClickListener(new OnClickListener() {
            @Override
            public void onClick(View view) {
                // Each click starts a fresh download-and-parse task.
                new NewsAsyncTask().execute(GUILD_URL);
            }
        });
    }

    /**
     * Downloads and parses the page in {@code doInBackground}, then installs
     * the adapter on the list view in {@code onPostExecute}.
     */
    class NewsAsyncTask extends AsyncTask<String, Void, List<DefineClass>> {

        /**
         * @param params {@code params[0]} is the address of the page to fetch
         * @return the parsed entries; empty when nothing could be parsed
         */
        @Override
        protected List<DefineClass> doInBackground(String... params) {
            return getURLdata(params[0]);
        }

        @Override
        protected void onPostExecute(List<DefineClass> classlist) {
            super.onPostExecute(classlist);
            GuildAdapter adapter = new GuildAdapter(SecondActivity.this, classlist);
            listview.setAdapter(adapter);
        }
    }

    /**
     * Parses the downloaded HTML into the items the list view displays. Each
     * entry of the returned list holds all the data for one list row.
     *
     * <p>If the download yields nothing, or the page has no element with id
     * {@code g-ul-list}, an empty list is returned. An {@code <li>} that
     * lacks any of the expected child elements is skipped instead of
     * aborting the whole parse.
     *
     * @param url address of the page whose HTML is parsed
     * @return the list of parsed entries, never {@code null}
     */
    private List<DefineClass> getURLdata(String url) {
        List<DefineClass> classlist = new ArrayList<DefineClass>();

        String html = getURLhtml(url);
        if (html.length() == 0) {
            return classlist;
        }

        Document doc = Jsoup.parse(html);
        Element container = doc.getElementById("g-ul-list");
        if (container == null) {
            // Page layout differs from what this parser expects.
            return classlist;
        }

        Elements items = container.getElementsByTag("li");
        for (int i = 0; i < items.size(); i++) {
            DefineClass bean = parseItem(items.get(i));
            if (bean != null) {
                classlist.add(bean);
            }
        }
        return classlist;
    }

    /**
     * Builds one entry from a single {@code <li>} element.
     *
     * @param item the {@code <li>} element to read
     * @return the populated entry, or {@code null} when an expected child
     *         element is missing
     */
    private static DefineClass parseItem(Element item) {
        Element name = firstChild(elementAt(item.getElementsByClass("h5"), 0));
        Element introduction = firstChild(elementAt(item.getElementsByClass("g-r"), 0));

        // The first <p> supplies `number`, the second supplies `No`.
        Elements paragraphs = item.getElementsByTag("p");
        Element number = firstChild(elementAt(paragraphs, 0));
        Element no = firstChild(elementAt(paragraphs, 1));

        // The src attribute is read two child levels below the "guild-img" element.
        Element picture = firstChild(firstChild(elementAt(item.getElementsByClass("guild-img"), 0)));

        if (name == null || introduction == null || number == null || no == null
                || picture == null) {
            return null;
        }

        DefineClass bean = new DefineClass();
        bean.name = name.text();
        bean.introduction = introduction.text();
        bean.number = number.text();
        bean.No = no.text();
        bean.picurl = picture.attr("src");
        return bean;
    }

    /** Returns the element at {@code index}, or {@code null} if out of range. */
    private static Element elementAt(Elements elements, int index) {
        return index < elements.size() ? elements.get(index) : null;
    }

    /**
     * Returns the first child element of {@code parent}, or {@code null} if
     * {@code parent} is {@code null} or has no child elements.
     */
    private static Element firstChild(Element parent) {
        if (parent == null) {
            return null;
        }
        Elements children = parent.children();
        return children.size() > 0 ? children.get(0) : null;
    }

    /**
     * Downloads the full HTML source of a page with an HTTP GET, decoding the
     * body as UTF-8.
     *
     * <p>Failures are printed and not propagated: the method returns whatever
     * was read before the failure, which is the empty string when the
     * request could not be made at all.
     *
     * @param url address of the page to download
     * @return the page text with lines separated by {@code '\n'}, never
     *         {@code null}
     */
    private String getURLhtml(String url) {
        HttpURLConnection connection = null;
        BufferedReader reader = null;
        // Created up front so the final toString() cannot hit a null reference
        // when the request fails before any data is read.
        StringBuilder response = new StringBuilder();
        try {
            URL target = new URL(url);
            connection = (HttpURLConnection) target.openConnection();
            connection.setReadTimeout(TIMEOUT_MS);
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(TIMEOUT_MS);
            InputStream in = connection.getInputStream();
            reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator; restore it so markup that
                // spans several lines is not fused into one token.
                response.append(line).append('\n');
            }
        } catch (Exception e) {
            // Deliberately broad: any failure here degrades to "no data"
            // rather than crashing the background task.
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
        return response.toString();
    }
}
注:此篇文章和上一篇文章是属于同一个project,为简便起见分开描述
相关文章推荐
- centos6.6重启网络报错Error: Connection activation failed: Device not managed by NetworkManager的解决办法
- http协议中content-length 以及chunked编码分析
- iphone-wireless ----- Iphone上扫描所有的无线网络
- http的get与post方式下的getParameter获取中文
- Drainage Ditches - poj 1273(网络流模板)
- Android学习笔记之网络图片加载
- TCPdump抓包命令详解
- HttpServlet详解
- 关于http的gzip解压
- iOS开发系列--网络开发
- 网络爬虫+HtmlAgilityPack+windows服务从博客园爬取20万博文
- python异常处理URLError,HTTPError,Wrapping,
- python异常处理URLError,HTTPError,Wrapping,
- 利用外网主机穿透本地网络防火墙
- http filter里面请求读取之后再次读取读取不到
- poj 2391 Ombrophobic Bovines 【floyd + 二分 + 拆点网络流】
- http协议详讲
- 移动开发 网络流量精简攻略
- 反馈神经网络Hopfield网络
- Windows Sockets 网络编程——第四章 网络应用程序工作机制