您的位置:首页 > 理论基础 > 计算机网络

使用Jsoup解析从网络上获取到的html源码

2015-08-07 10:24 591 查看
首先贴上地址: http://www.17g.com/guild
查看网页 HTML 源代码的方法:在页面上右击鼠标,选择“查看源代码”;需要解析某个元素时,右击该元素并选择“审查元素”,即可看到对应的 HTML 结构,方便解析,如下图所示。

此时鼠标点击到的源码所对应的网页区域会以蓝色高亮显示,接下来就可以根据自己的需求进行解析了。

package com.example.logintest;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import android.app.Activity;
import android.os.AsyncTask;
import android.os.Bundle;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.Button;
import android.widget.ListView;

/**
 * Activity that downloads the guild page when the list button is clicked,
 * parses its HTML with Jsoup and shows one row per parsed guild in a ListView.
 */
public class SecondActivity extends Activity {
	/** Page whose HTML is downloaded and parsed. */
	private static final String GUILD_URL = "http://www.17g.com/guild";

	private Button listbutton;
	private ListView listview;

	@Override
	protected void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		setContentView(R.layout.second_user);

		listbutton = (Button) findViewById(R.id.list);
		listview = (ListView) findViewById(R.id.datalist);

		listbutton.setOnClickListener(new OnClickListener() {
			@Override
			public void onClick(View arg0) {
				new NewsAsyncTask().execute(GUILD_URL);
			}
		});
	}

	/**
	 * Downloads and parses the page off the UI thread, then binds the result
	 * to the ListView once the work is done.
	 */
	class NewsAsyncTask extends AsyncTask<String, Void, List<DefineClass>> {
		@Override
		protected List<DefineClass> doInBackground(String... params) {
			// params[0] is the URL passed to execute().
			return getURLdata(params[0]);
		}

		@Override
		protected void onPostExecute(List<DefineClass> classlist) {
			super.onPostExecute(classlist);
			GuildAdapter adapter = new GuildAdapter(SecondActivity.this, classlist);
			listview.setAdapter(adapter);
		}
	}

	/**
	 * Downloads the HTML at {@code url} and extracts the data for every list row.
	 * Each entry of the returned list holds everything one ListView row displays.
	 *
	 * @param url address of the page to download and parse
	 * @return the parsed entries; empty (never null) when the download produced
	 *         nothing or the page has no element with id "g-ul-list"
	 */
	private List<DefineClass> getURLdata(String url) {
		List<DefineClass> classlist = new ArrayList<DefineClass>();

		Document doc = Jsoup.parse(getURLhtml(url));
		Element units = doc.getElementById("g-ul-list");
		if (units == null) {
			// Download failed or the page layout changed: nothing to show.
			return classlist;
		}

		for (Element item : units.getElementsByTag("li")) {
			DefineClass guild = parseGuild(item);
			if (guild != null) {
				classlist.add(guild);
			}
		}
		return classlist;
	}

	/**
	 * Builds one entry from a single "li" element.
	 *
	 * @param item the "li" element to read
	 * @return the populated entry, or null when the element does not contain
	 *         every node the entry is read from
	 */
	private static DefineClass parseGuild(Element item) {
		Elements paragraphs = item.getElementsByTag("p");

		Element name = firstChildOf(item.getElementsByClass("h5"), 0);
		Element introduction = firstChildOf(item.getElementsByClass("g-r"), 0);
		Element number = firstChildOf(paragraphs, 0);
		Element no = firstChildOf(paragraphs, 1);
		Element picHolder = firstChildOf(item.getElementsByClass("guild-img"), 0);

		if (name == null || introduction == null || number == null || no == null
				|| picHolder == null || picHolder.children().isEmpty()) {
			// Skip this item instead of letting one malformed entry abort the whole list.
			return null;
		}

		DefineClass guild = new DefineClass();
		guild.name = name.text();
		guild.introduction = introduction.text();
		guild.number = number.text();
		guild.No = no.text();
		// The "src" attribute is read from the first child of the first child of "guild-img".
		guild.picurl = picHolder.child(0).attr("src");
		return guild;
	}

	/**
	 * Returns the first child element of {@code matches.get(matchIndex)}.
	 *
	 * @param matches    elements to pick from
	 * @param matchIndex index of the element whose first child is wanted
	 * @return that child, or null when the index is out of range or the
	 *         selected element has no child elements
	 */
	private static Element firstChildOf(Elements matches, int matchIndex) {
		if (matchIndex >= matches.size()) {
			return null;
		}
		Element parent = matches.get(matchIndex);
		return parent.children().isEmpty() ? null : parent.child(0);
	}

	/**
	 * Fetches the full HTML source of {@code url} with an HTTP GET, decoded as UTF-8.
	 *
	 * @param url address to download
	 * @return the text read from the response; if an I/O error occurs, whatever
	 *         was read before the failure (possibly the empty string), never null
	 */
	private String getURLhtml(String url) {
		HttpURLConnection connection = null;
		BufferedReader reader = null;
		// Created up front so a failed connection yields "" rather than a null dereference.
		StringBuilder response = new StringBuilder();
		try {
			URL urls = new URL(url);
			connection = (HttpURLConnection) urls.openConnection();
			connection.setReadTimeout(8000);
			connection.setRequestMethod("GET");
			connection.setConnectTimeout(8000);
			InputStream in = connection.getInputStream();
			reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
			String line;
			while ((line = reader.readLine()) != null) {
				// readLine() strips the line terminator; put one back so markup
				// split across lines is not fused into a single token.
				response.append(line).append('\n');
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
			if (connection != null) {
				connection.disconnect();
			}
		}
		return response.toString();
	}
}


注:本文与上一篇文章属于同一个 project,为简便起见分开描述。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: