您的位置:首页 > 运维架构 > 网站架构

Jsoup抓取数据实现为一个网站做第三方Android客户端

2016-04-22 08:49 513 查看
一 前言

学Android有很大一部分的乐趣其实就是做出一个网站客户端,比官方广告少,速度快,不臃肿,受到众多网友追捧。

由于学校图书馆网站没有APP,也没有移动版,体验相当差,用得实在太烦,就做了这个小应用,先来看下效果。



二 Jsoup实现抓取书名

Jsoup是一个Java的HTML解析工具包,百度一搜一大堆,这里不对Jsoup做过多介绍,先来看看对http://222.188.3.137:8080/opac/search.php的抓取。

这里抓取到数据只要往适配器里填充数据就好了。

package com.wyf.newlibrary;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * Scrapes one page of library search results.
 *
 * <p>After {@link #init()} has run, the book titles, the absolute links to each
 * book's detail page, and the link to the next result page (if any) are available
 * through the getters. {@link #init()} performs blocking network I/O and must not
 * be called on the Android UI thread.
 */
public class BookNameJsoup {
    /** Capacity of the result arrays; callers iterate over 20 slots per page. */
    private static final int PAGE_SIZE = 20;

    String url;

    String[] bName;
    String[] bLink;
    String nextPage;

    /**
     * @param link URL of the result page to scrape
     */
    public BookNameJsoup(String link) {
        url = link;
        bName = new String[PAGE_SIZE];
        bLink = new String[PAGE_SIZE];
    }

    /**
     * Downloads the page and fills in titles, links and the next-page link.
     *
     * <p>On any failure the results stay empty (all array slots {@code null},
     * {@code nextPage == null}); the error is only printed.
     */
    public void init() {
        // Start from a clean slate so a repeated call never exposes stale entries.
        bName = new String[PAGE_SIZE];
        bLink = new String[PAGE_SIZE];
        nextPage = null;

        try {
            Document doc = Jsoup.connect(url).get();

            int count = 0;
            Elements titleAnchors = doc.getElementsByTag("h3").select("a");
            for (Element anchor : titleAnchors) {
                // The page may contain more matching anchors than the arrays can hold.
                if (count >= PAGE_SIZE) {
                    break;
                }
                bName[count] = anchor.text().trim();
                bLink[count] = anchor.attr("abs:href");
                count++;
            }

            // The pager links carry class="blue"; pick the one labelled "next page".
            Elements pagerLinks = doc.getElementsByAttributeValue("class", "blue");
            for (Element pagerLink : pagerLinks) {
                if (pagerLink.text().contains("下一页")) {
                    nextPage = pagerLink.attr("abs:href");
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            // Jsoup.connect rejects a null or malformed URL with an unchecked exception;
            // treat it like any other failed load instead of crashing the caller's thread.
            e.printStackTrace();
        }
    }

    /**
     * Returns the book titles.
     *
     * @return array of {@value #PAGE_SIZE} slots; unused trailing slots are {@code null}
     */
    public String[] getBookName() {
        return bName;
    }

    /**
     * Returns the absolute detail-page links, index-aligned with {@link #getBookName()}.
     *
     * @return array of {@value #PAGE_SIZE} slots; unused trailing slots are {@code null}
     */
    public String[] getLink() {
        return bLink;
    }

    /**
     * Returns the absolute URL of the next result page.
     *
     * @return the next-page URL, or {@code null} if no "next page" link was found
     *         or the page could not be loaded
     */
    public String getNextPage() {
        return nextPage;
    }

}


三 MainActivity(书名列表界面)

在MainActivity中有一个ListView,把用Jsoup抓取到的数据往里面填充即可。ListView只用了极其简单的布局,一看就能明白。



package com.wyf.newlibrary;

import android.content.Intent;
import android.os.AsyncTask;
import android.os.Bundle;
import android.support.v7.app.AppCompatActivity;
import android.util.Log;
import android.view.KeyEvent;
import android.view.LayoutInflater;
import android.view.View;
import android.view.inputmethod.EditorInfo;
import android.widget.AbsListView;
import android.widget.AdapterView;
import android.widget.ArrayAdapter;
import android.widget.EditText;
import android.widget.ImageButton;
import android.widget.ListView;
import android.widget.ProgressBar;
import android.widget.TextView;
import android.widget.Toast;

import com.umeng.analytics.MobclickAgent;
import com.umeng.update.UmengUpdateAgent;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;

/**
 * Search screen: the user types a title keyword, the matching books are listed,
 * and further result pages are loaded when the list is scrolled to its end.
 * Tapping an entry opens {@link BookInfoActivity} for that book.
 */
public class MainActivity extends AppCompatActivity {

    EditText edit_search;
    /**
     * URL of the next result page to load. {@code null} until a search has been
     * started and again once the last page has been loaded (or a load failed).
     */
    String url, key;
    ListView list_bookname;
    ArrayList<String> bookName;  // titles shown in the list
    ArrayList<String> bookLink;  // detail-page links, index-aligned with bookName
    ArrayAdapter<String> adapter;
    /** The load currently in flight, or {@code null} when idle. */
    GetBookName get;
    View foot, complete, fail;
    ProgressBar progressBar;
    boolean firstLoad = false;
    ImageButton ibtn_clear;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        if (getSupportActionBar() != null) {
            getSupportActionBar().hide();
        }
        setContentView(R.layout.activity_main);

        init();
        initEvent();
        list_bookname.setAdapter(adapter);
    }

    /** Wires up the search box, the endless-scroll listener and item clicks. */
    private void initEvent() {

        edit_search.setOnEditorActionListener(new TextView.OnEditorActionListener() {
            @Override
            public boolean onEditorAction(TextView v, int actionId, KeyEvent event) {
                if (actionId == EditorInfo.IME_ACTION_SEARCH) {
                    startSearch();
                }
                return false;
            }
        });

        list_bookname.setOnScrollListener(new AbsListView.OnScrollListener() {
            // The AbsListView passed in is the ListView itself.
            @Override
            public void onScrollStateChanged(AbsListView view, int scrollState) {
                if (scrollState != AbsListView.OnScrollListener.SCROLL_STATE_IDLE) {
                    return;
                }
                if (view.getLastVisiblePosition() != view.getCount() - 1) {
                    return;
                }
                if (get != null) {
                    // A page is already loading; starting another would duplicate it.
                    return;
                }

                if (url != null) {
                    showFooter(foot);
                    get = new GetBookName();
                    get.execute();
                } else if (bookName.isEmpty()) {
                    showFooter(fail);
                } else {
                    showFooter(complete);
                }
            }

            @Override
            public void onScroll(AbsListView view, int firstVisibleItem,
                                 int visibleItemCount, int totalItemCount) {
                // Nothing to track; paging is decided in onScrollStateChanged.
            }
        });

        // Open the detail screen of the tapped book.
        list_bookname.setOnItemClickListener(new AdapterView.OnItemClickListener() {
            @Override
            public void onItemClick(AdapterView<?> parent, View view, int position, long id) {
                Intent intent = new Intent(MainActivity.this, BookInfoActivity.class);
                intent.putExtra("href", bookLink.get(position));
                startActivity(intent);
            }
        });
    }

    /**
     * Starts a fresh search for the keyword currently in the search box.
     * An empty keyword only shows a toast; the current results are left untouched.
     */
    private void startSearch() {
        String keyword = edit_search.getText().toString().trim();
        if (keyword.isEmpty()) {
            Toast.makeText(MainActivity.this, "不能为空", Toast.LENGTH_SHORT).show();
            return;
        }

        try {
            keyword = URLEncoder.encode(keyword, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Falls through with the raw keyword, as before.
            Log.e("TAG", "UTF-8 encoding unavailable", e);
        }

        // Drop the result of any load still running for the previous search.
        if (get != null) {
            get.cancel(true);
            get = null;
        }

        bookName.clear();
        bookLink.clear();
        // The adapter must be told right away that its backing list changed.
        adapter.notifyDataSetChanged();
        list_bookname.removeFooterView(foot);
        list_bookname.removeFooterView(fail);
        list_bookname.removeFooterView(complete);

        firstLoad = true;
        get = new GetBookName(keyword);
        get.execute();
    }

    /** Shows {@code footer} below the list, making sure it is attached only once. */
    private void showFooter(View footer) {
        list_bookname.removeFooterView(footer);
        list_bookname.addFooterView(footer);
    }

    /** Looks up views and creates the list, adapter and footer views. */
    private void init() {

        // ibtn_clear= (ImageButton) findViewById(R.id.ibtn_clear);
        edit_search = (EditText) findViewById(R.id.edit_search);
        list_bookname = (ListView) findViewById(R.id.list_bookname);
        bookName = new ArrayList<String>();
        bookLink = new ArrayList<String>();
        adapter = new ArrayAdapter<String>(MainActivity.this, android.R.layout.simple_list_item_1, bookName);
        foot = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_foot, null);
        fail = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_fail, null);
        complete = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_complete, null);
        progressBar = (ProgressBar) findViewById(R.id.progressBar);

        UmengUpdateAgent.update(this);
    }

    /**
     * Loads one page of search results in the background and appends it to the list.
     *
     * <p>The one-argument constructor starts a new search (it points the activity's
     * {@code url} at page 1 for the keyword); the no-argument constructor continues
     * with whatever {@code url} currently holds, i.e. the next page.
     */
    class GetBookName extends AsyncTask<Void, Void, Void> {

        BookNameJsoup jsoup;
        URL href;
        /** Snapshot of {@code url} taken on the UI thread for use in the background. */
        private String requestUrl;

        /**
         * @param keyword URL-encoded title keyword to search for
         */
        public GetBookName(String keyword) {
            super();
            url = "http://222.188.3.137:8080/opac/openlink.php?strSearchType=title&match_flag=forward&historyCount=1&strText=" + keyword +
                    "&doctype=ALL&with_ebook=on&displaypg=20&showmode=list&sort=CATA_DATE&orderby=desc&dept=ALL&page=1";

            try {
                href = new URL(url);
                url = href.toString();
            } catch (MalformedURLException e) {
                Log.e("TAG", "Malformed search URL: " + url, e);
            }
        }

        public GetBookName() {
        }

        @Override
        protected void onPreExecute() {
            super.onPreExecute();
            Log.d("TAG", "onPreExecute: " + url);
            // Freeze the URL now; the field may be reassigned while we are running.
            requestUrl = url;
            if (firstLoad) {
                progressBar.setVisibility(View.VISIBLE);
                firstLoad = false;
            }
        }

        @Override
        protected Void doInBackground(Void... params) {
            jsoup = new BookNameJsoup(requestUrl);
            jsoup.init();
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            super.onPostExecute(result);
            if (get == this) {
                get = null;
            }
            progressBar.setVisibility(View.GONE);

            String[] book = jsoup.getBookName();
            String[] link = jsoup.getLink();

            // Results are packed at the front of the arrays; the first null ends them.
            for (int i = 0; i < book.length; i++) {
                if (book[i] == null) {
                    break;
                }
                bookName.add(book[i]);
                Log.d("TAG", "onPostExecute: " + book[i]);
                if (link != null) {
                    bookLink.add(link[i]);
                }
            }

            if (bookName.isEmpty()) {
                showFooter(fail);
            } else {
                list_bookname.removeFooterView(fail);
            }

            // null here means there is no further page to load.
            url = jsoup.getNextPage();
            list_bookname.removeFooterView(foot);
            adapter.notifyDataSetChanged();
        }
    }

}


四 书的详细信息

这里展现的是书的信息。如图



package com.wyf.newlibrary;

import android.os.AsyncTask;
import android.os.Bundle;
import android.support.v7.app.AppCompatActivity;
import android.util.Log;
import android.view.MenuItem;
import android.view.View;
import android.widget.ImageView;
import android.widget.ScrollView;
import android.widget.TextView;

import com.android.volley.RequestQueue;
import com.android.volley.Response;
import com.android.volley.VolleyError;
import com.android.volley.toolbox.StringRequest;
import com.android.volley.toolbox.Volley;
import com.umeng.analytics.MobclickAgent;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.UnsupportedEncodingException;

/**
 * Detail screen for a single book. The page whose URL arrives in the "href"
 * intent extra is downloaded with Volley, parsed with Jsoup in the background,
 * and the title, summary and shelf locations are shown.
 */
public class BookInfoActivity extends AppCompatActivity {

    TextView text_bookName, text_douban, text_position;
    ImageView image_logo;
    String url, logoUrl;
    ScrollView scrollView;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_book_info);
        if (getSupportActionBar() != null) {
            getSupportActionBar().setDisplayHomeAsUpEnabled(true);
        }
        setTitle("图书信息");
        url = getIntent().getStringExtra("href");

        init();

        RequestQueue queue = Volley.newRequestQueue(BookInfoActivity.this);
        StringRequest stringRequest = new StringRequest(url, new Response.Listener<String>() {
            @Override
            public void onResponse(String s) {
                try {
                    // Re-decode the body as UTF-8.
                    // NOTE(review): presumably Volley decoded it as ISO-8859-1 because the
                    // server sends no charset header; confirm, since this would corrupt
                    // a body that was already decoded correctly.
                    s = new String(s.getBytes("ISO-8859-1"), "utf-8");
                    new GetDetail(s).execute();
                } catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
            }
        }, new Response.ErrorListener() {
            @Override
            public void onErrorResponse(VolleyError volleyError) {
                Log.d("TAG", "onErrorResponse: " + "i fail");
            }
        });
        queue.add(stringRequest);
    }

    /** Looks up the views this screen writes to. */
    private void init() {
        text_bookName = (TextView) findViewById(R.id.text_bookname);
        text_douban = (TextView) findViewById(R.id.text_douban);
        text_position = (TextView) findViewById(R.id.text_position);
        scrollView = (ScrollView) findViewById(R.id.scroll_position);
    }

    @Override
    public boolean onOptionsItemSelected(MenuItem item) {
        // The action-bar "up" arrow simply closes this screen.
        if (item.getItemId() == android.R.id.home) {
            finish();
            return true;
        }
        return super.onOptionsItemSelected(item);
    }

    /**
     * Parses the downloaded detail page off the UI thread and then fills the views.
     */
    class GetDetail extends AsyncTask<Void, Void, Void> {
        // "110" is the sentinel meaning "no summary found"; onPostExecute hides the view for it.
        String response, name, douBan = "110", position = "图书位置:\n";

        /**
         * @param response HTML of the book's detail page
         */
        public GetDetail(String response) {
            super();
            this.response = response;
        }

        @Override
        protected Void doInBackground(Void... params) {
            Document doc = Jsoup.parse(response);

            // The first .booklist element holds "<label>:<title>"; keep what follows the colon.
            // The element is absent on error pages, so name may stay null.
            Element titleEntry = doc.getElementsByAttributeValue("class", "booklist").first();
            if (titleEntry != null) {
                name = titleEntry.text();
                name = name.substring(name.indexOf(":") + 1);
            }

            // The summary is the entry containing this label plus some actual content;
            // if several match, the last one wins.
            Elements entries = doc.select("dl.booklist");
            for (Element entry : entries) {
                String entryText = entry.text();
                if (entryText.contains("提要文摘附注") && entryText.length() > 8) {
                    douBan = entryText;
                }
            }

            logoUrl = doc.select("img#book_img").attr("src");

            // One line per holdings row.
            Elements rows = doc.getElementsByAttributeValue("align", "left").select("tr.whitetext");
            StringBuilder locations = new StringBuilder(position);
            for (Element row : rows) {
                locations.append(row.text()).append("\n");
            }
            position = locations.toString();

            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            super.onPostExecute(result);
            text_bookName.setText(name);
            text_douban.setText(douBan);
            text_position.setText(position);

            if (douBan.equals("110")) {
                text_douban.setVisibility(View.GONE);
            }

            scrollView.setVisibility(View.VISIBLE);
        }
    }

}


五 总结

这个Demo其实并不难,很容易理解,但需要对Jsoup和异步请求有所了解。你也可以做出自己的第三方客户端。

附上源码http://pan.baidu.com/s/1c1B1VIo
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: