综合运用httpClient和Swt Browser实现网页抓取,修改和显示
2011-06-26 00:09
363 查看
httpClient是apache的开源框架,封装了http协议,实现了对http访问的强大管理。
Swt Browser是java下一个对javascript支持比较好的模拟浏览器,可用它在applet中显示网页。
以下为源代码
MyBrowser.java: 用 Canvas、Display、Shell、Browser 在applet中显示页面。
MyFrame.java : Frame 类
SearchEngine.java: 搜索类,实现自动搜索
Swt Browser是java下一个对javascript支持比较好的模拟浏览器,可用它在applet中显示网页。
以下为源代码
MyBrowser.java: 用 Canvas、Display、Shell、Browser 在applet中显示页面。
import java.awt.BorderLayout; import java.awt.Canvas; import javax.swing.JButton; import javax.swing.JFrame; import javax.swing.JPanel; import javax.swing.JTextField; import org.eclipse.swt.SWT; import org.eclipse.swt.awt.SWT_AWT; import org.eclipse.swt.browser.Browser; import org.eclipse.swt.layout.FillLayout; import org.eclipse.swt.widgets.Display; import org.eclipse.swt.widgets.Shell; public class MyBrowser extends JFrame { public static final int BOARD_WIDTH = 600; public static final int BOARD_HEIGHT = 500; public static final int LOCATION_X = 100; public static final int LOCATION_Y = 50; JTextField txtField = new JTextField(30); JButton startButton = new JButton("Search"); JButton exitButton = new JButton("Exit"); public Shell shell; public Browser browser; public Display display; public Canvas canvas; public MyFrame frame; public void init(){ System.setProperty("sun.awt.xembedserver", "true"); display = Display.getDefault(); canvas = new Canvas(); frame = new MyFrame("BrowserListener"); frame.init(this); frame.add(canvas,BorderLayout.CENTER); frame.pack(); shell = SWT_AWT.new_Shell(display, canvas); shell.setLayout(new FillLayout(SWT.DOWN)); browser = new Browser(shell, SWT.EMBEDDED); //browser.setUrl("www.google.com"); String html = "<html><head>"+ "<base href=/"http://www.eclipse.org/swt//" >"+ "<title>HTML Test</title></head>"+ "<body><a href=/"faq.php/">local link</a></body></html>"; browser.setText(html); browser.setVisible(true); shell.open(); frame.setSize(800, 600); frame.setVisible(true); while (!shell.isDisposed()) { if (!display.readAndDispatch()) display.sleep(); } display.dispose(); } public void run(final String script){ String html = "<html><head>"+ "<base href=/"http://www.eclipse.org/swt//" >"+ "<title>HTML Test</title></head>"+ "<body><a href=/"faq.php/">local link</a></body></html>"; this.display.asyncExec(new Runnable(){ public void run(){ //browser.setUrl("www.google.com"); browser.setText(script); } }); } public static void 
main(String[] args) { MyBrowser myBrowser = new MyBrowser(); myBrowser.init(); } }
MyFrame.java : Frame 类
import java.awt.BorderLayout; import java.awt.Frame; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.IOException; import javax.swing.JButton; import javax.swing.JFrame; import javax.swing.JPanel; import javax.swing.JTextField; import org.apache.http.client.ClientProtocolException; public class MyFrame extends JFrame implements ActionListener{ JTextField txtField = new JTextField(30); JButton startButton = new JButton("Search"); //JButton exitButton = new JButton("Exit"); SearchEngine searchEngine; public MyBrowser myBrowser; public MyFrame(String title){ super(title); } public MyFrame(){ super(); } public void init(MyBrowser myBrowser){ this.myBrowser = myBrowser; searchEngine = new SearchEngine(); JPanel northPanel = new JPanel(); northPanel.add(txtField); northPanel.add(startButton); //northPanel.add(exitButton); this.add(northPanel,BorderLayout.NORTH); startButton.addActionListener(this); //exitButton.addActionListener(this); setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); } public void actionPerformed(ActionEvent e){ if(e.getSource()==startButton){ String text = txtField.getText(); String responseBody; try { responseBody = searchEngine.search(text); myBrowser.run(responseBody); } catch (ClientProtocolException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } } }
SearchEngine.java: 搜索类,实现自动搜索
import java.io.IOException; import java.io.InputStream; import java.net.URLEncoder; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.ResponseHandler; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.BasicResponseHandler; import org.apache.http.impl.client.DefaultHttpClient; public class SearchEngine { public static String UrlString = "http://hk.rd.yahoo.com/homeb/search/t1/*-http://hk.search.yahoo.com/search?"; public static int Port = 80; public String search(String text) throws ClientProtocolException, IOException{ DefaultHttpClient httpClient = new DefaultHttpClient(); String queryString = ""; text = URLEncoder.encode(text, "UTF-8"); queryString = queryString + "p=" + text + "&fr=FP-tab-web-t&ei=UTF-8&meta=rst%3Dhk"; String url = UrlString + queryString; HttpGet req = new HttpGet(url); ResponseHandler<String> responseHandler = new BasicResponseHandler(); String responseBody = httpClient.execute(req, responseHandler); //System.out.println(responseBody); /* StringBuilder buffer = new StringBuilder(responseBody); int index1 = 0; int index2 = 0; while((index1 = buffer.indexOf("<script>"))>0){ index2 = buffer.indexOf("</script>"); if(index2>0){ index2 = index2 + 9; buffer.delete(index1, index2); } } responseBody = buffer.toString(); System.out.println(responseBody); */ return responseBody; } }
相关文章推荐
- 一、静态网页的实现 1.运用CSS,让导航菜单在右侧绝对定位显示。 2.运用锚点,实现导航定位。
- 网站截图抓取工具:ubuntu+xvfb+CutyCapt,可以实现高效网页缩略图,支持flash显示
- Python 中利用urllib2简单实现网页抓取
- c#关于网页内容抓取,简单爬虫的实现。(包括动态,静态的)
- request+goquery+mahonia实现自动抓取网页数据
- 实现用火狐进行动态网页抓取
- 【知识积累】使用Httpclient实现网页的爬取并保存至本地
- 实现HTML网页的全屏幕显示或模态显示 (JS代码)
- htmlparser实现从网页上抓取数据
- PHP高级编程之--单线程实现并行抓取网页
- 实现从网页上抓取数据(htmlparser)
- Python3实现抓取javascript动态生成的html网页功能示例
- 实现TEXTAREA数据写入数据库后提取出来原样显示在网页TABLE上并自动换行
- ImageLoad+RollViewPage+Jsoup+WebView带你轻松实现抓取网页数据(附源码)
- jQuery实现鼠标单击网页文字后在文本框显示的方法
- asp.net 抓取网页源码三种实现方法
- 百度编辑器使用 嵌入ci (可灵活运用,建立数据库,写入并读出,实现用户自定义网页页面内容)
- Android 网页抓取(实现新闻客户端)
- 正确的方法去拦截OnNewWindow2实现在同一个窗口显示_target的网页
- Linux下socket实现网页抓取