Java网络编程(一):利用Java技术读取网页做一个简单爬网页上邮箱的网络蜘蛛
2017-04-11 13:24
507 查看
原理很简单:用输入流读入网页内容,然后用正则表达式逐行匹配,凡是符合邮箱格式的就记录下来。当然也可以爬取其他内容,而且可以做得更复杂——比如遇到超链接或更深层的页面时继续进去爬,这其实就是一种搜索。我这里比较简单,只爬单个网页。
先给出基础版(结果输出到控制台);后面改进了,做了个图形界面的。
package cn.hncu.dage.Spider;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.junit.Test;

/**
 * Minimal "spider" demo: scans text line by line and prints every substring
 * that looks like an e-mail address to standard output.
 *
 * <p>Both tests share one scanning helper that always closes the reader it is
 * given, so neither the local file handle nor the network stream is leaked.
 */
public class SpiderDemo {

    /** Loose e-mail matcher: word characters, '@', then a dotted domain. */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Prints the e-mail addresses found in the local file {@code Mails.html}
     * (resolved against the working directory).
     *
     * @throws IOException if the file cannot be opened or read
     */
    @Test
    public void getMails() throws IOException {
        printMails(new BufferedReader(new FileReader("Mails.html")));
    }

    /**
     * Prints the e-mail addresses found in the page served at
     * {@code http://www.sina.com}.
     *
     * @throws IOException if the page cannot be fetched or read
     */
    @Test
    public void getMailsBynet() throws IOException {
        URL url = new URL("http://www.sina.com");
        printMails(new BufferedReader(new InputStreamReader(url.openStream())));
    }

    /**
     * Reads {@code reader} to the end, printing each e-mail match on its own
     * line, and closes the reader whether or not reading succeeds.
     *
     * @param reader source of the text to scan; closed by this method
     * @throws IOException if reading or closing fails
     */
    private static void printMails(BufferedReader reader) throws IOException {
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = MAIL_PATTERN.matcher(line);
                // A single line may contain several addresses.
                while (matcher.find()) {
                    System.out.println(matcher.group());
                }
            }
        } finally {
            reader.close();
        }
    }
}
改进版:做了个图形界面的(由 SpiderFrame 和 SpiderPanel 两个类组成)。
/* * SpiderFrame.java * * Created on __DATE__, __TIME__ */ package cn.hncu.dage.Spider.v; /** * * @author __USER__ */ public class SpiderFrame extends javax.swing.JFrame { /** Creates new form SpiderFrame */ public SpiderFrame() { initComponents(); this.setBounds(400, 100, 500, 400); this.setContentPane(new SpiderPanel()); } /** This method is called from within the constructor to * initialize the form. * WARNING: Do NOT modify this code. The content of this method is * always regenerated by the Form Editor. */ //GEN-BEGIN:initComponents // <editor-fold defaultstate="collapsed" desc="Generated Code"> private void initComponents() { menuBar = new javax.swing.JMenuBar(); fileMenu = new javax.swing.JMenu(); openMenuItem = new javax.swing.JMenuItem(); saveMenuItem = new javax.swing.JMenuItem(); saveAsMenuItem = new javax.swing.JMenuItem(); exitMenuItem = new javax.swing.JMenuItem(); editMenu = new javax.swing.JMenu(); cutMenuItem = new javax.swing.JMenuItem(); copyMenuItem = new javax.swing.JMenuItem(); pasteMenuItem = new javax.swing.JMenuItem(); deleteMenuItem = new javax.swing.JMenuItem(); helpMenu = new javax.swing.JMenu(); contentsMenuItem = new javax.swing.JMenuItem(); aboutMenuItem = new javax.swing.JMenuItem(); setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); setMinimumSize(new java.awt.Dimension(300, 400)); fileMenu.setText("File"); openMenuItem.setText("Open"); fileMenu.add(openMenuItem); saveMenuItem.setText("Save"); fileMenu.add(saveMenuItem); saveAsMenuItem.setText("Save As ..."); fileMenu.add(saveAsMenuItem); exitMenuItem.setText("Exit"); exitMenuItem.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { exitMenuItemActionPerformed(evt); } }); fileMenu.add(exitMenuItem); menuBar.add(fileMenu); editMenu.setText("Edit"); cutMenuItem.setText("Cut"); editMenu.add(cutMenuItem); copyMenuItem.setText("Copy"); editMenu.add(copyMenuItem); pasteMenuItem.setText("Paste"); 
editMenu.add(pasteMenuItem); deleteMenuItem.setText("Delete"); editMenu.add(deleteMenuItem); menuBar.add(editMenu); helpMenu.setText("Help"); contentsMenuItem.setText("Contents"); helpMenu.add(contentsMenuItem); aboutMenuItem.setText("About"); helpMenu.add(aboutMenuItem); menuBar.add(helpMenu); setJMenuBar(menuBar); javax.swing.GroupLayout layout = new javax.swing.GroupLayout( getContentPane()); getContentPane().setLayout(layout); layout.setHorizontalGroup(layout.createParallelGroup( javax.swing.GroupLayout.Alignment.LEADING).addGap(0, 400, Short.MAX_VALUE)); layout.setVerticalGroup(layout.createParallelGroup( javax.swing.GroupLayout.Alignment.LEADING).addGap(0, 279, Short.MAX_VALUE)); pack(); }// </editor-fold> //GEN-END:initComponents private void exitMenuItemActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_exitMenuItemActionPerformed System.exit(0); }//GEN-LAST:event_exitMenuItemActionPerformed /** * @param args the command line arguments */ public static void main(String args[]) { java.awt.EventQueue.invokeLater(new Runnable() { public void run() { new SpiderFrame().setVisible(true); } }); } //GEN-BEGIN:variables // Variables declaration - do not modify private javax.swing.JMenuItem aboutMenuItem; private javax.swing.JMenuItem contentsMenuItem; private javax.swing.JMenuItem copyMenuItem; private javax.swing.JMenuItem cutMenuItem; private javax.swing.JMenuItem deleteMenuItem; private javax.swing.JMenu editMenu; private javax.swing.JMenuItem exitMenuItem; private javax.swing.JMenu fileMenu; private javax.swing.JMenu helpMenu; private javax.swing.JMenuBar menuBar; private javax.swing.JMenuItem openMenuItem; private javax.swing.JMenuItem pasteMenuItem; private javax.swing.JMenuItem saveAsMenuItem; private javax.swing.JMenuItem saveMenuItem; // End of variables declaration//GEN-END:variables }
/*
 * SpiderPanel.java
 *
 * Created on __DATE__, __TIME__
 */
package cn.hncu.dage.Spider.v;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.JOptionPane;

/**
 * Panel holding the spider UI: a title, an address field, a result list and
 * "search" / "exit" buttons. Pressing "search" downloads the page at
 * {@code http://<address>} and lists every e-mail-like substring found in it.
 *
 * @author __USER__
 */
public class SpiderPanel extends javax.swing.JPanel {

    /**
     * Accepted address form: {@code www.<labels>.<tld>} with one of the listed
     * top-level domains, optionally followed by a port, path, query or
     * fragment. Applied with {@code matches()}, so the whole input must fit.
     */
    private static final Pattern ADDRESS_PATTERN = Pattern.compile(
            "www\\.[\\w-]+(\\.[\\w-]+)*\\.(net|com|cn|org|cc|tv)([/:?#].*)?");

    /** Loose e-mail matcher: word characters, '@', then a dotted domain. */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /** Creates new form SpiderPanel */
    public SpiderPanel() {
        initComponents();
        this.setBounds(400, 100, 500, 400);
    }

    /**
     * Creates the child components, wires their listeners and arranges them
     * with a {@code GroupLayout}. Called once from the constructor.
     */
    //GEN-BEGIN:initComponents
    // <editor-fold defaultstate="collapsed" desc="Generated Code">
    private void initComponents() {

        jLabel1 = new javax.swing.JLabel();
        jLabel2 = new javax.swing.JLabel();
        tfdMail = new javax.swing.JTextField();
        jScrollPane1 = new javax.swing.JScrollPane();
        ListShow = new javax.swing.JList();
        btnDfs = new javax.swing.JButton();
        btnExit = new javax.swing.JButton();

        jLabel1.setFont(new java.awt.Font("黑体", 0, 24));
        jLabel1.setForeground(new java.awt.Color(102, 102, 0));
        jLabel1.setText("\u7f51\u7edc\u8718\u86db");

        jLabel2.setFont(new java.awt.Font("黑体", 0, 18));
        jLabel2.setText("\u641c\u7d22\u7f51\u5740\uff1a");

        tfdMail.setFont(new java.awt.Font("黑体", 0, 24));
        tfdMail.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                tfdMailActionPerformed(evt);
            }
        });

        ListShow.setFont(new java.awt.Font("黑体", 0, 18));
        ListShow.setForeground(new java.awt.Color(0, 153, 153));
        // Placeholder model with a single empty row until the first search.
        ListShow.setModel(new javax.swing.AbstractListModel() {
            String[] strings = { "" };

            public int getSize() {
                return strings.length;
            }

            public Object getElementAt(int i) {
                return strings[i];
            }
        });
        jScrollPane1.setViewportView(ListShow);

        btnDfs.setFont(new java.awt.Font("黑体", 0, 18));
        btnDfs.setText("\u641c\u7d22");
        btnDfs.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                btnDfsActionPerformed(evt);
            }
        });

        btnExit.setFont(new java.awt.Font("黑体", 0, 18));
        btnExit.setText("\u9000\u51fa");
        btnExit.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                btnExitActionPerformed(evt);
            }
        });

        javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
        this.setLayout(layout);
        layout.setHorizontalGroup(layout
                .createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
                .addGroup(
                        javax.swing.GroupLayout.Alignment.TRAILING,
                        layout.createSequentialGroup()
                                .addContainerGap(139, Short.MAX_VALUE)
                                .addComponent(jLabel1,
                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                        134,
                                        javax.swing.GroupLayout.PREFERRED_SIZE)
                                .addGap(127, 127, 127))
                .addGroup(
                        layout.createSequentialGroup()
                                .addGroup(
                                        layout.createParallelGroup(
                                                javax.swing.GroupLayout.Alignment.TRAILING,
                                                false)
                                                .addGroup(
                                                        layout.createSequentialGroup()
                                                                .addGap(20, 20, 20)
                                                                .addComponent(
                                                                        jScrollPane1,
                                                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                                                        231,
                                                                        javax.swing.GroupLayout.PREFERRED_SIZE)
                                                                .addPreferredGap(
                                                                        javax.swing.LayoutStyle.ComponentPlacement.RELATED,
                                                                        62,
                                                                        Short.MAX_VALUE)
                                                                .addGroup(
                                                                        layout.createParallelGroup(
                                                                                javax.swing.GroupLayout.Alignment.TRAILING)
                                                                                .addComponent(
                                                                                        btnDfs)
                                                                                .addComponent(
                                                                                        btnExit)))
                                                .addGroup(
                                                        javax.swing.GroupLayout.Alignment.LEADING,
                                                        layout.createSequentialGroup()
                                                                .addGap(31, 31, 31)
                                                                .addComponent(
                                                                        jLabel2,
                                                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                                                        100,
                                                                        javax.swing.GroupLayout.PREFERRED_SIZE)
                                                                .addPreferredGap(
                                                                        javax.swing.LayoutStyle.ComponentPlacement.RELATED)
                                                                .addComponent(
                                                                        tfdMail,
                                                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                                                        246,
                                                                        javax.swing.GroupLayout.PREFERRED_SIZE)))
                                .addContainerGap(18, Short.MAX_VALUE)));
        layout.setVerticalGroup(layout
                .createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
                .addGroup(
                        layout.createSequentialGroup()
                                .addContainerGap()
                                .addComponent(jLabel1,
                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                        51,
                                        javax.swing.GroupLayout.PREFERRED_SIZE)
                                .addPreferredGap(
                                        javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
                                .addGroup(
                                        layout.createParallelGroup(
                                                javax.swing.GroupLayout.Alignment.BASELINE)
                                                .addComponent(
                                                        jLabel2,
                                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                                        43,
                                                        javax.swing.GroupLayout.PREFERRED_SIZE)
                                                .addComponent(
                                                        tfdMail,
                                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                                        33,
                                                        javax.swing.GroupLayout.PREFERRED_SIZE))
                                .addPreferredGap(
                                        javax.swing.LayoutStyle.ComponentPlacement.RELATED)
                                .addGroup(
                                        layout.createParallelGroup(
                                                javax.swing.GroupLayout.Alignment.LEADING)
                                                .addGroup(
                                                        javax.swing.GroupLayout.Alignment.TRAILING,
                                                        layout.createSequentialGroup()
                                                                .addComponent(
                                                                        btnDfs)
                                                                .addGap(30, 30, 30)
                                                                .addComponent(
                                                                        btnExit))
                                                .addComponent(
                                                        jScrollPane1,
                                                        javax.swing.GroupLayout.Alignment.TRAILING,
                                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                                        128,
                                                        javax.swing.GroupLayout.PREFERRED_SIZE))
                                .addContainerGap(53, Short.MAX_VALUE)));
    }// </editor-fold>
    //GEN-END:initComponents

    /**
     * Handles the "exit" button by terminating the JVM with status 0.
     *
     * @param evt the triggering action event (unused)
     */
    private void btnExitActionPerformed(java.awt.event.ActionEvent evt) {
        System.exit(0);
    }

    /**
     * Handles the "search" button: validates the typed address, downloads the
     * page and shows every e-mail match in the result list. Invalid input and
     * download failures are reported in a message dialog; in those cases the
     * list keeps its previous content.
     *
     * @param evt the triggering action event (unused)
     */
    private void btnDfsActionPerformed(java.awt.event.ActionEvent evt) {
        // 1. Collect the input; surrounding whitespace would otherwise end up
        //    inside the host name of the URL built below.
        String address = tfdMail.getText().trim();

        // 2. Reject anything that is not a www.<host>.<tld>[/path] address.
        if (!ADDRESS_PATTERN.matcher(address).matches()) {
            JOptionPane.showMessageDialog(this, "请输入正确的网址格式");
            return;
        }

        // 3. Fetch and scan the page.
        // NOTE(review): this runs on the thread that delivered the button
        // event, so the UI does not repaint until the download finishes.
        try {
            List<String> mails = fetchMails(new URL("http://" + address));
            ListShow.setListData(mails.toArray());
        } catch (MalformedURLException e) {
            JOptionPane.showMessageDialog(this, "网址无法解析:" + e.getMessage());
        } catch (IOException e) {
            JOptionPane.showMessageDialog(this, "读取网页失败:" + e);
        }
    }

    /**
     * Downloads the resource at {@code url} line by line and collects every
     * substring matching {@link #MAIL_PATTERN}, in order of appearance
     * (duplicates are kept). The stream is closed before returning, also when
     * reading fails.
     *
     * @param url the page to scan
     * @return the matches found, possibly empty
     * @throws IOException if the connection cannot be opened or read
     */
    private static List<String> fetchMails(URL url) throws IOException {
        List<String> mails = new ArrayList<String>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                url.openStream()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = MAIL_PATTERN.matcher(line);
                // A single line may contain several addresses.
                while (matcher.find()) {
                    mails.add(matcher.group());
                }
            }
        } finally {
            reader.close();
        }
        return mails;
    }

    /**
     * Invoked when an action event fires on the address field; currently does
     * nothing.
     *
     * @param evt the triggering action event (unused)
     */
    private void tfdMailActionPerformed(java.awt.event.ActionEvent evt) {
        // TODO add your handling code here:
    }

    //GEN-BEGIN:variables
    // Variables declaration - do not modify
    private javax.swing.JList ListShow;
    private javax.swing.JButton btnDfs;
    private javax.swing.JButton btnExit;
    private javax.swing.JLabel jLabel1;
    private javax.swing.JLabel jLabel2;
    private javax.swing.JScrollPane jScrollPane1;
    private javax.swing.JTextField tfdMail;
    // End of variables declaration//GEN-END:variables
}
相关文章推荐
- 简单的Java网络爬虫(获取一个网页中的邮箱)
- 网络蜘蛛--抓取一个网页的邮箱
- 一个简单java爬虫爬取网页中邮箱并保存
- Java---网络蜘蛛-网页邮箱抓取器~源码
- 一个简单java爬虫爬取网页中邮箱并保存
- 利用Java Swing技术设计一个Email邮箱地址注册的图形用户界面应用程序
- java利用正则表达式获取一个网页中的所有邮箱地址
- 一个简单java爬虫爬取网页中邮箱并保存
- 一个简单的java读取网页图片并保存图片的程序
- Java---网络蜘蛛-网页邮箱抓取器~源码
- 简单的使用自动化技术实现用WORD读取一个XML文件的过程~
- 利用C#编写一个简单的抓网页应用程序
- 利用C#编写一个简单的抓网页应用程序
- 一个简单的java网络爬虫(spider)
- 利用Java技术开发Web网络课件浅议-Java基础-Java-编程开发
- 利用memcached java client一个简单的应用
- 一个简单的Java网络编程代码
- 利用ajax技术验证数据是否存在的一个简单例子
- 一个简单的JAVA网页爬虫
- 一个非常简单的分页技术MYSQL+JSP 利用了mysql的LIMIT参数