您的位置:首页 > 理论基础 > 计算机网络

Java网络编程(一):用Java读取网页,实现一个抓取网页上邮箱地址的简单网络蜘蛛

2017-04-11 13:24 507 查看
原理很简单:通过输入流(InputStream)读入网页内容,逐行用正则表达式匹配邮箱地址,匹配到就记录下来。当然也可以抓取其他内容,并做得更复杂——例如遇到超链接时继续进入更深层的页面抓取,也就是对网页进行搜索。这里的实现比较简单,只抓取单个网页。

package cn.hncu.dage.Spider;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.junit.Test;

/**
 * Minimal "spider" demo: scans text line by line for e-mail addresses and
 * prints every match to standard output, one per line.
 *
 * <p>One test reads a local HTML file, the other reads a live web page. Both
 * share the same pattern and the same scanning helper, and both close the
 * underlying reader when they are done.
 */
public class SpiderDemo {

    /** Regular expression for a simple e-mail address; compiled once and shared. */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Prints the e-mail addresses found in the local file {@code Mails.html}
     * (resolved against the current working directory).
     *
     * @throws IOException if the file cannot be opened or read
     */
    @Test
    public void getMails() throws IOException {
        printMailsAndClose(new BufferedReader(new FileReader("Mails.html")));
    }

    /**
     * Prints the e-mail addresses found in the page served at
     * {@code http://www.sina.com}.
     *
     * @throws IOException if the connection cannot be opened or the page cannot be read
     */
    @Test
    public void getMailsBynet() throws IOException {
        URL url = new URL("http://www.sina.com");
        printMailsAndClose(new BufferedReader(new InputStreamReader(url.openStream())));
    }

    /**
     * Reads {@code reader} to the end, printing every e-mail address found on
     * each line, and always closes the reader afterwards.
     *
     * @param reader source of the text to scan; closed by this method
     * @throws IOException if reading or closing fails
     */
    private static void printMailsAndClose(BufferedReader reader) throws IOException {
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = MAIL_PATTERN.matcher(line);
                // A single line may contain several addresses.
                while (matcher.find()) {
                    System.out.println(matcher.group());
                }
            }
        } finally {
            // Release the file handle / network connection even when reading fails.
            reader.close();
        }
    }

}


改进版:在上面的基础上做了一个图形界面。

/*
* SpiderFrame.java
*
* Created on __DATE__, __TIME__
*/

package cn.hncu.dage.Spider.v;

/**
 * Top-level window of the spider GUI.
 *
 * <p>The frame installs a File / Edit / Help menu bar and uses a
 * {@link SpiderPanel} as its content pane. Within this class only the "Exit"
 * menu item has an action listener; the remaining menu items are created
 * without handlers.
 *
 * @author  __USER__
 */
public class SpiderFrame extends javax.swing.JFrame {

/**
 * Creates the frame: builds the generated components, positions the window at
 * (400, 100) with size 500x400, then replaces the content pane with a new
 * {@link SpiderPanel}.
 */
public SpiderFrame() {
initComponents();
this.setBounds(400, 100, 500, 400);
// Replaces the content pane that initComponents() configured with the spider UI.
this.setContentPane(new SpiderPanel());
}

/** This method is called from within the constructor to
* initialize the form. It creates the menu bar with its File, Edit and Help
* menus, makes closing the window exit the application, sets the minimum
* size, installs a GroupLayout containing only gaps on the content pane and
* finally packs the frame.
* WARNING: Do NOT modify this code. The content of this method is
* always regenerated by the Form Editor.
*/
//GEN-BEGIN:initComponents
// <editor-fold defaultstate="collapsed" desc="Generated Code">
private void initComponents() {

menuBar = new javax.swing.JMenuBar();
fileMenu = new javax.swing.JMenu();
openMenuItem = new javax.swing.JMenuItem();
saveMenuItem = new javax.swing.JMenuItem();
saveAsMenuItem = new javax.swing.JMenuItem();
exitMenuItem = new javax.swing.JMenuItem();
editMenu = new javax.swing.JMenu();
cutMenuItem = new javax.swing.JMenuItem();
copyMenuItem = new javax.swing.JMenuItem();
pasteMenuItem = new javax.swing.JMenuItem();
deleteMenuItem = new javax.swing.JMenuItem();
helpMenu = new javax.swing.JMenu();
contentsMenuItem = new javax.swing.JMenuItem();
aboutMenuItem = new javax.swing.JMenuItem();

// Closing the window terminates the JVM.
setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
setMinimumSize(new java.awt.Dimension(300, 400));

// "File" menu: Open, Save, Save As ..., Exit (only Exit has a listener).
fileMenu.setText("File");

openMenuItem.setText("Open");
fileMenu.add(openMenuItem);

saveMenuItem.setText("Save");
fileMenu.add(saveMenuItem);

saveAsMenuItem.setText("Save As ...");
fileMenu.add(saveAsMenuItem);

exitMenuItem.setText("Exit");
exitMenuItem.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
exitMenuItemActionPerformed(evt);
}
});
fileMenu.add(exitMenuItem);

menuBar.add(fileMenu);

// "Edit" menu: Cut, Copy, Paste, Delete (no listeners attached here).
editMenu.setText("Edit");

cutMenuItem.setText("Cut");
editMenu.add(cutMenuItem);

copyMenuItem.setText("Copy");
editMenu.add(copyMenuItem);

pasteMenuItem.setText("Paste");
editMenu.add(pasteMenuItem);

deleteMenuItem.setText("Delete");
editMenu.add(deleteMenuItem);

menuBar.add(editMenu);

// "Help" menu: Contents, About (no listeners attached here).
helpMenu.setText("Help");

contentsMenuItem.setText("Contents");
helpMenu.add(contentsMenuItem);

aboutMenuItem.setText("About");
helpMenu.add(aboutMenuItem);

menuBar.add(helpMenu);

setJMenuBar(menuBar);

// Layout for the default content pane: a single gap in each direction
// (preferred 400 wide, 279 high), with no child components.
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(
getContentPane());
getContentPane().setLayout(layout);
layout.setHorizontalGroup(layout.createParallelGroup(
javax.swing.GroupLayout.Alignment.LEADING).addGap(0, 400,
Short.MAX_VALUE));
layout.setVerticalGroup(layout.createParallelGroup(
javax.swing.GroupLayout.Alignment.LEADING).addGap(0, 279,
Short.MAX_VALUE));

pack();
}// </editor-fold>
//GEN-END:initComponents

/**
 * Handles the "Exit" menu item by terminating the JVM with status 0.
 *
 * @param evt the triggering action event (not used)
 */
private void exitMenuItemActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_exitMenuItemActionPerformed
System.exit(0);
}//GEN-LAST:event_exitMenuItemActionPerformed

/**
* Application entry point: creates the frame and makes it visible from a
* task queued on the AWT event queue.
*
* @param args the command line arguments (not used)
*/
public static void main(String args[]) {
java.awt.EventQueue.invokeLater(new Runnable() {
public void run() {
new SpiderFrame().setVisible(true);
}
});
}

//GEN-BEGIN:variables
// Variables declaration - do not modify
private javax.swing.JMenuItem aboutMenuItem;
private javax.swing.JMenuItem contentsMenuItem;
private javax.swing.JMenuItem copyMenuItem;
private javax.swing.JMenuItem cutMenuItem;
private javax.swing.JMenuItem deleteMenuItem;
private javax.swing.JMenu editMenu;
private javax.swing.JMenuItem exitMenuItem;
private javax.swing.JMenu fileMenu;
private javax.swing.JMenu helpMenu;
private javax.swing.JMenuBar menuBar;
private javax.swing.JMenuItem openMenuItem;
private javax.swing.JMenuItem pasteMenuItem;
private javax.swing.JMenuItem saveAsMenuItem;
private javax.swing.JMenuItem saveMenuItem;
// End of variables declaration//GEN-END:variables

}


/*
* SpiderPanel.java
*
* Created on __DATE__, __TIME__
*/

package cn.hncu.dage.Spider.v;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.JOptionPane;

/**
 * Panel of the spider GUI: the user types a web address, presses the search
 * button, and every e-mail address found on that page is shown in a list.
 *
 * <p>The page is downloaded synchronously inside the button's action listener,
 * so the UI does not respond until the download has finished.
 *
 * @author  __USER__
 */
public class SpiderPanel extends javax.swing.JPanel {

    /** Regular expression for a simple e-mail address; compiled once and shared. */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Accepted address format: an optional {@code http://} or {@code https://}
     * scheme (group 1), a host starting with {@code www.} and ending in one of
     * the supported top-level domains, and an optional port/path/query tail.
     * The dots are escaped so that they only match a literal '.'.
     */
    private static final Pattern ADDRESS_PATTERN = Pattern.compile(
            "(https?://)?www\\.[\\w-]+(\\.[\\w-]+)*\\.(net|com|cn|org|cc|tv)([:/?#]\\S*)?");

    /** Creates new form SpiderPanel */
    public SpiderPanel() {
        initComponents();
        this.setBounds(400, 100, 500, 400);
    }

    /** This method is called from within the constructor to
     * initialize the form.
     * WARNING: Do NOT modify this code. The content of this method is
     * always regenerated by the Form Editor.
     */
    //GEN-BEGIN:initComponents
    // <editor-fold defaultstate="collapsed" desc="Generated Code">
    private void initComponents() {

        jLabel1 = new javax.swing.JLabel();
        jLabel2 = new javax.swing.JLabel();
        tfdMail = new javax.swing.JTextField();
        jScrollPane1 = new javax.swing.JScrollPane();
        ListShow = new javax.swing.JList();
        btnDfs = new javax.swing.JButton();
        btnExit = new javax.swing.JButton();

        jLabel1.setFont(new java.awt.Font("黑体", 0, 24));
        jLabel1.setForeground(new java.awt.Color(102, 102, 0));
        jLabel1.setText("\u7f51\u7edc\u8718\u86db");

        jLabel2.setFont(new java.awt.Font("黑体", 0, 18));
        jLabel2.setText("\u641c\u7d22\u7f51\u5740\uff1a");

        tfdMail.setFont(new java.awt.Font("黑体", 0, 24));
        tfdMail.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                tfdMailActionPerformed(evt);
            }
        });

        ListShow.setFont(new java.awt.Font("黑体", 0, 18));
        ListShow.setForeground(new java.awt.Color(0, 153, 153));
        ListShow.setModel(new javax.swing.AbstractListModel() {
            String[] strings = { "" };

            public int getSize() {
                return strings.length;
            }

            public Object getElementAt(int i) {
                return strings[i];
            }
        });
        jScrollPane1.setViewportView(ListShow);

        btnDfs.setFont(new java.awt.Font("黑体", 0, 18));
        btnDfs.setText("\u641c\u7d22");
        btnDfs.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                btnDfsActionPerformed(evt);
            }
        });

        btnExit.setFont(new java.awt.Font("黑体", 0, 18));
        btnExit.setText("\u9000\u51fa");
        btnExit.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                btnExitActionPerformed(evt);
            }
        });

        javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
        this.setLayout(layout);
        layout.setHorizontalGroup(layout
            .createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
            .addGroup(
                javax.swing.GroupLayout.Alignment.TRAILING,
                layout.createSequentialGroup()
                    .addContainerGap(139, Short.MAX_VALUE)
                    .addComponent(jLabel1,
                        javax.swing.GroupLayout.PREFERRED_SIZE,
                        134,
                        javax.swing.GroupLayout.PREFERRED_SIZE)
                    .addGap(127, 127, 127))
            .addGroup(
                layout.createSequentialGroup()
                    .addGroup(
                        layout.createParallelGroup(
                            javax.swing.GroupLayout.Alignment.TRAILING,
                            false)
                            .addGroup(
                                layout.createSequentialGroup()
                                    .addGap(20, 20, 20)
                                    .addComponent(
                                        jScrollPane1,
                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                        231,
                                        javax.swing.GroupLayout.PREFERRED_SIZE)
                                    .addPreferredGap(
                                        javax.swing.LayoutStyle.ComponentPlacement.RELATED,
                                        62,
                                        Short.MAX_VALUE)
                                    .addGroup(
                                        layout.createParallelGroup(
                                            javax.swing.GroupLayout.Alignment.TRAILING)
                                            .addComponent(btnDfs)
                                            .addComponent(btnExit)))
                            .addGroup(
                                javax.swing.GroupLayout.Alignment.LEADING,
                                layout.createSequentialGroup()
                                    .addGap(31, 31, 31)
                                    .addComponent(
                                        jLabel2,
                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                        100,
                                        javax.swing.GroupLayout.PREFERRED_SIZE)
                                    .addPreferredGap(
                                        javax.swing.LayoutStyle.ComponentPlacement.RELATED)
                                    .addComponent(
                                        tfdMail,
                                        javax.swing.GroupLayout.PREFERRED_SIZE,
                                        246,
                                        javax.swing.GroupLayout.PREFERRED_SIZE)))
                    .addContainerGap(18, Short.MAX_VALUE)));
        layout.setVerticalGroup(layout
            .createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
            .addGroup(
                layout.createSequentialGroup()
                    .addContainerGap()
                    .addComponent(jLabel1,
                        javax.swing.GroupLayout.PREFERRED_SIZE,
                        51,
                        javax.swing.GroupLayout.PREFERRED_SIZE)
                    .addPreferredGap(
                        javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
                    .addGroup(
                        layout.createParallelGroup(
                            javax.swing.GroupLayout.Alignment.BASELINE)
                            .addComponent(
                                jLabel2,
                                javax.swing.GroupLayout.PREFERRED_SIZE,
                                43,
                                javax.swing.GroupLayout.PREFERRED_SIZE)
                            .addComponent(
                                tfdMail,
                                javax.swing.GroupLayout.PREFERRED_SIZE,
                                33,
                                javax.swing.GroupLayout.PREFERRED_SIZE))
                    .addPreferredGap(
                        javax.swing.LayoutStyle.ComponentPlacement.RELATED)
                    .addGroup(
                        layout.createParallelGroup(
                            javax.swing.GroupLayout.Alignment.LEADING)
                            .addGroup(
                                javax.swing.GroupLayout.Alignment.TRAILING,
                                layout.createSequentialGroup()
                                    .addComponent(btnDfs)
                                    .addGap(30, 30, 30)
                                    .addComponent(btnExit))
                            .addComponent(
                                jScrollPane1,
                                javax.swing.GroupLayout.Alignment.TRAILING,
                                javax.swing.GroupLayout.PREFERRED_SIZE,
                                128,
                                javax.swing.GroupLayout.PREFERRED_SIZE))
                    .addContainerGap(53, Short.MAX_VALUE)));
    }// </editor-fold>
    //GEN-END:initComponents

    /**
     * Handles the exit button by terminating the JVM with status 0.
     *
     * @param evt the triggering action event (not used)
     */
    private void btnExitActionPerformed(java.awt.event.ActionEvent evt) {
        System.exit(0);
    }

    /**
     * Handles the search button: validates the typed address, downloads the
     * page, and shows every e-mail address found on it in the list.
     *
     * <p>An address in the wrong format, or a page that cannot be read, is
     * reported to the user in a message dialog; in both cases the list keeps
     * its previous content.
     *
     * @param evt the triggering action event (not used)
     */
    private void btnDfsActionPerformed(java.awt.event.ActionEvent evt) {
        // 1. Collect and validate the input.
        String address = toFetchableUrl(tfdMail.getText());
        if (address == null) {
            JOptionPane.showMessageDialog(this, "请输入正确的网址格式");
            return;
        }

        // 2. Download the page line by line and collect the matches.
        List<String> mails = new ArrayList<String>();
        BufferedReader reader = null;
        try {
            // The page is decoded with the platform default charset, as before.
            reader = new BufferedReader(new InputStreamReader(
                    new URL(address).openStream()));
            String line;
            while ((line = reader.readLine()) != null) {
                mails.addAll(findEmails(line));
            }
            ListShow.setListData(mails.toArray());
        } catch (IOException e) {
            // Also covers MalformedURLException. Tell the user instead of only
            // printing a stack trace that nobody sees in a GUI application.
            JOptionPane.showMessageDialog(this, "读取网页失败:" + e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing the connection fails.
                }
            }
        }
    }

    /**
     * Validates a user-typed web address and turns it into a URL string that
     * can be opened.
     *
     * @param input text typed by the user; may be {@code null}
     * @return the trimmed address, prefixed with {@code http://} when the user
     *         did not type a scheme, or {@code null} if the text does not
     *         have the accepted format
     */
    static String toFetchableUrl(String input) {
        if (input == null) {
            return null;
        }
        String trimmed = input.trim();
        Matcher matcher = ADDRESS_PATTERN.matcher(trimmed);
        // matches() requires the whole input to fit, not just a fragment of it.
        if (!matcher.matches()) {
            return null;
        }
        // Group 1 is the optional scheme; only add one when it is missing.
        return matcher.group(1) != null ? trimmed : "http://" + trimmed;
    }

    /**
     * Finds all e-mail addresses in a piece of text.
     *
     * @param text the text to scan
     * @return the addresses in order of appearance (duplicates kept); empty if
     *         there are none
     */
    static List<String> findEmails(CharSequence text) {
        List<String> found = new ArrayList<String>();
        Matcher matcher = MAIL_PATTERN.matcher(text);
        while (matcher.find()) {
            found.add(matcher.group());
        }
        return found;
    }

    /**
     * Invoked when an action event is fired by the address field; currently
     * does nothing.
     *
     * @param evt the triggering action event (not used)
     */
    private void tfdMailActionPerformed(java.awt.event.ActionEvent evt) {
        // TODO add your handling code here:
    }

    //GEN-BEGIN:variables
    // Variables declaration - do not modify
    private javax.swing.JList ListShow;
    private javax.swing.JButton btnDfs;
    private javax.swing.JButton btnExit;
    private javax.swing.JLabel jLabel1;
    private javax.swing.JLabel jLabel2;
    private javax.swing.JScrollPane jScrollPane1;
    private javax.swing.JTextField tfdMail;
    // End of variables declaration//GEN-END:variables

}


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  网络编程 java 邮箱