用 Java 抓取 cnproxy 代理服务器地址
2015-06-05 14:00
441 查看
[java] view plain copy print?
package org.mingyuan.fetcher;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
/**
 * Downloads the proxy listing pages of cnproxy.com, extracts every proxy IP together
 * with the text of its last table cell (country/area), and writes the result to a
 * local text file.
 */
public class StringTest {

    /** Header row that immediately precedes the proxy rows in a listing page. */
    private static final String TABLE_HEADER =
            "<tr><td width=\"140\">IP:Port</td><td width=\"40\">Type</td>"
            + "<td width=\"90\">Speed</td><td width=\"160\"> Country/Area</td></tr>";

    /** Marker that ends the proxy table. */
    private static final String TABLE_END = "</table>";

    /** Marker that starts each proxy row; the IP text follows it directly. */
    private static final String ROW_START = "<tr><td>";

    /** Marker that opens a table cell. */
    private static final String CELL_START = "<td>";

    /** Marker that closes the last cell of a row. */
    private static final String ROW_END = "</td></tr>";

    /**
     * Fetches the document behind a URL and returns it as one string.
     *
     * <p>Lines are concatenated without their line terminators. The bytes are decoded
     * with the platform default charset, as no charset is passed to the reader.
     *
     * @param url address of the page to download
     * @return the page content with all line breaks removed
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    private static String getURLContent(String url) throws IOException {
        URL ipListUrl = new URL(url);
        BufferedReader in = new BufferedReader(new InputStreamReader(ipListUrl.openStream()));
        try {
            // StringBuilder avoids re-copying the whole page for every appended line.
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                html.append(line);
            }
            return html.toString();
        } finally {
            // Close the connection even when reading fails midway.
            in.close();
        }
    }

    /**
     * Extracts the proxy rows from a listing page.
     *
     * <p>A row looks like
     * {@code <tr><td>24.25.26.128<SCRIPT ...>...</SCRIPT></td><td>HTTP</td><td>296,984,984</td><td>area</td></tr>}.
     * The key is the text between the row start and the first {@code <}; the value is
     * the content of the last {@code <td>} cell.
     *
     * @param htmlContent page source as returned by {@link #getURLContent(String)}
     * @param fileName unused; kept so existing callers keep compiling
     * @return map from IP to the text of the row's last cell; empty when the page does
     *         not contain the expected header row
     * @throws IOException never thrown by this implementation; declared for compatibility
     */
    private static HashMap<String,String> getIpList(String htmlContent,String fileName) throws IOException {
        HashMap<String,String> map = new HashMap<String,String>();
        if (htmlContent == null) {
            return map;
        }
        int headerAt = htmlContent.indexOf(TABLE_HEADER);
        if (headerAt < 0) {
            // Page layout differs from what is expected: nothing to extract.
            return map;
        }
        int rowsStart = headerAt + TABLE_HEADER.length();
        int rowsEnd = htmlContent.indexOf(TABLE_END, rowsStart);
        String rows = rowsEnd < 0
                ? htmlContent.substring(rowsStart)
                : htmlContent.substring(rowsStart, rowsEnd);

        for (String row : rows.split(ROW_START)) {
            int ipEnd = row.indexOf('<');
            String ip = (ipEnd < 0 ? row : row.substring(0, ipEnd)).trim();
            if (ip.length() == 0) {
                // split() yields an empty fragment before the first row; skip it so no
                // bogus "" entry ends up in the result.
                continue;
            }
            int cellAt = row.lastIndexOf(CELL_START);
            String addr = cellAt < 0 ? row : row.substring(cellAt + CELL_START.length());
            int addrEnd = addr.indexOf(ROW_END);
            if (addrEnd >= 0) {
                addr = addr.substring(0, addrEnd);
            }
            map.put(ip, addr);
        }
        return map;
    }

    /**
     * Queries ip.cn for every given IP and appends each response line, followed by
     * {@code </br>}, to {@code c:/IpAddrInfo.html}.
     *
     * @param ipList IPs to look up
     * @throws IOException if the output file cannot be written or a lookup fails
     */
    private static void showAddress(ArrayList<String> ipList) throws IOException {
        File file = new File("c:/IpAddrInfo.html");
        BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(file));
        try {
            for (String ip : ipList) {
                URL lookupUrl = new URL(
                        "http://www.ip.cn/getip.php?action=queryip&ip_url=" + ip);
                BufferedReader in = new BufferedReader(
                        new InputStreamReader(lookupUrl.openStream()));
                try {
                    String line;
                    while ((line = in.readLine()) != null) {
                        os.write((line + "</br>").getBytes());
                    }
                } finally {
                    in.close();
                }
            }
        } finally {
            // Closing the buffered stream flushes it and closes the underlying file stream.
            os.close();
        }
    }

    /**
     * Scrapes listing pages {@code proxy1.html}..{@code proxy10.html} and
     * {@code proxyedu1.html}..{@code proxyedu2.html}, then writes one
     * {@code "<ip> <area>"} line per proxy to {@code c:/allIP.txt}.
     *
     * @param args ignored
     * @throws IOException if a page cannot be downloaded or the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        HashMap<String,String> map = new HashMap<String,String>();
        for (int i = 1; i < 13; i++) {
            String pageUrl;
            if (i < 11) {
                pageUrl = "http://www.cnproxy.com/proxy" + i + ".html";
            } else {
                // Pages 11 and 12 map to the "edu" listings 1 and 2.
                pageUrl = "http://www.cnproxy.com/proxyedu" + (i - 10) + ".html";
            }
            String htmlContent = getURLContent(pageUrl);
            map.putAll(getIpList(htmlContent, "c:/ip" + i + ".html"));
        }

        File file = new File("c:/allIP.txt");
        BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(file));
        int count = 0;
        try {
            Set<String> ipSet = map.keySet();
            for (String ip : ipSet) {
                // "\r\n" terminates each record with a Windows line break.
                os.write((ip + " " + map.get(ip) + "\r\n").getBytes());
                count++;
            }
        } finally {
            os.close();
        }
        System.out.println("操作完成,共获取"+count+"个IP及其地址信息");
    }
}
[java] view plain copy print?
package org.mingyuan.fetcher;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
/**
 * Downloads the proxy listing pages of cnproxy.com, extracts every proxy IP together
 * with the text of its last table cell (country/area), and writes the result to a
 * local text file.
 */
public class StringTest {

    /** Header row that immediately precedes the proxy rows in a listing page. */
    private static final String TABLE_HEADER =
            "<tr><td width=\"140\">IP:Port</td><td width=\"40\">Type</td>"
            + "<td width=\"90\">Speed</td><td width=\"160\"> Country/Area</td></tr>";

    /** Marker that ends the proxy table. */
    private static final String TABLE_END = "</table>";

    /** Marker that starts each proxy row; the IP text follows it directly. */
    private static final String ROW_START = "<tr><td>";

    /** Marker that opens a table cell. */
    private static final String CELL_START = "<td>";

    /** Marker that closes the last cell of a row. */
    private static final String ROW_END = "</td></tr>";

    /**
     * Fetches the document behind a URL and returns it as one string.
     *
     * <p>Lines are concatenated without their line terminators. The bytes are decoded
     * with the platform default charset, as no charset is passed to the reader.
     *
     * @param url address of the page to download
     * @return the page content with all line breaks removed
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    private static String getURLContent(String url) throws IOException {
        URL ipListUrl = new URL(url);
        BufferedReader in = new BufferedReader(new InputStreamReader(ipListUrl.openStream()));
        try {
            // StringBuilder avoids re-copying the whole page for every appended line.
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                html.append(line);
            }
            return html.toString();
        } finally {
            // Close the connection even when reading fails midway.
            in.close();
        }
    }

    /**
     * Extracts the proxy rows from a listing page.
     *
     * <p>A row looks like
     * {@code <tr><td>24.25.26.128<SCRIPT ...>...</SCRIPT></td><td>HTTP</td><td>296,984,984</td><td>area</td></tr>}.
     * The key is the text between the row start and the first {@code <}; the value is
     * the content of the last {@code <td>} cell.
     *
     * @param htmlContent page source as returned by {@link #getURLContent(String)}
     * @param fileName unused; kept so existing callers keep compiling
     * @return map from IP to the text of the row's last cell; empty when the page does
     *         not contain the expected header row
     * @throws IOException never thrown by this implementation; declared for compatibility
     */
    private static HashMap<String,String> getIpList(String htmlContent,String fileName) throws IOException {
        HashMap<String,String> map = new HashMap<String,String>();
        if (htmlContent == null) {
            return map;
        }
        int headerAt = htmlContent.indexOf(TABLE_HEADER);
        if (headerAt < 0) {
            // Page layout differs from what is expected: nothing to extract.
            return map;
        }
        int rowsStart = headerAt + TABLE_HEADER.length();
        int rowsEnd = htmlContent.indexOf(TABLE_END, rowsStart);
        String rows = rowsEnd < 0
                ? htmlContent.substring(rowsStart)
                : htmlContent.substring(rowsStart, rowsEnd);

        for (String row : rows.split(ROW_START)) {
            int ipEnd = row.indexOf('<');
            String ip = (ipEnd < 0 ? row : row.substring(0, ipEnd)).trim();
            if (ip.length() == 0) {
                // split() yields an empty fragment before the first row; skip it so no
                // bogus "" entry ends up in the result.
                continue;
            }
            int cellAt = row.lastIndexOf(CELL_START);
            String addr = cellAt < 0 ? row : row.substring(cellAt + CELL_START.length());
            int addrEnd = addr.indexOf(ROW_END);
            if (addrEnd >= 0) {
                addr = addr.substring(0, addrEnd);
            }
            map.put(ip, addr);
        }
        return map;
    }

    /**
     * Queries ip.cn for every given IP and appends each response line, followed by
     * {@code </br>}, to {@code c:/IpAddrInfo.html}.
     *
     * @param ipList IPs to look up
     * @throws IOException if the output file cannot be written or a lookup fails
     */
    private static void showAddress(ArrayList<String> ipList) throws IOException {
        File file = new File("c:/IpAddrInfo.html");
        BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(file));
        try {
            for (String ip : ipList) {
                URL lookupUrl = new URL(
                        "http://www.ip.cn/getip.php?action=queryip&ip_url=" + ip);
                BufferedReader in = new BufferedReader(
                        new InputStreamReader(lookupUrl.openStream()));
                try {
                    String line;
                    while ((line = in.readLine()) != null) {
                        os.write((line + "</br>").getBytes());
                    }
                } finally {
                    in.close();
                }
            }
        } finally {
            // Closing the buffered stream flushes it and closes the underlying file stream.
            os.close();
        }
    }

    /**
     * Scrapes listing pages {@code proxy1.html}..{@code proxy10.html} and
     * {@code proxyedu1.html}..{@code proxyedu2.html}, then writes one
     * {@code "<ip> <area>"} line per proxy to {@code c:/allIP.txt}.
     *
     * @param args ignored
     * @throws IOException if a page cannot be downloaded or the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        HashMap<String,String> map = new HashMap<String,String>();
        for (int i = 1; i < 13; i++) {
            String pageUrl;
            if (i < 11) {
                pageUrl = "http://www.cnproxy.com/proxy" + i + ".html";
            } else {
                // Pages 11 and 12 map to the "edu" listings 1 and 2.
                pageUrl = "http://www.cnproxy.com/proxyedu" + (i - 10) + ".html";
            }
            String htmlContent = getURLContent(pageUrl);
            map.putAll(getIpList(htmlContent, "c:/ip" + i + ".html"));
        }

        File file = new File("c:/allIP.txt");
        BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(file));
        int count = 0;
        try {
            Set<String> ipSet = map.keySet();
            for (String ip : ipSet) {
                // "\r\n" terminates each record with a Windows line break.
                os.write((ip + " " + map.get(ip) + "\r\n").getBytes());
                count++;
            }
        } finally {
            os.close();
        }
        System.out.println("操作完成,共获取"+count+"个IP及其地址信息");
    }
}
相关文章推荐
- 代理服务器的路由分析
- 在Windows下利用Squid开设代理服务器
- 教你如何使用node.js制作代理服务器
- Perl实现的Linux下socket代理服务器
- 简单架设SSH+Squid代理服务器的自由上网通道的方法
- PHP实例分享判断客户端是否使用代理服务器及其匿名级别
- PHP实现检测客户端是否使用代理服务器及其匿名级别
- Python写的Socks5协议代理服务器
- Linux下Squid代理服务器的架设与维护经验分享
- PHP实现检测客户端是否使用代理服务器及其匿名级别
- Nginx搭建反向代理服务器过程详解
- 使用 Nginx 配置jsp服务器
- squid完全攻略(一)squid优化后详细安装步骤
- Squid代理服务器应用案例 多出口多用户认证上网
- Nginx+Keepalived高可用架构平台
- nginx文件路径处理远程命令执行漏洞(转)
- 图解正向代理、反向代理、透明代理
- 使用nginx后如何在web应用中获取用户ip及原理解释
- 防火墙的最大并发连接数
- 提高企业效率 主流代理服务器软件大搜捕