您的位置:首页 > 其它

批量查找未注册的域名

2014-04-11 10:05 190 查看
package com.blog.collection;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.blog.model.Blog;

/**
 * Scrapes the articles of one blog.csdn.net user.
 *
 * <p>{@link #go(String)} downloads the user's article-list pages, collects
 * every article URL found on them and passes each URL to
 * {@link #handler(String)}, which extracts the title, body and category of
 * the article into a {@code Blog} and forwards it to the optional
 * {@code Progress} callback.
 *
 * <p>All extraction is done with regular expressions against the raw HTML,
 * so it depends on the exact markup the site served when this was written.
 */
public class CollectionHandler {

    /** Site root; article paths scraped from the list pages are appended to it. */
    private static final String BASE_URL = "http://blog.csdn.net";

    /** Total number of list pages, read from the pager of the summary view. */
    private static final Pattern PAGE_COUNT_PATTERN = Pattern.compile(
            "(?<=<div[\\s\\S]{0,10}id=\"papelist\"[\\s\\S]{0,10}class=\"pagelist\">[\\s\\S]{1,100}共)\\d+(?=页</span>)");

    /** Site-relative article link inside a {@code link_title} span of a list page. */
    private static final Pattern ARTICLE_LINK_PATTERN = Pattern.compile(
            "(?<=<span[\\s\\S]{0,10}class=\"link_title\"><a[\\s\\S]{0,10}\")[\\s\\S]*?(?=\">)");

    /** Text of the title link on an article page. */
    private static final Pattern TITLE_PATTERN = Pattern.compile(
            "(?<=<span class=\"link_title\"><a[\\s\\S]{0,1000}?>)[\\s\\S]*?(?=</a></span>)");

    /** Inner HTML of the {@code article_content} div on an article page. */
    private static final Pattern CONTENT_PATTERN = Pattern.compile(
            "(?<=<div[\\s\\S]{0,100}id=\"article_content\"[\\s\\S]{0,100}class=\"article_content\">)[\\s\\S]*?(?=</div>[\\s\\S]{0,100}<!--)");

    /** Text of the first link inside the {@code link_categories} span. */
    private static final Pattern CATEGORY_PATTERN = Pattern.compile(
            "(?<=<span[\\s\\S]{0,100}class=\"link_categories\">[\\s\\S]{0,1000}<a[\\s\\S]{0,200}?>)[\\s\\S]*?(?=</a>)");

    /**
     * Layout whitespace removed from a scraped title. Includes CR and TAB in
     * addition to LF and space so titles from CRLF-formatted pages come out clean.
     */
    private static final Pattern TITLE_WHITESPACE = Pattern.compile("[\\r\\n\\t ]");

    /** Optional callback that receives every scraped article; may be null. */
    private Progress progress;

    public void setProgress(Progress progress) {
        this.progress = progress;
    }

    public Progress getProgress() {
        return progress;
    }

    /**
     * Crawls every article of the given user.
     *
     * <p>Fetches list page 1, reads the total page count from it, collects
     * the article URLs of all list pages and then processes each article
     * with {@link #handler(String)}. Progress is printed to standard output.
     *
     * @param user the blog user name as it appears in the blog URL
     */
    public void go(String user) {
        HttpRequest request = new HttpRequest();
        System.out.println("加载中...");
        String listUrlPrefix = BASE_URL + "/" + user + "/article/list/";
        String content = request.sendGet(listUrlPrefix + 1, "");
        // Page count from the pager (summary view); 0 when no pager is present.
        int pageCount = parsePageCount(firstMatch(PAGE_COUNT_PATTERN, content));
        List<String> urls = new ArrayList<String>();
        // Page 1 is already downloaded, so pass its text instead of re-fetching.
        getUrls(content, urls, null);
        for (int page = 2; page <= pageCount; page++) {
            getUrls(null, urls, listUrlPrefix + page);
        }
        System.out.println("数量:" + urls.size());
        for (String articleUrl : urls) {
            System.out.println(articleUrl);
            handler(articleUrl);
        }
        System.out.println("处理完成");
    }

    /**
     * Appends the absolute URL of every article linked from a list page to
     * {@code urls}.
     *
     * @param text HTML of the list page, or {@code null} to download it from {@code url}
     * @param urls list that receives the article URLs
     * @param url  address of the list page; only used when {@code text} is {@code null}
     */
    public void getUrls(String text, List<String> urls, String url) {
        String content = text;
        if (content == null) {
            if (url == null) {
                // Neither page text nor an address to fetch it from: nothing to scan.
                return;
            }
            content = new HttpRequest().sendGet(url, "");
            if (content == null) {
                // sendGet's failure behaviour is not visible here; guard defensively
                // instead of failing with a NullPointerException in the matcher.
                System.err.println("获取页面失败:" + url);
                return;
            }
        }
        Matcher links = ARTICLE_LINK_PATTERN.matcher(content);
        while (links.find()) {
            urls.add(BASE_URL + links.group());
        }
    }

    /**
     * Processes one blog post: downloads it, extracts title, content and
     * category, and hands the resulting {@code Blog} to the progress callback
     * when one is set.
     *
     * @param url absolute URL of the article page
     */
    public void handler(String url) {
        HttpRequest request = new HttpRequest();
        String content = request.sendGet(url, "");
        if (content == null) {
            // Defensive: skip this article rather than abort the whole crawl.
            System.err.println("获取页面失败:" + url);
            return;
        }
        Blog blog = new Blog();

        // Title
        String title = TITLE_WHITESPACE.matcher(firstMatch(TITLE_PATTERN, content)).replaceAll("");
        System.out.println("标题");
        System.out.println(title);
        blog.setTitle(title);

        // Article content (only the heading is printed, not the body itself)
        System.out.println("博文");
        String text = firstMatch(CONTENT_PATTERN, content);
        blog.setContent(text);

        // Category
        System.out.println("分类");
        String type = firstMatch(CATEGORY_PATTERN, content);
        blog.setTags(type);
        System.out.println(type);

        if (this.progress != null) {
            progress.handler(blog, type);
        }
    }

    /**
     * Returns the first substring of {@code content} matched by {@code regex}.
     *
     * @param content text to search; {@code null} is treated as "no match"
     * @param regex   regular expression to compile and search for
     * @return the first match, or the empty string when nothing matches
     */
    public String matcher(String content, String regex) {
        return firstMatch(Pattern.compile(regex), content);
    }

    /** Returns the first match of {@code pattern} in {@code content}, or "" if none or content is null. */
    private static String firstMatch(Pattern pattern, String content) {
        if (content == null) {
            return "";
        }
        Matcher m = pattern.matcher(content);
        return m.find() ? m.group(0) : "";
    }

    /** Parses the scraped page count; returns 0 when it is empty or not a valid int. */
    private static int parsePageCount(String digits) {
        if (digits.isEmpty()) {
            return 0;
        }
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            // The pattern only yields digits, so this can only be an overflow;
            // fall back to processing the first page alone.
            return 0;
        }
    }
}
内容来自用户分享和网络整理，不保证内容的准确性。如有侵权内容，可联系管理员处理。点击这里给我发消息
标签:  域名