C#收集网页中的EMail实现源码
2010-01-26 14:44
267 查看
C#收集网页中的EMail实现源码:
// Caller
/// <summary>
/// Starts a new thread that runs GetEmailAddress, passing urlStr as the thread argument.
/// </summary>
/// <param name="urlStr">Handed unchanged to GetEmailAddress, which casts it to a string and uses it as the page address.</param>
// NOTE(review): the "..." line below is code elided by the article (link handling);
// it is a placeholder, not valid C# as written.
private void GetAllURL(string urlStr)
{
new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr);
... // process the links found in the page
}
/// <summary>
/// Collects the e-mail addresses found in a web page and hands each one,
/// followed by a CRLF, to AppendText through Invoke.
/// </summary>
/// <param name="urlStr">
/// Address of the web page. Declared as object so the method matches
/// ParameterizedThreadStart; the value is cast to a string.
/// </param>
private void GetEmailAddress(object urlStr)
{
    // Named group "EmailStr" is what GetWebInfo collects for each match.
    const string emailPattern = @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)";
    string pageUrl = (string)urlStr;

    // Every address matched in the downloaded page.
    ArrayList addresses = GetWebInfo(pageUrl, emailPattern);

    for (int i = 0; i < addresses.Count; i++)
    {
        // One address per line; Invoke runs AppendText via the delegate
        // (presumably on the UI thread — Invoke is defined outside this snippet).
        string line = addresses[i] + "\r\n";
        Invoke(new AppendTextDelegate(AppendText), new object[] { line });
    }
}
/// <summary>
/// Downloads a web page and returns, for every regex match in its text,
/// the value captured by the pattern's first named group.
/// </summary>
/// <param name="URlStr">Absolute URL of the page; it is fetched with an HTTP GET.</param>
/// <param name="RegExpress">
/// Regular expression applied case-insensitively to the page text. The first
/// <c>(?&lt;Name&gt;...)</c> group selects what is collected; when the pattern
/// has no such named group, the whole match is collected instead.
/// </param>
/// <returns>The trimmed captured strings, one entry per match, in match order.</returns>
private ArrayList GetWebInfo(string URlStr,string RegExpress)
{
    // Build the regex first so a malformed pattern fails before any network traffic.
    Regex r = new Regex(RegExpress, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Decide which group to collect. Group 0 (the whole match) is the fallback
    // when the pattern has no "(?<Name>" construct, or when the text after "(?<"
    // is not a real group name (e.g. a lookbehind such as "(?<=").
    int groupNumber = 0;
    int pos1 = RegExpress.IndexOf("(?<", StringComparison.Ordinal);
    int pos2 = pos1 < 0 ? -1 : RegExpress.IndexOf('>', pos1);
    if (pos2 > pos1 + 3)
    {
        string groupName = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
        int named = r.GroupNumberFromName(groupName);
        if (named >= 0)
        {
            groupNumber = named;
        }
    }

    // Open the requested page.
    HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
    webRequest1.Method = "GET";

    // Dispose the response, its stream and the reader so the underlying
    // connection is released even if reading throws.
    string textData;
    using (HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, Encoding.Default))
    {
        // NOTE(review): Encoding.Default ignores the charset declared by the
        // response; confirm this is acceptable for the pages being scanned.
        textData = reader.ReadToEnd();
    }

    // Collect the selected group from every match.
    ArrayList Result = new ArrayList();
    for (Match m = r.Match(textData); m.Success; m = m.NextMatch())
    {
        Result.Add(m.Groups[groupNumber].Value.Trim());
    }
    return Result;
}
上述代码中的关键是书写提取EMail的正则表达式(表达式中只列出了大写字母,依靠GetWebInfo中指定的RegexOptions.IgnoreCase才能同时匹配小写地址):
@"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"
// Caller
/// <summary>
/// Starts a new thread that runs GetEmailAddress, passing urlStr as the thread argument.
/// </summary>
/// <param name="urlStr">Handed unchanged to GetEmailAddress, which casts it to a string and uses it as the page address.</param>
// NOTE(review): the "..." line below is code elided by the article (link handling);
// it is a placeholder, not valid C# as written.
private void GetAllURL(string urlStr)
{
new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr);
... // process the links found in the page
}
/// <summary>
/// Collects the e-mail addresses found in a web page and hands each one,
/// followed by a CRLF, to AppendText through Invoke.
/// </summary>
/// <param name="urlStr">
/// Address of the web page. Declared as object so the method matches
/// ParameterizedThreadStart; the value is cast to a string.
/// </param>
private void GetEmailAddress(object urlStr)
{
    // Named group "EmailStr" is what GetWebInfo collects for each match.
    const string emailPattern = @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)";
    string pageUrl = (string)urlStr;

    // Every address matched in the downloaded page.
    ArrayList addresses = GetWebInfo(pageUrl, emailPattern);

    for (int i = 0; i < addresses.Count; i++)
    {
        // One address per line; Invoke runs AppendText via the delegate
        // (presumably on the UI thread — Invoke is defined outside this snippet).
        string line = addresses[i] + "\r\n";
        Invoke(new AppendTextDelegate(AppendText), new object[] { line });
    }
}
/// <summary>
/// Downloads a web page and returns, for every regex match in its text,
/// the value captured by the pattern's first named group.
/// </summary>
/// <param name="URlStr">Absolute URL of the page; it is fetched with an HTTP GET.</param>
/// <param name="RegExpress">
/// Regular expression applied case-insensitively to the page text. The first
/// <c>(?&lt;Name&gt;...)</c> group selects what is collected; when the pattern
/// has no such named group, the whole match is collected instead.
/// </param>
/// <returns>The trimmed captured strings, one entry per match, in match order.</returns>
private ArrayList GetWebInfo(string URlStr,string RegExpress)
{
    // Build the regex first so a malformed pattern fails before any network traffic.
    Regex r = new Regex(RegExpress, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Decide which group to collect. Group 0 (the whole match) is the fallback
    // when the pattern has no "(?<Name>" construct, or when the text after "(?<"
    // is not a real group name (e.g. a lookbehind such as "(?<=").
    int groupNumber = 0;
    int pos1 = RegExpress.IndexOf("(?<", StringComparison.Ordinal);
    int pos2 = pos1 < 0 ? -1 : RegExpress.IndexOf('>', pos1);
    if (pos2 > pos1 + 3)
    {
        string groupName = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
        int named = r.GroupNumberFromName(groupName);
        if (named >= 0)
        {
            groupNumber = named;
        }
    }

    // Open the requested page.
    HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
    webRequest1.Method = "GET";

    // Dispose the response, its stream and the reader so the underlying
    // connection is released even if reading throws.
    string textData;
    using (HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, Encoding.Default))
    {
        // NOTE(review): Encoding.Default ignores the charset declared by the
        // response; confirm this is acceptable for the pages being scanned.
        textData = reader.ReadToEnd();
    }

    // Collect the selected group from every match.
    ArrayList Result = new ArrayList();
    for (Match m = r.Match(textData); m.Success; m = m.NextMatch())
    {
        Result.Add(m.Groups[groupNumber].Value.Trim());
    }
    return Result;
}
上述代码中的关键是书写提取EMail的正则表达式(表达式中只列出了大写字母,依靠GetWebInfo中指定的RegexOptions.IgnoreCase才能同时匹配小写地址):
@"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"
相关文章推荐
- C#收集网页中的EMail实现源码
- C#实现下载网页HTML源码的方法
- C# 使用 Abot 实现 爬虫 抓取网页信息 源码下载
- 从网上收集EMail(正则表达式,C#源码)
- C#使用WebClient登录网站并抓取登录后的网页信息实现方法
- C#实现视频会议系统 GGMeeting(附源码)
- 禁用IE的后退按钮|显示网页已过期|几种语言的实现方法|c#|javascript|html
- 【转】asp.net(c#)加密解密算法之sha1、md5、des、aes实现源码详解
- C#获取网页源码
- C#实现通过程序自动抓取远程Web网页信息
- C#获取远程网页中的所有链接URL(网络蜘蛛实现原理)
- C#获取网页源码
- C#实现树型结构TreeView节点拖拽的简单功能,附全部源码,供有需要的参考
- Web C#2.0 DataSet和Reader封装组件实现自动多数据库切换(含组件源码和实例)
- c# class 实现泛型的源码
- C#.NET实现网页自动登录的方法
- C#实现自动登录赶集网(www.ganji.com)附源码下载--HttpHelper版
- C#实现通过程序自动抓取远程Web网页信息(转载)
- 二维码生成算法及c#实现(附源码)
- C#实现网页正文提取算法