您的位置:首页 > 编程语言 > C#

C#收集网页中的EMail实现源码

2010-01-26 14:44 267 查看
C#收集网页中的EMail实现源码:

// Caller side: shows how the e-mail extraction is started for one page.
// GetEmailAddress is run on a new thread via ParameterizedThreadStart,
// with urlStr handed over as the thread's start argument.
private void GetAllURL(string urlStr)
{
new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr);
... // process the links found in the page (this part is elided in the article)
}
/// <summary>
/// Extracts the e-mail addresses found in a web page and passes each one,
/// followed by "\r\n", to <c>AppendText</c> through <c>Invoke</c>.
/// </summary>
/// <param name="urlStr">
/// Address of the web page; cast to <see cref="string"/> before use. It is typed
/// as object because the method is started through ParameterizedThreadStart.
/// </param>
private void GetEmailAddress(object urlStr)
{
    // The named group "EmailStr" is what GetWebInfo pulls out of every match.
    const string emailPattern = @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)";

    string pageUrl = (string)urlStr;
    ArrayList addresses = GetWebInfo(pageUrl, emailPattern);

    // NOTE(review): Invoke is presumably Control.Invoke, used to reach the UI
    // thread from this worker thread - confirm against the enclosing class.
    AppendTextDelegate appendLine = new AppendTextDelegate(AppendText);
    for (int i = 0; i < addresses.Count; i++)
    {
        Invoke(appendLine, new object[] { addresses[i] + "\r\n" });
    }
}

/// <summary>
/// Downloads the page at <paramref name="URlStr"/> with an HTTP GET and returns,
/// for every match of <paramref name="RegExpress"/> in the page text, the value
/// captured by the pattern's named group.
/// </summary>
/// <param name="URlStr">Absolute URL of the page to download.</param>
/// <param name="RegExpress">
/// Regular expression containing a named group written as (?&lt;Name&gt;...).
/// The group name is taken from the first "(?&lt;" found in the pattern text.
/// Matching is case-insensitive.
/// </param>
/// <returns>
/// An ArrayList of trimmed strings, one per match, in the order the matches occur.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The pattern contains no "(?&lt;Name&gt;" construct.
/// </exception>
private ArrayList GetWebInfo(string URlStr, string RegExpress)
{
    // Open the requested page.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
    request.Method = "GET";

    // Dispose the response and the reader once the text has been read, so the
    // underlying connection is released instead of waiting for finalization.
    string textData;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default))
    {
        // NOTE(review): Encoding.Default is the system ANSI code page, not the
        // charset declared by the page - confirm this is intended.
        textData = reader.ReadToEnd();
    }

    // Extract the requested content with the regular expression.
    Regex regex = new Regex(RegExpress, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Recover the group name from the pattern text: the characters between
    // the first "(?<" and the next ">".
    int nameStart = RegExpress.IndexOf("(?<");
    int nameEnd = RegExpress.IndexOf(">", nameStart);
    string groupName = RegExpress.Substring(nameStart + 3, nameEnd - nameStart - 3);

    // The replacement pattern is the same for every match, so build it once.
    string replacement = "${" + groupName + "}";

    ArrayList result = new ArrayList();
    for (Match match = regex.Match(textData); match.Success; match = match.NextMatch())
    {
        result.Add(match.Result(replacement).Trim());
    }
    return result;
}

上述代码的关键在于提取EMail的正则表达式。注意该表达式只写了大写字母 A-Z,必须配合 RegexOptions.IgnoreCase 使用才能匹配小写的邮箱地址:
@"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: