朋友开网店 做个抓取数据的小程序
2007-04-16 14:04
351 查看
朋友开网店,需要填充初期的数据,所以专门做了一个抓取数据的小程序,在这里分享一下。
/// <summary>
/// Fetches every URL listed in textBox1 (one per line), extracts the product
/// fields from each page, shows the numbered tab-separated rows in textBox2
/// and writes the same text to c:\test\ouput.txt.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    const string outputPath = @"c:\test\ouput.txt";

    StringBuilder rows = new StringBuilder();
    int rowNumber = 1;
    foreach (string line in FormatBox(textBox1.Text))
    {
        // Splitting on '\n' alone can leave a trailing '\r' and blank
        // entries; neither is a usable URL, so clean up and skip them.
        string url = line.Trim();
        if (url.Length == 0)
        {
            continue;
        }

        string html = GetRequestString(url);
        rows.AppendLine(rowNumber.ToString() + "\t" + FormatHtml(html));
        rowNumber++;
    }

    string output = rows.ToString();
    textBox2.Text = output;

    // StreamWriter does not create missing folders, so make sure the
    // target directory exists before opening the file.
    Directory.CreateDirectory(Path.GetDirectoryName(outputPath));
    using (StreamWriter writer = new StreamWriter(outputPath)) // write the collected rows to the text file
    {
        writer.Write(output);
    }
}
/// <summary>
/// Splits the text box content into its individual lines.
/// </summary>
/// <param name="Boxtext">Raw text with one entry per line; may be null or empty.</param>
/// <returns>
/// The trimmed, non-blank lines in their original order; an empty array
/// when there are none.
/// </returns>
protected string[] FormatBox(string Boxtext)
{
    if (string.IsNullOrEmpty(Boxtext))
    {
        return new string[0];
    }

    // Split on both CR and LF so "\r\n" line endings do not leave a stray
    // '\r' at the end of every entry.
    string[] rawLines = Boxtext.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    System.Collections.Generic.List<string> lines = new System.Collections.Generic.List<string>(rawLines.Length);
    foreach (string rawLine in rawLines)
    {
        string line = rawLine.Trim();
        if (line.Length > 0)
        {
            lines.Add(line);
        }
    }
    return lines.ToArray();
}
/// <summary>
/// Builds one tab-separated record from a product page by running every
/// field extractor over the HTML.
/// </summary>
/// <param name="htm">HTML source of the product page.</param>
/// <returns>
/// The seven fields (item number, market price, surface finish, material,
/// women's ring size, men's ring size, description) joined with tabs, or an
/// empty string when any extractor throws.
/// </returns>
public string FormatHtml(string htm)
{
    try
    {
        string[] fields = new string[]
        {
            GetNumCode(htm),
            GetPrice(htm),
            GetDeal(htm),
            GetStuff(htm),
            GetWoman(htm),
            GetMan(htm),
            GetInfo(htm)
        };
        return string.Join("\t", fields);
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (an unparsable page yields an empty
        // record) but report the failure instead of hiding it completely.
        System.Diagnostics.Trace.WriteLine("FormatHtml: could not extract fields: " + ex.Message);
        return "";
    }
}
/// <summary>
/// Downloads the page at the given URL and returns its body as text.
/// </summary>
/// <param name="strUrl">Address of the page; surrounding whitespace is ignored.</param>
/// <returns>
/// The response body decoded with the system default encoding, or an empty
/// string when the URL is blank or the request fails for any reason.
/// </returns>
public string GetRequestString(string strUrl)
{
    if (strUrl == null)
    {
        return "";
    }

    string pageUrl = strUrl.Trim();
    if (pageUrl.Length == 0)
    {
        return "";
    }

    try
    {
        System.Net.HttpWebRequest request =
            (System.Net.HttpWebRequest)System.Net.WebRequest.Create(pageUrl);
        request.UserAgent =
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322;.NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24";

        // using blocks release the connection, stream and reader even when
        // reading the body throws.
        using (System.Net.WebResponse response = request.GetResponse())
        using (System.IO.Stream resStream = response.GetResponseStream())
        using (System.IO.StreamReader reader =
            new System.IO.StreamReader(resStream, System.Text.Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best effort: callers treat "" as "page unavailable". The failure
        // is traced so it can still be diagnosed.
        System.Diagnostics.Trace.WriteLine("GetRequestString: request for '" + pageUrl + "' failed: " + ex.Message);
        return "";
    }
}
/// <summary>
/// Extracts the item number: the run of word characters that directly
/// follows the first tag ending in bgcolor='#FFEEFD'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The captured item number, or an empty string when the input is null or nothing matches.</returns>
public string GetNumCode(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex numberPattern = new Regex("bgcolor=\'\\#FFEEFD\'>(\\w*)");
    Match m = numberPattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the market price: the non-whitespace text inside the s element
/// that follows the first " class=goodsmoney>" tag end.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The captured price text, or an empty string when the input is null or nothing matches.</returns>
public string GetPrice(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex pricePattern = new Regex(" class=goodsmoney><s>(\\S*)</s>");
    Match m = pricePattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the surface finish: the text of the first table cell written
/// with height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or no such cell exists.</returns>
public string GetDeal(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");
    Match m = cellPattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the material: the text of the second table cell written with
/// height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or fewer than two such cells exist.</returns>
public string GetStuff(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");

    // NextMatch on a failed match simply returns another failed match, so
    // the chain is safe even when the page has too few cells.
    Match m = cellPattern.Match(strSomeCodes).NextMatch();

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the women's ring size: the text of the third table cell written
/// with height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or fewer than three such cells exist.</returns>
public string GetWoman(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");

    // NextMatch on a failed match simply returns another failed match, so
    // the chain is safe even when the page has too few cells.
    Match m = cellPattern.Match(strSomeCodes).NextMatch().NextMatch();

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the men's ring size: the text of the fourth table cell written
/// with height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or fewer than four such cells exist.</returns>
public string GetMan(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");

    // NextMatch on a failed match simply returns another failed match, so
    // the chain is safe even when the page has too few cells.
    Match m = cellPattern.Match(strSomeCodes).NextMatch().NextMatch().NextMatch();

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the product description: the text inside the FONT element that
/// follows a closing DIV tag, with every "&lt;br&gt;" removed.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The description text, or an empty string when the input is null or nothing matches.</returns>
public string GetInfo(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    // NOTE(review): "[^/td]" is a character class, so the capture stops at
    // the first '/', 't' or 'd' rather than at a literal "</td". Left as is
    // because the target page markup is not visible here - confirm intent.
    Regex infoPattern = new Regex("</DIV>\\s*<FONT.>([^/td]*)<\\/FONT>");
    Match m = infoPattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    if (!m.Success)
    {
        return "";
    }
    return m.Groups[1].Value.Replace(@"<br>", "");
}
/// <summary>
/// Fetches every URL listed in textBox1 (one per line), extracts the product
/// fields from each page, shows the numbered tab-separated rows in textBox2
/// and writes the same text to c:\test\ouput.txt.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    const string outputPath = @"c:\test\ouput.txt";

    StringBuilder rows = new StringBuilder();
    int rowNumber = 1;
    foreach (string line in FormatBox(textBox1.Text))
    {
        // Splitting on '\n' alone can leave a trailing '\r' and blank
        // entries; neither is a usable URL, so clean up and skip them.
        string url = line.Trim();
        if (url.Length == 0)
        {
            continue;
        }

        string html = GetRequestString(url);
        rows.AppendLine(rowNumber.ToString() + "\t" + FormatHtml(html));
        rowNumber++;
    }

    string output = rows.ToString();
    textBox2.Text = output;

    // StreamWriter does not create missing folders, so make sure the
    // target directory exists before opening the file.
    Directory.CreateDirectory(Path.GetDirectoryName(outputPath));
    using (StreamWriter writer = new StreamWriter(outputPath)) // write the collected rows to the text file
    {
        writer.Write(output);
    }
}
/// <summary>
/// Splits the text box content into its individual lines.
/// </summary>
/// <param name="Boxtext">Raw text with one entry per line; may be null or empty.</param>
/// <returns>
/// The trimmed, non-blank lines in their original order; an empty array
/// when there are none.
/// </returns>
protected string[] FormatBox(string Boxtext)
{
    if (string.IsNullOrEmpty(Boxtext))
    {
        return new string[0];
    }

    // Split on both CR and LF so "\r\n" line endings do not leave a stray
    // '\r' at the end of every entry.
    string[] rawLines = Boxtext.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    System.Collections.Generic.List<string> lines = new System.Collections.Generic.List<string>(rawLines.Length);
    foreach (string rawLine in rawLines)
    {
        string line = rawLine.Trim();
        if (line.Length > 0)
        {
            lines.Add(line);
        }
    }
    return lines.ToArray();
}
/// <summary>
/// Builds one tab-separated record from a product page by running every
/// field extractor over the HTML.
/// </summary>
/// <param name="htm">HTML source of the product page.</param>
/// <returns>
/// The seven fields (item number, market price, surface finish, material,
/// women's ring size, men's ring size, description) joined with tabs, or an
/// empty string when any extractor throws.
/// </returns>
public string FormatHtml(string htm)
{
    try
    {
        string[] fields = new string[]
        {
            GetNumCode(htm),
            GetPrice(htm),
            GetDeal(htm),
            GetStuff(htm),
            GetWoman(htm),
            GetMan(htm),
            GetInfo(htm)
        };
        return string.Join("\t", fields);
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (an unparsable page yields an empty
        // record) but report the failure instead of hiding it completely.
        System.Diagnostics.Trace.WriteLine("FormatHtml: could not extract fields: " + ex.Message);
        return "";
    }
}
/// <summary>
/// Downloads the page at the given URL and returns its body as text.
/// </summary>
/// <param name="strUrl">Address of the page; surrounding whitespace is ignored.</param>
/// <returns>
/// The response body decoded with the system default encoding, or an empty
/// string when the URL is blank or the request fails for any reason.
/// </returns>
public string GetRequestString(string strUrl)
{
    if (strUrl == null)
    {
        return "";
    }

    string pageUrl = strUrl.Trim();
    if (pageUrl.Length == 0)
    {
        return "";
    }

    try
    {
        System.Net.HttpWebRequest request =
            (System.Net.HttpWebRequest)System.Net.WebRequest.Create(pageUrl);
        request.UserAgent =
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322;.NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24";

        // using blocks release the connection, stream and reader even when
        // reading the body throws.
        using (System.Net.WebResponse response = request.GetResponse())
        using (System.IO.Stream resStream = response.GetResponseStream())
        using (System.IO.StreamReader reader =
            new System.IO.StreamReader(resStream, System.Text.Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best effort: callers treat "" as "page unavailable". The failure
        // is traced so it can still be diagnosed.
        System.Diagnostics.Trace.WriteLine("GetRequestString: request for '" + pageUrl + "' failed: " + ex.Message);
        return "";
    }
}
/// <summary>
/// Extracts the item number: the run of word characters that directly
/// follows the first tag ending in bgcolor='#FFEEFD'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The captured item number, or an empty string when the input is null or nothing matches.</returns>
public string GetNumCode(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex numberPattern = new Regex("bgcolor=\'\\#FFEEFD\'>(\\w*)");
    Match m = numberPattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the market price: the non-whitespace text inside the s element
/// that follows the first " class=goodsmoney>" tag end.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The captured price text, or an empty string when the input is null or nothing matches.</returns>
public string GetPrice(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex pricePattern = new Regex(" class=goodsmoney><s>(\\S*)</s>");
    Match m = pricePattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the surface finish: the text of the first table cell written
/// with height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or no such cell exists.</returns>
public string GetDeal(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");
    Match m = cellPattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the material: the text of the second table cell written with
/// height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or fewer than two such cells exist.</returns>
public string GetStuff(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");

    // NextMatch on a failed match simply returns another failed match, so
    // the chain is safe even when the page has too few cells.
    Match m = cellPattern.Match(strSomeCodes).NextMatch();

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the women's ring size: the text of the third table cell written
/// with height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or fewer than three such cells exist.</returns>
public string GetWoman(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");

    // NextMatch on a failed match simply returns another failed match, so
    // the chain is safe even when the page has too few cells.
    Match m = cellPattern.Match(strSomeCodes).NextMatch().NextMatch();

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the men's ring size: the text of the fourth table cell written
/// with height='25' and bgcolor='#ffffff'.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The cell text, or an empty string when the input is null or fewer than four such cells exist.</returns>
public string GetMan(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    Regex cellPattern = new Regex("<td height='25' bgcolor='\\#ffffff'>([^<]*)</td>");

    // NextMatch on a failed match simply returns another failed match, so
    // the chain is safe even when the page has too few cells.
    Match m = cellPattern.Match(strSomeCodes).NextMatch().NextMatch().NextMatch();

    // A failed match has no captures; indexing Captures[0] would throw.
    return m.Success ? m.Groups[1].Value : "";
}
/// <summary>
/// Extracts the product description: the text inside the FONT element that
/// follows a closing DIV tag, with every "&lt;br&gt;" removed.
/// </summary>
/// <param name="strSomeCodes">HTML source to search; may be null.</param>
/// <returns>The description text, or an empty string when the input is null or nothing matches.</returns>
public string GetInfo(string strSomeCodes)
{
    if (strSomeCodes == null)
    {
        return "";
    }

    // NOTE(review): "[^/td]" is a character class, so the capture stops at
    // the first '/', 't' or 'd' rather than at a literal "</td". Left as is
    // because the target page markup is not visible here - confirm intent.
    Regex infoPattern = new Regex("</DIV>\\s*<FONT.>([^/td]*)<\\/FONT>");
    Match m = infoPattern.Match(strSomeCodes);

    // A failed match has no captures; indexing Captures[0] would throw.
    if (!m.Success)
    {
        return "";
    }
    return m.Groups[1].Value.Replace(@"<br>", "");
}
相关文章推荐
- 被管理员和谐了的最高票答案“知乎数据抓取程序”(.net、c#数据挖掘)
- ios开发-程序压后台后,悄悄的抓取数据~~
- 一个简单的使用python抓取网页中的水文数据的程序
- 数据抓取的艺术(二):数据抓取程序优化
- 抓取网页数据的小程序
- 项目总结之数据抓取程序
- 网站上抓取数据并且自动发帖到论坛程序
- 发一个python写的多线程 代理服务器 抓取,保存,验证程序,希望喜欢python的朋友和我一起完善它
- Android 通过代理程序抓取请求数据
- Asp.net 数据采集(远程抓取,小偷程序) 基类
- 新浪微博数据挖掘食谱之十五: 爬虫篇 (抓取用户的朋友)
- 帮朋友做的一个导出列或行数据的小程序
- 爬虫小程序(实习僧网抓取数据)
- 程序抓取网站数据HttpWebRequest
- python2抓取某虚拟币网数据的小程序
- 一个极其简洁的Python网页抓取程序,自动从雅虎财经抓取股票数据
- selenium和casperjs2种数据抓取方式(进来的朋友请留言,共同探讨)
- charles抓取微信小程序数据(抓取http和https数据)
- python2抓取某虚拟币网数据小程序
- 一个极其简洁的Python网页抓取程序,自己主动从雅虎財经抓取股票数据