您的位置:首页 > 其它

winform抓取淘宝宝贝详细页的上下架时间等信息

2014-05-20 23:08 190 查看
在窗体上拖放几个控件,如下图所示:



后台代码:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Text.RegularExpressions;

/// <summary>
/// Form that downloads the item pages whose URLs are typed into <c>txtInput</c>,
/// extracts title, price and listing start/end times from each page, and shows
/// the collected <see cref="Product"/> rows in the <c>dgResult</c> grid.
/// </summary>
public partial class Form9 : Form
{
    // "HH" is the 24-hour clock. The lowercase "hh" specifier is the 12-hour clock and,
    // without an AM/PM designator, renders 15:00 and 03:00 identically.
    private const string DateTimeCellFormat = "yyyy-MM-dd HH:mm:ss";

    // The timestamps are read from the "starts=" / "ends=" query parameters of the
    // button's data-api value. Matching only the digits after the parameter name keeps
    // the extraction independent of parameter order and of "&amp;"-encoded separators.
    private const string StartTickPattern = "(?<=\\bstarts=)\\d+";
    private const string EndTickPattern = "(?<=\\bends=)\\d+";

    private static readonly DateTime UnixEpochUtc = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    // Results accumulate across queries until btnClear_Click empties the list.
    private List<Product> proList = new List<Product>();

    public Form9()
    {
        InitializeComponent();

        this.dgResult.Columns["StartTime"].DefaultCellStyle.Format = DateTimeCellFormat;
        this.dgResult.Columns["EndTime"].DefaultCellStyle.Format = DateTimeCellFormat;
    }

    /// <summary>
    /// Converts a Unix timestamp (seconds elapsed since 1970-01-01 00:00:00 UTC)
    /// to a <see cref="System.DateTime"/> expressed in the local time zone.
    /// </summary>
    /// <param name="d">Number of seconds since the Unix epoch.</param>
    /// <returns>The corresponding local date and time.</returns>
    public System.DateTime UnixToDateTime(long d)
    {
        // Add the offset in UTC first and convert afterwards, so the UTC offset that
        // applies at the resulting instant is used rather than the one of January 1970.
        return UnixEpochUtc.AddSeconds(d).ToLocalTime();
    }

    /// <summary>
    /// Extracts a Unix timestamp from <paramref name="input"/> using a regular expression.
    /// </summary>
    /// <param name="input">Text that contains the timestamp.</param>
    /// <param name="pattern">Regular expression whose first match is the timestamp digits.</param>
    /// <returns>
    /// The timestamp in seconds, or 0 when the pattern does not match or the match
    /// is not a valid integer.
    /// </returns>
    private long GetUnixTick(string input, string pattern)
    {
        if (string.IsNullOrEmpty(input) || string.IsNullOrEmpty(pattern))
        {
            return 0;
        }

        Match match = Regex.Match(input, pattern, RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            // Nothing to parse; report "not found" rather than indexing an empty match list.
            return 0;
        }

        long result;
        if (!Int64.TryParse(match.Value, out result))
        {
            return 0;
        }

        // The page supplies the value with three extra trailing digits
        // (e.g. 1401119998000), so drop them to get seconds.
        if (result > 0)
        {
            result = result / 1000;
        }
        return result;
    }

    /// <summary>
    /// Fetches every URL entered in <c>txtInput</c> (separated by whitespace), parses the
    /// pages and appends one <see cref="Product"/> per successfully parsed page to the grid.
    /// </summary>
    private void btnQuery_Click(object sender, EventArgs e)
    {
        // A null separator array splits on any whitespace; RemoveEmptyEntries keeps an
        // empty text box from producing a single "" entry that would be requested as a URL.
        string[] arrUrl = txtInput.Text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        Encoding pageEncoding = Encoding.GetEncoding("GBK");

        foreach (string url in arrUrl)
        {
            // Utils.GetHtmlSource is a project helper that is not part of this listing;
            // a null or empty result is treated as "page unavailable" and skipped.
            string html = Utils.GetHtmlSource(url, pageEncoding);
            if (string.IsNullOrEmpty(html))
            {
                continue;
            }

            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(html);
            HtmlNode rootNode = document.DocumentNode;

            // The data-api attribute of this button carries the timestamps.
            string unixStr = GetNodeAttr(rootNode, "//button[@id='J_listBuyerOnView']");
            if (string.IsNullOrEmpty(unixStr))
            {
                continue;
            }

            // Listing start time
            DateTime dtStart = UnixToDateTime(GetUnixTick(unixStr, StartTickPattern));
            // Listing end time
            DateTime dtEnd = UnixToDateTime(GetUnixTick(unixStr, EndTickPattern));
            // Title
            string title = GetNodeText(rootNode, "//title");
            // Price: page markup is machine data, so parse it independently of the UI
            // culture; the price stays 0 when the text is not a plain number.
            decimal price;
            string priceTmp = GetNodeText(rootNode, "//em[@class='tb-rmb-num']");
            if (!decimal.TryParse(
                    priceTmp,
                    System.Globalization.NumberStyles.Number,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out price))
            {
                price = 0;
            }

            proList.Add(new Product()
            {
                Url = url,
                Title = title,
                Price = price,
                StartTime = dtStart,
                EndTime = dtEnd
            });
        }

        if (proList.Count == 0)
        {
            MessageBox.Show("没有找到符合条件的数据,输入网址是否正确?");
            return;
        }

        BindResults();
    }

    /// <summary>
    /// Returns the value of the <c>data-api</c> attribute of the first node matching
    /// <paramref name="path"/>.
    /// </summary>
    /// <param name="rootNode">Node to search from.</param>
    /// <param name="path">XPath expression selecting the node.</param>
    /// <returns>The attribute value, or an empty string when the node or attribute is missing.</returns>
    private string GetNodeAttr(HtmlNode rootNode, string path)
    {
        HtmlNode temp = rootNode.SelectSingleNode(path);
        if (temp == null)
        {
            return "";
        }

        // The attribute indexer yields null when the attribute is absent.
        HtmlAttribute attribute = temp.Attributes["data-api"];
        if (attribute == null || attribute.Value == null)
        {
            return "";
        }
        return attribute.Value;
    }

    /// <summary>
    /// Returns the inner text of the first node matching <paramref name="path"/>.
    /// </summary>
    /// <param name="rootNode">Node to search from.</param>
    /// <param name="path">XPath expression selecting the node.</param>
    /// <returns>The node's inner text, or an empty string when no node matches.</returns>
    private string GetNodeText(HtmlNode rootNode, string path)
    {
        HtmlNode temp = rootNode.SelectSingleNode(path);
        if (temp != null)
        {
            return temp.InnerText;
        }
        return "";
    }

    /// <summary>
    /// Discards all collected results and refreshes the grid.
    /// </summary>
    private void btnClear_Click(object sender, EventArgs e)
    {
        proList.Clear();
        BindResults();
    }

    /// <summary>
    /// Rebinds <c>dgResult</c> to the current contents of <c>proList</c>.
    /// </summary>
    private void BindResults()
    {
        BindingList<Product> bindingList = new BindingList<Product>(proList);
        dgResult.DataSource = new BindingSource(bindingList, null);
    }
}
/// <summary>
/// Data extracted from a single item page; one instance is one row of results.
/// </summary>
public class Product
{
    /// <summary>Address of the page the data was read from.</summary>
    public String Url { get; set; }

    /// <summary>Text of the page's title element.</summary>
    public String Title { get; set; }

    /// <summary>Parsed price; 0 when no price could be parsed.</summary>
    public Decimal Price { get; set; }

    /// <summary>Time at which the listing starts.</summary>
    public System.DateTime StartTime { get; set; }

    /// <summary>Time at which the listing ends.</summary>
    public System.DateTime EndTime { get; set; }
}


运行结果如下:

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: