您的位置:首页 > Web前端 > JavaScript

[爬虫源码]和大家分享一下瓜子二手车上的二手车信息爬虫源码

2016-12-09 16:53 627 查看
使用 JavaScript 编写的爬虫源码,用于爬取瓜子二手车上的二手车信息

源码如下:

var scanUrl = "http://www.guazi.com/hz/buy/";//@input(scanUrl, 入口url, 请输入一个需爬取城市的url,格式为:“http://www.guazi.com/城市名称/buy/”)

/**
 * Extracts the city segment from an entry URL shaped like
 * "http://www.guazi.com/<city>/buy/".
 *
 * Every index is computed on the trimmed URL, so leading or trailing
 * whitespace in the input cannot shift the slice boundaries.
 *
 * @param {string} url - Entry URL, possibly padded with whitespace.
 * @returns {string|undefined} The text between the first ".com/" and the
 *     next "/buy/", or undefined when the input is not a string, is blank,
 *     or does not contain both markers around a non-empty segment.
 */
function extractCityFromUrl(url) {
    if (typeof url !== "string") {
        return undefined;
    }
    var trimmedUrl = url.trim();
    if (trimmedUrl.length === 0) {
        return undefined;
    }

    var hostMarker = ".com/";
    var hostMarkerAt = trimmedUrl.indexOf(hostMarker);
    if (hostMarkerAt === -1) {
        return undefined;
    }

    // The city starts right after ".com/" and ends where "/buy/" begins.
    var cityStart = hostMarkerAt + hostMarker.length;
    var cityEnd = trimmedUrl.indexOf("/buy/", cityStart);
    if (cityEnd <= cityStart) {
        // Covers both "marker not found" (-1) and an empty city segment.
        return undefined;
    }
    return trimmedUrl.substring(cityStart, cityEnd);
}

// City code taken from scanUrl; stays undefined when none can be extracted.
var city = extractCityFromUrl(scanUrl);

// Crawler configuration; it is handed to `new Crawler(configs)` further down.
// Built inside an immediately-invoked function so the helpers below do not
// become additional top-level names.
var configs = (function () {
    // Shared prefix of both URL patterns: the site root followed by the city.
    var cityUrlPrefix = "https?://www\\.guazi\\.com/" + city;

    // [field name, XPath selector] pairs, in output order.
    var fieldTable = [
        ["car_name", "//h1[contains(@class,'dt-titletype')]"],
        ["car_price", "//span[contains(@class,'fc-org pricestype')]"],
        ["car_license", "//li[contains(@class,'one')]/b"],
        ["car_mileage", "//ul[contains(@class,'assort')]/li[2]/b"],
        ["car_gearbox", "//ul[contains(@class,'assort')]/li[3]/b"],
        ["car_emission_standard", "//li[contains(@class,'em-sta detailHoverTips')]/b"],
        ["car_license_location", "//ul[contains(@class,'assort')]/li[5]/b"],
        ["car_owner", "//li[contains(@class,'owner')]/text()[2]"],
        ["car_description", "//*[@id='base']/p"]
    ];

    // Turns one table row into the { name, selector } object shape.
    function toField(row) {
        return { name: row[0], selector: row[1] };
    }

    return {
        domains: ["guazi.com"],
        scanUrls: [scanUrl],
        // Pattern for per-car pages: <city>/<word characters>.htm
        contentUrlRegexes: [cityUrlPrefix + "/\\w+\\.htm"],
        // Pattern for listing pages: <city>/buy/ with an optional "o<digits>/" part
        helperUrlRegexes: [cityUrlPrefix + "/buy/(o\\d+/)?"],
        enableJS: false,
        interval: 10000, // NOTE(review): presumably milliseconds between requests — confirm with platform docs
        fields: fieldTable.map(toField)
    };
})();

/**
 * Post-processes one extracted field value before it is returned to the caller.
 *
 * - "car_price": joins the content of the <b> element (with the yen entity
 *   turned into the "¥" character) and the remainder of the fragment.
 * - "car_owner": strips surrounding whitespace.
 * - "car_description": removes every empty <em></em> pair.
 * - any other field: returned unchanged.
 *
 * `extract` and `exclude` are not defined in this file.
 * NOTE(review): presumably platform helpers that return, respectively, the part
 * of `data` matching the XPath and `data` without that part — confirm.
 *
 * @param {string} fieldName - Name of the field, as declared in configs.fields.
 * @param {string} data - Raw extracted value for that field.
 * @param {*} page - Unused here; kept for the callback signature.
 * @returns {*} The cleaned value; null/undefined input is passed through as is.
 */
configs.afterExtractField = function(fieldName, data, page) {
    // Nothing was extracted: pass it through instead of throwing on .trim()/.replace().
    if (data == null) {
        return data;
    }

    if (fieldName === "car_price") {
        // NOTE(review): the published copy of this script searched for "¥" and
        // replaced it with "¥" (a no-op), most likely because the HTML entity was
        // decoded when the page was rendered. Searching for the entity is harmless
        // if that guess is wrong — confirm against the upstream sample.
        var price = extract(data, "//b").replace("&yen;", "¥");
        var coinUnit = exclude(data, "//b");
        return price + coinUnit;
    }

    if (fieldName === "car_owner") {
        return data.trim();
    }

    if (fieldName === "car_description") {
        // A string pattern would only remove the first occurrence; the global
        // regex removes all empty <em></em> pairs.
        return data.replace(/<em><\/em>/g, "");
    }

    return data;
};

// Build the crawler from the configuration object defined earlier and start it.
// NOTE(review): `Crawler` is not defined in this file; presumably it is a global
// supplied by the hosting crawler platform — confirm before running elsewhere.
var crawler = new Crawler(configs);
crawler.start();

代码运行方法及运行效果:

https://github.com/ShenJianShou/crawler_samples/blob/master/%E5%A6%82%E4%BD%95%E6%89%A7%E8%A1%8C%E6%A0%B7%E4%BE%8B%E4%BB%A3%E7%A0%81.txt
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
相关文章推荐