您的位置:首页 > 数据库 > MySQL

nodejs 爬取热点明星存入mysql数据库

2016-12-06 17:18 447 查看
配置文件说明:

/root/Spider/nodejs-server-server的project.json

{

"open_allbaidu_hot" : 1, //是否爬取所有百度热点信息

"open_allwangyi_hot":1,//是否爬取所有网易热点信息

"open_alljinritoujiao_hot":1,//是否爬取今日头条热点信息

"open_allweibo_hot":1,//是否爬取微博热点信息

"open_allxinlang_hot":1,//是否爬取新浪热点信息

"open_allbaidutieba_hot":1,//是否爬取百度贴吧热点信息

"open_baidu_hot_people":1,//是否爬取百度热点明星

"open_weibo_hot_people":1,//是否爬取微博热点明星

"timeInterval":1,//设置爬虫时间间隔,单位为小时

"db_user":"root",//数据库用户名

"password":"233233",//密码

"database":"nodejs"//数据库名字

}

主类别表:

mid: int 类别id,主键

mclassname: varchar(20) 主类名

次类别表:

Cid int 类别id,主键

Cclassname varchar(20) 次类名

成员表:

Id int 成员id,主键

Name varchar(20) 名字

mClass int 所属主类名id,外键

CClass int 所属次类名id,外键

Source varchar(50) 来源网站

Description varchar(2000) 信息描述

Hot varchar(5) 热度指数

Time varchar(20) 获取时间

登录 MySQL 查看结果:



重点内容

// Scrapes one Baidu Top "people" ranking page (selected by the outer `mxn`
// index), stores every (name, hot) pair in the MySQL `Member` table and caches
// the resulting list as JSON in redis under the key 'Hot4' + (mxn + 10000).
//
// Relies on names provided by the surrounding file: http, BufferHelper, iconv,
// mysql, project (db_user / password / database), client, redis and mxn.

// Category labels; they appear to line up one-to-one with `bmsites`
// (same length, same order) — TODO confirm against the caller.
var sources = ["热点人物","娱乐名人","女演员","男演员","演员","女歌手","男歌手","歌手","名家人物","主持人","体坛人物","美女","帅哥","选秀歌手","欧美明星","财经人物","互联网人物","历史人物","公益明星"]
// One ranking page URL per category.
var bmsites = ["http://top.baidu.com/buzz?b=258&c=9&fr=topcategory_c9","http://top.baidu.com/buzz?b=618&c=9&fr=topbuzz_b258_c9","http://top.baidu.com/buzz?b=18&c=9&fr=topbuzz_b18_c9","http://top.baidu.com/buzz?b=17&c=9&fr=topbuzz_b18_c9","http://top.baidu.com/buzz?b=1395&c=9&fr=topbuzz_b17_c9","http://top.baidu.com/buzz?b=16&c=9&fr=topbuzz_b1395_c9","http://top.baidu.com/buzz?b=15&c=9&fr=topbuzz_b16_c9","http://top.baidu.com/buzz?b=1396&c=9&fr=topbuzz_b15_c9","http://top.baidu.com/buzz?b=260&c=9&fr=topbuzz_b1396_c9","http://top.baidu.com/buzz?b=454&c=9&fr=topbuzz_b260_c9","http://top.baidu.com/buzz?b=255&c=9&fr=topbuzz_b454_c9","http://top.baidu.com/buzz?b=3&c=9&fr=topbuzz_b255_c9","http://top.baidu.com/buzz?b=22&c=9&fr=topbuzz_b3_c9","http://top.baidu.com/buzz?b=493&c=9&fr=topbuzz_b22_c9","http://top.baidu.com/buzz?b=491&c=9&fr=topbuzz_b493_c9","http://top.baidu.com/buzz?b=261&c=9&fr=topbuzz_b491_c9","http://top.baidu.com/buzz?b=257&c=9&fr=topbuzz_b261_c9","http://top.baidu.com/buzz?b=259&c=9&fr=topbuzz_b257_c9","http://top.baidu.com/buzz?b=612&c=9&fr=topbuzz_b259_c9"]
// Scraped entries ({name, hot}) collected for the redis payload.
var bmingxingentity = [];
var examples = {};

// Returns the text between the first '>' and the last '<' of an HTML fragment,
// with the first '<' and first '>' of that span removed.
var extractText = function(fragment){
    var span = fragment.match(/\>.*\</gm)[0];
    return span.replace("<","").replace(">","");
};

var mxurl = bmsites[mxn];
console.log(bmsites.length)
var req = http.get(mxurl,function(res){
    var buffer = new BufferHelper();
    res.on('data',function(data){
        buffer.concat(data);
    }).on('end',function(){
        try{
            // The response body is decoded as GBK before matching.
            var str = iconv.decode(buffer.toBuffer(),'gbk');
            var matches = str.match(/href_top.*>.*\</gm);
            var mxhot = str.match(/span class=\"icon\-.*>.*\</gm);

            // Nothing to store: bail out before opening a DB connection and
            // without overwriting the cached list in redis.
            if(matches == null || mxhot == null){
                console.log('No hot entries found in ' + mxurl);
                return;
            }

            // Names and hot values are paired by position; only iterate over
            // positions that have both, so a mismatch cannot throw mid-loop.
            var count = Math.min(matches.length, mxhot.length);
            if(count !== matches.length){
                console.log('Name/hot count mismatch for ' + mxurl + ': ' + matches.length + ' vs ' + mxhot.length);
            }

            var CID = mxn + 1;
            var baidu = "baidu";
            var now = new Date();
            // getMonth() is 0-based, so add 1 to store the calendar month.
            var time = now.getFullYear() + "-" + (now.getMonth() + 1) + "-" + now.getDate() + " " + now.getHours() + ":" + now.getMinutes();

            var conn = mysql.createConnection({
                host: 'localhost',
                user: project.db_user,
                password: project.password,
                database: project.database,
                port: 3306
            });
            console.log("begin to connect");
            conn.connect();
            try{
                for(var i = 0; i < count; i++){
                    var name = extractText(matches[i]);
                    var hotText = extractText(mxhot[i]);

                    // Placeholders let the driver escape scraped text, so a
                    // quote inside a name cannot break or alter the statement.
                    conn.query(
                        "insert into Member (name,hot,mid,source,cid,time) values (?,?,?,?,?,?)",
                        [name, parseFloat(hotText), 1, baidu, CID, time],
                        function(err, rs, fields){
                            if(err) {
                                console.log('ClientConnectionReady Error: ' + err.message);
                                return;
                            }
                        }
                    );

                    bmingxingentity.push({
                        'name': name,
                        'hot': parseInt(hotText, 10)
                    });
                }
            }finally{
                // Always release the connection, even if the loop throws.
                conn.end();
            }

            examples['application/json'] = bmingxingentity;
            console.log(10000);
            var data = JSON.stringify(bmingxingentity, null, 2);
            client.set('Hot4' + (mxn+10000), data, redis.print);
        }catch(e){
            // Report instead of silently dropping the failure.
            console.log('Failed to process ' + mxurl + ': ' + e.message);
        }

    }).on('close',function(){
        console.log('Close recevied!');
    });
});
req.on('error',function(error){
    console.log('Request to ' + mxurl + ' failed: ' + error.message);
});
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: