您的位置:首页 > Web前端 > Node.js

node爬虫1

2020-07-14 06:10 429 查看

使用 Node.js 从古诗文网爬取《唐诗三百首》,并将每首诗的标题和内容保存到 MySQL 数据库。

var myRequest = require('request')
var myCheerio = require('cheerio')
var mysql = require('mysql')
var fs = require('fs')

// Connection settings for the MySQL database that receives the scraped poems.
const options = {
  host: "127.0.0.1",
  port: "3306",
  user: "root",
  password: "root",
  database: "tangpoem",
};

// Single shared connection; `next` issues its INSERT statements through it.
const con = mysql.createConnection(options);

// Open the connection right away. A failure is only reported, not rethrown,
// so the script keeps running after a failed connect.
con.connect(function (err) {
  if (!err) {
    return;
  }
  console.log(err);
});

// Index page listing the poems; this is where the crawl starts.
const myURL = 'https://so.gushiwen.org/gushi/tangshi.aspx';

/**
 * Download `url` through the `request` library.
 *
 * `encoding: null` and `headers: null` are passed through unchanged; with the
 * request library a null encoding is documented to deliver the body as a raw
 * Buffer rather than a decoded string.
 *
 * @param {string} url - Address to fetch.
 * @param {Function} callback - Forwarded as-is to `myRequest`; invoked by it
 *   as `(err, res, body)`.
 */
function request(url, callback) {
  myRequest({ url, encoding: null, headers: null }, callback);
}
// Entry point of the crawl: download the index page, collect every link inside
// `.sons` whose href starts with "/shiwenv", and hand each one to `next`.
request(myURL, function (err, res, body) {
  if (err) {
    // Without a response body there is nothing to parse; report and stop
    // instead of feeding `undefined` to cheerio.
    console.log(err);
    return;
  }

  const $ = myCheerio.load(body, { decodeEntities: false });
  // Built once, outside the loop, and kept local so no implicit globals leak.
  const poemPath = /^\/shiwenv/;

  $(".sons a").each(function (index, element) {
    const href = element.attribs.href;
    if (poemPath.test(href)) {
      next(href);
    }
  });
});
/**
 * Fetch one poem page and store its title and description in the `poem` table.
 *
 * The title is the text of the page's `h1` elements; the content is the
 * `content` attribute of the first `<meta name="description">` tag.
 *
 * @param {string} href - Site-relative path of the poem page (the caller
 *   passes hrefs that start with "/shiwenv").
 */
function next(href) {
  // Keep the parameter untouched and build the absolute URL separately.
  const poemUrl = "https://so.gushiwen.org" + href;

  request(poemUrl, function (err, res, body) {
    if (err) {
      // A failed download has no body; stop here so no junk row is inserted.
      console.log(err);
      return;
    }

    const $ = myCheerio.load(body, { decodeEntities: false });
    // Declared locally: assigning undeclared names would create globals and
    // throws in strict mode.
    const title = $("h1").text();
    const content = $('meta[name="description"]').eq(0).attr("content");

    // Placeholders let the driver escape the scraped text.
    const strsql = "insert into poem(title,content) values (?,?)";
    con.query(strsql, [title, content], (queryErr) => {
      if (queryErr) {
        console.log(queryErr);
      }
    });
  });
}

效果:

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: