您的位置：首页 > 其它

利用cheerio模块爬取某网页的图片并保存到本地文件夹

2020-06-27 04:54 274 查看

任意网页都可以用下面的方式爬取：

const http = require('http')

const cheerio = require('cheerio')

const fs=require('fs')

// Listing page whose <img> tags are scraped.
let url = 'http://www.nipic.com/photo/xiandai/jiaotong/index.html?page=4';

// Running counter appended to file names so that two responses arriving in
// the same millisecond cannot be written into the same file.
let downloadCount = 0;

/**
 * Download a single image and stream it into the ./tmp/ directory.
 *
 * The file is always saved with a ".png" extension, whatever the remote
 * format is (same as the original script).
 *
 * @param {string} url1 - Absolute http:// URL of the image.
 */
function tmp2(url1) {
  http.get(url1, (res) => {
    if (res.statusCode !== 200) {
      console.error(`Skipping ${url1}: HTTP ${res.statusCode}`);
      res.resume(); // drain the response so the socket is released
      return;
    }

    console.log(url1);

    const name = `${new Date().getTime()}_${downloadCount++}`;
    const file = fs.createWriteStream(`./tmp/${name}.png`);
    file.on('error', (err) => {
      console.error(`Failed to write ${url1}: ${err.message}`);
    });
    res.pipe(file);
  }).on('error', (err) => {
    // Without this handler a refused/reset connection would crash the process.
    console.error(`Failed to download ${url1}: ${err.message}`);
  });
}

/**
 * Extract every image URL from the page HTML and schedule its download.
 *
 * Reads the "data-src" attribute of each <img> tag, resolves it against the
 * page URL, and starts one download every 500 ms.
 *
 * @param {string} data - HTML source of the listing page.
 */
function tmp(data) {
  const $ = cheerio.load(data);
  const sources = [];

  // cheerio collections are iterated with .each(), not Array#forEach.
  $('img').each((index, item) => {
    const src = $(item).attr('data-src');
    if (!src) {
      return; // <img> without data-src: nothing to download
    }

    let resolved;
    try {
      resolved = new URL(src, url); // also handles relative and "//host/..." paths
    } catch (err) {
      console.error(`Skipping invalid image URL: ${src}`);
      return;
    }

    // http.get() throws synchronously on any protocol other than http:.
    if (resolved.protocol !== 'http:') {
      console.error(`Skipping unsupported protocol: ${resolved.href}`);
      return;
    }

    sources.push(resolved.href);
  });

  // The write streams below fail if the target directory does not exist.
  fs.mkdirSync('./tmp', { recursive: true });

  // Stagger the requests so the server is not hit with all of them at once.
  sources.forEach((src, index) => {
    setTimeout(() => {
      tmp2(src);
    }, 500 * index);
  });
}

// Fetch the listing page, buffer the whole body, then hand it to tmp().
http.get(url, (res) => {
  if (res.statusCode !== 200) {
    console.error(`Failed to fetch ${url}: HTTP ${res.statusCode}`);
    res.resume();
    return;
  }

  // Decode in the stream so multi-byte characters split across chunks
  // are not corrupted by string concatenation.
  res.setEncoding('utf8');

  let txt = '';
  res.on('data', (chunk) => {
    txt += chunk;
  });
  res.on('end', () => {
    tmp(txt);
  });
}).on('error', (err) => {
  console.error(`Failed to fetch ${url}: ${err.message}`);
});

爬虫框架如下，任意网页都可以套用该框架实现爬取：

const http = require('http')

const cheerio = require('cheerio')

const fs=require('fs')

// Page to scrape; replace with the target URL.
let url = 'http://www.nipic.com/photo/xiandai/jiaotong/index.html?page=4';

// Skeleton: fetch the page, buffer the whole body, then hand it to tmp().
http.get(url, (res) => {
  // Decode in the stream so multi-byte characters split across chunks
  // are not corrupted by string concatenation.
  res.setEncoding('utf8');

  let txt = '';
  res.on('data', (chunk) => {
    txt += chunk;
  });
  res.on('end', () => {
    tmp(txt);
  });
}).on('error', (err) => {
  // Without this handler a failed connection would crash the process.
  console.error(`Failed to fetch ${url}: ${err.message}`);
});

/**
 * Process the downloaded HTML (to be filled in per site).
 *
 * @param {string} data - HTML source of the fetched page.
 */
function tmp(data) {

}

/**
 * Download a single resource (to be filled in per site).
 *
 * @param {string} url1 - URL of the resource to download.
 */
function tmp2(url1) {

}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航