
The difference between form data and request payload in HTTP requests from a Node.js crawler

2017-09-12 23:07
Reference:

http://www.cnblogs.com/btgyoyo/p/6141480.html
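In Chrome DevTools, a POST whose body is url-encoded key=value pairs is shown as Form Data (Content-Type: application/x-www-form-urlencoded), while a POST whose body is a raw JSON or text string is shown as Request Payload (a Content-Type such as application/json or text/plain). With the request module the difference comes down to which option carries the body. A minimal sketch (the URL and fields below are placeholders, not the CityAds endpoints used later):

let request = require('request');

// Form Data: `form` with an object is url-encoded for you.
request.post({
  url: 'https://example.com/api',                 // placeholder URL
  form: { json: 1, jobHash: 'abc123' }            // sent as json=1&jobHash=abc123
}, function (err, res, body) { /* ... */ });

// Request Payload: serialize the object yourself and pass it as the raw body,
// keeping whatever Content-Type the server expects.
request.post({
  url: 'https://example.com/api',                 // placeholder URL
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ json: 1, jobHash: 'abc123' })
}, function (err, res, body) { /* ... */ });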

Code snippet: the script below opens the CityAds stats page with a session cookie, creates an XLS export job (request payload), polls the job status until it is ready (form data), downloads the file, and parses it with node-xlsx.

let request = require('request');
let cheerio = require('cheerio');
let async=require('async');
let querystring=require('querystring');
let login_url='https://auth2.cityads.com/login/';
let offers_url='https://cityads.com/stat/analytics/offers';
let create_job="https://cityads.com/stat/ds/create_job";
let get_job_status="https://cityads.com/stat/ds/get_job_status";
let fs=require('fs');
let path=require('path');
let xlsx = require('node-xlsx');
let jobHash='';
let file_path='';
let headers = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
};

function start() {
  async.waterfall([
    function (cb) {
      // This step can be omitted: a plain GET to the offers page using the session cookie.
      let _headers = headers; // same object as the top-level `headers` (not a copy), so added fields persist across steps
      _headers["Cookie"] = "PHPSESSID=8rkhggprglk1an1cg4g2sjahg4; _ym_uid=1505217442926426080; _ym_isad=2; _ga=GA1.2.1435829105.1505217441; _gid=GA1.2.307976531.1505217441; storage_key_stat=106009bbb914944c62857aead94b7b7229d4d5d1";
      let param = {
        url: offers_url,
        method: "GET",
        headers: _headers
      };
      request(param, function (err, res, body) {
        // console.log(body);
        cb(err);
      });
    }, function (cb) {
      // Report filter that CityAds expects as a JSON request payload.
      let filter = {
        "mainGroup": "203",
        "subGroup": "",
        "period": "11.09.2017 - 11.09.2017",
        "dateType": "event_time",
        "sort": "",
        "sdir": "",
        "cols": "is_sale,order_key,subaccount,click_time,lead_time,sale_time,lead_delta,sale_delta,total_delta,action_name,campaign_target_id,status,customer_type,payment_method,basket_count,original_total,currency,order_total,wm_currency,percent,commission_open_adv,commission_rejected_adv,commission_adv",
        "reportPageId": 11020001,
        "lang": "cn",
        "keyword": "",
        "complexFilter": [],
        "skin": "",
        "useSkin": "0",
        "jd": "",
        "userCurrency": "rub",
        "format": "xls"
      };
      let _headers = headers;
      _headers["Cookie"] = "PHPSESSID=8rkhggprglk1an1cg4g2sjahg4; _ym_uid=1505217442926426080; _ym_isad=2; _ga=GA1.2.1435829105.1505217441; _gid=GA1.2.307976531.1505217441; storage_key_stat=106009bbb914944c62857aead94b7b7229d4d5d1";
      _headers["origin"] = "https://cityads.com";
      _headers["referer"] = "https://cityads.com/stat/conversions";
      _headers["x-json"] = 1;
      _headers["x-requested-with"] = "XMLHttpRequest";
      _headers["Content-Type"] = "text/plain;charset=UTF-8";
      let param = {
        url: create_job,
        method: "POST",
        headers: _headers,
        // Request payload: pass the JSON string as the raw body so the Content-Type
        // above is kept (`form:` would switch it to application/x-www-form-urlencoded).
        body: JSON.stringify(filter)
      };
      request(param, function (err, res, body) {
        jobHash = JSON.parse(body)["jobHash"];
        cb(err, jobHash);
      });
    }, function (jobHash, callback) {
      // Poll get_job_status until the export job is ready.
      let flag = true;
      async.whilst(
        function () {
          return flag;
        },
        function (cb) {
          let _headers = headers;
          _headers["origin"] = "https://cityads.com";
          _headers["referer"] = "https://cityads.com/stat/conversions";
          _headers["x-json"] = 1;
          _headers["x-requested-with"] = "XMLHttpRequest";
          let param = {
            url: get_job_status,
            method: "POST",
            // Form data: `form` with an object is url-encoded and sent as
            // application/x-www-form-urlencoded.
            form: {
              json: 1,
              jobHash: jobHash
            },
            headers: _headers
          };
          request(param, function (err, res, body) {
            let result = JSON.parse(body);
            if (result["status"] === "success") {
              flag = false;
              return cb(err, result.uri);
            }
            cb();
          });
        },
        function (err, uri) {
          if (err) console.log(err);
          console.log("uri=", uri);
          callback(err, uri);
        });
    }, function (uri, cb) {
      // Download the generated XLS report.
      let download_url = "https://cityads.com" + uri;
      let _headers = headers;
      _headers["accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
      _headers["referer"] = "https://cityads.com/stat/conversions";
      // The response is piped straight to disk, so don't ask the server to compress it.
      // _headers["accept-encoding"] = "gzip, deflate, br";
      _headers["accept-language"] = "zh-CN,zh;q=0.8";
      _headers["upgrade-insecure-requests"] = 1;
      // _headers["Content-Type"] = "application/octet-stream";
      _headers["Cookie"] = "PHPSESSID=8rkhggprglk1an1cg4g2sjahg4; _ym_uid=1505217442926426080; _ym_isad=2; _ga=GA1.2.1435829105.1505217441; _gid=GA1.2.307976531.1505217441; storage_key_stat=106009bbb914944c62857aead94b7b7229d4d5d1";
      let param = {
        url: download_url,
        method: "GET",
        headers: _headers
      };
      let _file_path = uri.split("/")[3];
      file_path = _file_path;
      // Move on only after the file has been fully written to disk.
      request(param)
        .pipe(fs.createWriteStream(_file_path))
        .on('finish', cb);
    },
    function (cb) {
      read_file();
      cb();
    }
  ], function (err) {
    if (err) console.log(err);
  });
}
function read_file() {
  let _path = path.join(__dirname, file_path);
  let obj = xlsx.parse(_path);      // parse the downloaded XLS with node-xlsx
  let excelArray = obj[0].data;     // rows of the first sheet
  file_path = '';                   // reset the stored file name
  console.log(excelArray);
}

start();
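In the script above, create_job receives its filter as a request payload (a JSON string passed as the raw body), while get_job_status receives ordinary form data (`form` with a plain object). For endpoints that accept application/json, the request module also has a `json` option that serializes the body, sets the Content-Type and parses the JSON response in one step; a short sketch with a placeholder endpoint (the CityAds calls above do not use it):

// `json` with an object sends a JSON request payload and parses the JSON reply.
request.post({
  url: 'https://example.com/api/create_job',      // placeholder URL
  json: { format: 'xls', lang: 'cn' }             // body becomes {"format":"xls","lang":"cn"}
}, function (err, res, body) {
  if (err) return console.log(err);
  console.log(body);                              // already an object, no JSON.parse needed
});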
Tags: crawler