Python爬虫抓取携程网机票信息并发邮件通知
2017-04-25 12:55
579 查看
背景:
由于要买机票,所以一直进行搜索,爬虫可以帮我解决这个问题;
用Python抓取携程网机票信息 过程纪实(上篇)
解释的超级详细。
于是通过这一过程,基本了解了一些;
查询 上海 到 西安 4.29~05.02的机票:
发送邮件程序(我找到出处就放上来):
将查询信息和发送邮件的程序整合起来,大概就是这样,
然后使用 crontab 做一个定时任务,每 20 分钟执行一次;
基本格式 :
so,
尽管有很多问题,正在学习。
由于要买机票,所以一直进行搜索,爬虫可以帮我解决这个问题;
用Python抓取携程网机票信息 过程纪实(上篇)
解释的超级详细。
于是通过这一过程,基本了解了一些;
查询 上海 到 西安 4.29~05.02的机票:
#coding:utf-8 import urllib2 from lxml import etree import json import random import sys reload(sys) sys.setdefaultencoding('utf8') def get_json2(date,rk,CK,r): '''根据构造出的url获取到航班数据''' url= "http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=SHA&ACity1=SIA&SearchType=S&DDate1=%s&IsNearAirportRecommond=0&rk=%s&CK=%s&r=%s"%(date,rk,CK,r) headers={'Host':"flights.ctrip.com",'User-Agent':"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0",'Referer':"http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=2017-04-29"} headers['Referer']="http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=%s"%date req=urllib2.Request(url,headers=headers) res=urllib2.urlopen(req) content=res.read() dict_content=json.loads(content,encoding="gb2312") length = len(dict_content['fis']) # print length i = 0 for i in range(length): if ((dict_content['fis'][i][u'lp']) < 600 ): print (dict_content['fis'][i][u'lp']), print (dict_content['fis'][i][u'dt']), print (dict_content['fis'][i][u'at']) #print (dict_content['fis'][i][u'dpbn']) def get_parameter(date): '''获取重要的参数 date:日期,格式示例:2016-05-13 ''' url='http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=%s'%date res=urllib2.urlopen(url).read() tree=etree.HTML(res) pp=tree.xpath('''//body/script[1]/text()''')[0].split() CK_original=pp[3][-34:-2] CK=CK_original[0:5]+CK_original[13]+CK_original[5:13]+CK_original[14:] rk=pp[-1][18:24] num=random.random()*10 num_str="%.15f"%num rk=num_str+rk r=pp[-1][27:len(pp[-1])-3] return rk,CK,r if __name__=='__main__': dates=['2017-04-29','2017-04-30','2017-05-01','2017-05-02'] for date in dates: rk,CK,r=get_parameter(date) get_json2(date,rk,CK,r) print "-----"
发送邮件程序(我找到出处就放上来):
# -*- coding: utf-8 -*-
# Interactive one-off mail sender: prompts for account details on stdin and
# delivers a single test message over plain SMTP.
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
import smtplib


def _format_addr(s):
    # Split "Display Name <box@host>" and rebuild it with the display name
    # RFC 2047-encoded so non-ASCII names survive the mail headers.
    name, addr = parseaddr(s)
    encoded_name = Header(name, 'utf-8').encode()
    if isinstance(addr, unicode):
        addr = addr.encode('utf-8')
    return formataddr((encoded_name, addr))


# Collect connection details interactively (order matters: From, Password,
# To, then the SMTP host).
from_addr = raw_input('From: ')
password = raw_input('Password: ')
to_addr = raw_input('To: ')
smtp_server = raw_input('SMTP server: ')

# Assemble the message envelope.
mail = MIMEText('Not just fly fight...', 'plain', 'utf-8')
mail['From'] = _format_addr(u'Air <%s>' % from_addr)
mail['To'] = _format_addr(u'126.Air <%s>' % to_addr)
mail['Subject'] = Header(u'flight……', 'utf-8').encode()

# Deliver on port 25; debug level 1 echoes the SMTP dialogue (use 0 in production).
conn = smtplib.SMTP(smtp_server, 25)
conn.set_debuglevel(1)
conn.login(from_addr, password)
conn.sendmail(from_addr, [to_addr], mail.as_string())
conn.quit()
将查询信息和发送邮件的程序整合起来,大概就是这样,
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Combined script: polls Ctrip for SHA -> SIA fares and mails an alert for
# every flight at 450 or below.  Intended to be run periodically from cron.
import urllib2
from lxml import etree
import json
import random
from email import encoders
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
import smtplib
import sys
reload(sys)
sys.setdefaultencoding('utf8')  # Py2-only hack so gb2312-decoded text prints

# Mail account settings (redacted); the commented raw_input calls show the
# interactive alternative.
from_addr = "****@126.com" #raw_input('From: ')
password = "******" #raw_input('Password: ')
to_addr = "********@qq.com" #raw_input('To: ')
smtp_server = "smtp.126.com"#raw_input('SMTP server: ')

def _format_addr(s):
    # Encode the display name of "Name <addr>" per RFC 2047 for mail headers.
    name, addr = parseaddr(s)
    return formataddr(( \
        Header(name, 'utf-8').encode(), \
        addr.encode('utf-8') if isinstance(addr, unicode) else addr))

def get_json2(date,rk,CK,r):
    '''Fetch one day's flight data via the constructed search URL; print
    flights under 600 and e-mail an alert for those at 450 or less.

    date: departure date string, e.g. '2017-04-29'
    rk, CK, r: anti-crawler tokens obtained from get_parameter()
    '''
    url= "http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=SHA&ACity1=SIA&SearchType=S&DDate1=%s&IsNearAirportRecommond=0&rk=%s&CK=%s&r=%s"%(date,rk,CK,r)
    headers={'Host':"flights.ctrip.com",'User-Agent':"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0",'Referer':"http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=2017-04-29"}
    # The Referer must carry the queried date, so overwrite the placeholder above.
    headers['Referer']="http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=%s"%date
    req=urllib2.Request(url,headers=headers)
    res=urllib2.urlopen(req)
    content=res.read()
    dict_content=json.loads(content,encoding="gb2312")
    # 'fis' is the flight list.  'lp' is treated as the fare; 'dt'/'at' are
    # presumably departure/arrival times and 'dpbn' some place/airline name
    # (used in the "in %s" slot of the alert text) — TODO confirm.
    length = len(dict_content['fis'])
    # print length
    i = 0
    for i in range(length):
        if ((dict_content['fis'][i][u'lp']) < 600 ):
            print (dict_content['fis'][i][u'lp']),
            print (dict_content['fis'][i][u'dt']),
            print (dict_content['fis'][i][u'at']),
            print (dict_content['fis'][i][u'dpbn'])
            if ((dict_content['fis'][i][u'lp']) <= 450 ):
                # Cheap enough: send an alert mail.  Note a fresh SMTP
                # session is opened per matching flight.
                msg = MIMEText(('%r at %s in %s'% ((dict_content['fis'][i][u'lp']),(dict_content['fis'][i][u'dt']),(dict_content['fis'][i][u'dpbn']))),'plain', 'utf-8')
                msg['From'] = _format_addr(u'Air <%s>' % from_addr)
                msg['To'] = _format_addr(u'126.Air <%s>' % to_addr)
                msg['Subject'] = Header(u'flight…%r '%(dict_content['fis'][i][u'lp']), 'utf-8').encode()
                server = smtplib.SMTP(smtp_server, 25)
                server.set_debuglevel(0)
                server.login(from_addr, password)
                server.sendmail(from_addr, [to_addr], msg.as_string())
                server.quit()

def get_parameter(date):
    '''Scrape the anti-crawler parameters (rk, CK, r) from the booking page.

    date: date string, e.g. '2016-05-13'

    NOTE(review): the hard-coded slice offsets below target the first inline
    <script> of the 2017-era page and will break if the site changes.
    '''
    url='http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=%s'%date
    res=urllib2.urlopen(url).read()
    tree=etree.HTML(res)
    pp=tree.xpath('''//body/script[1]/text()''')[0].split()
    CK_original=pp[3][-34:-2]
    # Re-shuffle CK: move character 13 in front of characters 5..12.
    CK=CK_original[0:5]+CK_original[13]+CK_original[5:13]+CK_original[14:]
    rk=pp[-1][18:24]
    # Prefix rk with a random float so each request looks unique.
    num=random.random()*10
    num_str="%.15f"%num
    rk=num_str+rk
    r=pp[-1][27:len(pp[-1])-3]
    return rk,CK,r

if __name__=='__main__':
    dates=['2017-04-29','2017-04-30','2017-05-01']
    for date in dates:
        rk,CK,r=get_parameter(date)
        get_json2(date,rk,CK,r)
        print "-----"
然后使用crontab 做一个定时任务,每20mins执行一次;
基本格式 :
* * * * * command —— 五个字段依次为:分 时 日 月 周,最后为要执行的命令
so,
0,20,40 * * * * python ~/test.py
尽管有很多问题,正在学习。
相关文章推荐
- Python爬虫-爬取集思录的金融信息,并写入文件和检测数据变化发送邮件通知
- python爬虫抓取zabbix监控图,并发邮件
- Python爬虫框架Scrapy实战之批量抓取招聘信息
- python 爬虫实战 抓取学校bbs相关板块的发帖信息
- 通过python的paramiko抓取多台服务器信息,并通过html格式发送邮件到群组
- [Python爬虫] 之二十六:Selenium +phantomjs 利用 pyquery抓取智能电视网站图片信息
- Python爬虫之一 PySpider 抓取淘宝MM的个人信息和图片
- Python爬虫框架Scrapy实战之批量抓取招聘信息
- Python 爬虫-爬取阿里旅行特价机票信息(3)——完结
- Python爬虫一步步抓取房产信息
- python——爬虫实现网页信息抓取
- Python爬虫小项目(1):抓取转转网西安二手商品的详细信息并导入mongo,绘制图表,慢更
- python 爬虫抓取19楼租房信息
- Python网络爬虫之抓取订餐信息
- 利用python爬虫抓取OJ上做题信息(扩展版)
- 简单的抓取淘宝关键字信息、图片的Python爬虫|Python3中级玩家:淘宝天猫商品搜索爬虫自动化工具(第二篇)
- Python 爬虫-爬取阿里旅行特价机票信息(1)
- Python 爬虫-爬取阿里旅行特价机票信息(2)
- Python爬虫实现网页信息抓取功能示例【URL与正则模块】
- Python开发网络爬虫抓取某同城房价信息