python搜索汽车票
2018-02-12 10:56
861 查看
一、背景
利用Requests模块获取页面,再用BeautifulSoup提取需要的内容,处理后返回结果。
二、代码
git源码地址
getinfo.py代码如下:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# _author:kaliarch
"""Scrape the selectable search options from the xaglkp.com ticket search page."""

import re

import requests
from bs4 import BeautifulSoup


class Getinformation:
    """Fetch the search page and extract the <option> lists it offers."""

    def __init__(self, timeout=10):
        """Store the search page URL.

        :param timeout: seconds to wait for the server before giving up;
            without a timeout ``requests`` may block forever.
        """
        self.init_url = 'http://www.xaglkp.com/ClassSearch/IndexPost'
        self.timeout = timeout

    def getsoup(self):
        """POST to the search page and parse the response.

        :return: BeautifulSoup object built from the response body
        :raises requests.exceptions.RequestException: on network failure
            or when the timeout expires
        """
        response = requests.post(self.init_url, timeout=self.timeout)
        return BeautifulSoup(response.text, 'html.parser')

    def getStartAddress(self, soup):
        """Collect the departure stations offered by the page.

        :param soup: parsed search page
        :return: dict mapping station name to the option's ``value``;
            the extra entry "全部" (all stations) maps to 1000
        """
        stations = soup.find_all('option', text=re.compile(r"\w+客运站"))
        # "全部" is not scraped from the page; it is always offered first.
        startaddrdict = {"全部": 1000}
        for option in stations:
            startaddrdict[option.text] = option['value']
        return startaddrdict

    def getStartDate(self, soup, date_pattern=r"20\d{2}"):
        """Collect the selectable departure dates.

        :param soup: parsed search page
        :param date_pattern: regex an option's text must contain to count
            as a date. The default accepts any year 20xx so the scraper
            keeps working after 2018 (the year that used to be hard-coded).
        :return: list of date strings as shown on the page
        """
        options = soup.find_all('option', text=re.compile(date_pattern))
        return [option.text for option in options]

    def getStartTime(self, soup):
        """Collect the selectable departure time filters.

        :param soup: parsed search page
        :return: list of time labels, always starting with '05:00后'
        """
        options = soup.find_all('option', text=re.compile(r"\w.*?后"))
        # '05:00后' is added unconditionally ahead of the scraped labels.
        return ['05:00后'] + [option.text for option in options]


if __name__ == '__main__':
    test = Getinformation()
    soup = test.getsoup()
    add = test.getStartAddress(soup)
    print(add)
main.py代码如下
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# _author:kaliarch
"""Interactive command-line bus ticket search against xaglkp.com."""

import getinfo
import requests
from bs4 import BeautifulSoup

# The selectable options are scraped once, when the module is loaded.
oper = getinfo.Getinformation()
soup = oper.getsoup()
startdic = oper.getStartAddress(soup)
startdate = oper.getStartDate(soup)
starttime = oper.getStartTime(soup)
init_url = 'http://www.xaglkp.com/ClassSearch/IndexPost'

_MAX_RETRIES = 5        # re-prompts allowed after the first wrong answer
_REQUEST_TIMEOUT = 10   # seconds; without it requests may block forever
_TITLE_COLUMNS = 9      # tokens taken from the result as the header row
_ROW_COLUMNS = 8        # tokens taken from the result for each trip row


def _print_choices(heading, choices):
    """Print a heading followed by the choices between two star rulers."""
    print(heading)
    print("*" * 20)
    for choice in choices:
        print(choice)
    print("*" * 20)


def _prompt_choice(first_prompt, retry_prompt, choices):
    """Ask until the answer is one of ``choices`` or retries run out.

    Every answer, including the last re-entered one, is validated.

    :return: the accepted answer, or None when all retries were wrong
    """
    answer = str(input(first_prompt))
    for remaining in range(_MAX_RETRIES, 0, -1):
        if answer in choices:
            return answer
        print("输入错误,还有%d次重试机会" % remaining)
        answer = str(input(retry_prompt))
    return answer if answer in choices else None


def getSadd():
    """Ask for the departure station.

    :return: the form value of the chosen station, or None if no valid
        station was entered within the allowed retries
    """
    _print_choices("起始站如下:", startdic)
    start = _prompt_choice("请输入出发地点:", "请从新输入出发地点:", startdic)
    return None if start is None else startdic[start]


def getDes():
    """Ask for the destination.

    :return: the text entered, or None when standard input could not be read
    """
    try:
        return str(input("请输入目的地:"))
    except (EOFError, UnicodeError) as e:
        print("输入错误:", e)
        return None


def getDate():
    """Ask for the departure date.

    :return: the chosen date string, or None if no valid date was entered
        within the allowed retries
    """
    _print_choices("出发日期如下:", startdate)
    return _prompt_choice("请输入出发时间:", "请从新输入出发时间:", startdate)


def getTime():
    """Ask for the earliest departure time.

    :return: the chosen label with the trailing '后' removed (for example
        '05:00'), or None if no valid label was entered within the retries
    """
    _print_choices("出车时间如下**(05:00表示所有)**:", starttime)
    chosen = _prompt_choice(
        "请输入发车时间(05:00表示所有):",
        "请从新输入发车时间(05:00表示所有):",
        starttime,
    )
    return None if chosen is None else chosen.split('后')[0]


def getPayData():
    """Collect all answers and build the form data for the search request.

    :return: dict of form fields to POST
    """
    sadd = getSadd()
    desadd = getDes()
    sdate = getDate()
    stime = getTime()
    return {
        'selected': sadd,
        'Arrive': desadd,
        'selected1': sdate,
        'selected2': stime,
        'page': '',
        # NOTE(review): this field carries the departure station value, not
        # the destination - confirm against the site's form before changing.
        'ArriveHidden': sadd,
    }


def getHeader():
    """Return the HTTP headers sent with the search request."""
    return {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'www.xaglkp.com',
        'Referer': 'http://www.xaglkp.com/ClassSearch/IndexPost',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
    }


def get_request():
    """Run the search and return every <table> element of the result page.

    :raises requests.exceptions.RequestException: on network failure or
        when the timeout expires
    """
    header = getHeader()
    paydata = getPayData()
    select_result = requests.post(
        init_url, headers=header, data=paydata, timeout=_REQUEST_TIMEOUT
    )
    result_soup = BeautifulSoup(select_result.text, 'html.parser')
    return result_soup.find_all('table')


def format_list():
    """Flatten the text of all result tables into whitespace-separated tokens.

    :return: list of tokens in page order
    """
    tokens = []
    for table in get_request():
        tokens.extend(table.text.split())
    return tokens


def title_context():
    """Print the number of trips found, the header row and one row per trip."""
    result_L = format_list()
    # The header takes the first _TITLE_COLUMNS tokens and every trip takes
    # _ROW_COLUMNS tokens after that, so the trip count has to be derived
    # from the row width rather than from the header width.
    trip_count = max(0, (len(result_L) - _TITLE_COLUMNS) // _ROW_COLUMNS)
    print("共搜索出%d趟车!详细信息如下:" % trip_count)
    print(result_L[:_TITLE_COLUMNS])
    for index in range(trip_count):
        begin = _TITLE_COLUMNS + index * _ROW_COLUMNS
        print(result_L[begin:begin + _ROW_COLUMNS])


if __name__ == '__main__':
    title_context()
三、效果展示
运行代码,填写所需信息,最终返回搜索结果;后期可将结果保存至Excel。
相关文章推荐
- python添加搜索路径
- python的搜索路径与包(package)
- 【Python排序搜索基本算法】之Prim算法
- 深度优先搜索—C—python
- 最近百度云盘不提供搜索,闲来无事,玩玩python爬虫,爬一下百度云盘的资源
- Python Import机制备忘-模块搜索路径(sys.path)、嵌套Import、package Import
- Python实验:百度搜索关键字自动打开相关URL
- Python实现抓取百度搜索结果页的网站标题信息
- Python模块搜索概念介绍及模块安装方法介绍
- Python开发【Django】:组合搜索、JSONP、XSS过滤
- Python: re.IGNORECASE 标志参数字符串忽略大小写的搜索替换
- Python抓取百度搜索结果
- 用python实现本地文件搜索
- Python爬虫 百度地图搜索数据采集
- 【Python排序搜索基本算法】之归并排序&分治法(Merge Sort and Divide & Conquer)
- 简单的抓取淘宝关键字信息、图片的Python爬虫|Python3中级玩家:淘宝天猫商品搜索爬虫自动化工具(第二篇)
- 二分搜索树-BST,python实现
- Python os.path.walk遍历文件,搜索文件里面的内容
- 飘逸的python - __get__ vs __getattr__ vs __getattribute__以及属性的搜索策略
- 一个dht网络的“磁力链接”搜索python代码