python3+BeautifulSoup+tkinter 爬虫 获取学校成绩
2015-03-10 11:01
211 查看
这是我写的一个带图形界面的小爬虫,用来爬取学校成绩
从最开始只用python3+正则表达式
然后加界面用tkinter
到最后加了BeautifulSoup
现在看起来和谐多了
从最开始只用python3+正则表达式
然后加界面用tkinter
到最后加了BeautifulSoup
现在看起来和谐多了
<pre name="code" class="python">#获取学校成绩
import re
import string
import urllib.parse
import urllib.request
from tkinter import *
from tkinter import messagebox
from tkinter import ttk

from bs4 import BeautifulSoup

# Score query page that the form is posted to.
SCORE_URL = 'http://211.70.149.134:8080/stud_score/brow_stud_score.aspx'

# Seconds to wait for the server before giving up, so the GUI cannot hang
# forever on an unreachable host.
REQUEST_TIMEOUT = 10

# Hidden form fields captured from a real browser session (HttpWatch).
# NOTE(review): these look like ASP.NET WebForms state tokens and may go stale
# if the page changes -- confirm against the live site.
EVENT_VALIDATION = '/wEWIQLH/uyCBwLs0bLrBgLs0fbZDALWrMSACwKEx5fABgKFx/uABQKax7/ABwKax6OABgKbx6OABgKYx+dBAsKF4K8GAs2FiJQIAsqF5O0IAsOF8PcLAsCFjO0JAvGV4pUFAv/6yPsJAv76yPsJAvbLmuYBAq7k2jACzqvD4A4CrvycrAcCi+uC+wwCn/nbgQ0C4d349AoC9PbF/AwCrZj0xQsCrZiIoQQC0sqYtwoC6MqwtAcC1srwtQoChobTsw4C1orq2A/lc4cMuGz9/vf0WzeaMjk2B63pi/yD0c3bh6AkZ2usTA=='
VIEW_STATE = '/wEPDwUKLTc5MTY3NzY2OA9kFgICAw9kFg4CBQ8QZBAVDA09Peivt+mAieaLqT09CTIwMTQtMjAxNQkyMDEzLTIwMTQJMjAxMi0yMDEzCTIwMTEtMjAxMwkyMDExLTIwMTIJMjAxMC0yMDExCTIwMDktMjAxMAkyMDA4LTIwMDkJMjAwNy0yMDA4CTIwMDYtMjAwNwkyMDA1LTIwMDYVDAAJMjAxNC0yMDE1CTIwMTMtMjAxNAkyMDEyLTIwMTMJMjAxMS0yMDEzCTIwMTEtMjAxMgkyMDEwLTIwMTEJMjAwOS0yMDEwCTIwMDgtMjAwOQkyMDA3LTIwMDgJMjAwNi0yMDA3CTIwMDUtMjAwNhQrAwxnZ2dnZ2dnZ2dnZ2dkZAIHDxBkEBUDDT096K+36YCJ5oupPT0BMgExFQMAATIBMRQrAwNnZ2dkZAIdD2QWAgIFDzwrABEAZAIfD2QWAgIBDzwrABEAZAIjD2QWAgIJDzwrABEAZAIlD2QWAgIDDxBkZBYBZmQCJw9kFgICAQ88KwARAQEQFgAWABYAZBgEBQlHcmlkVmlldzMPZ2QFCUdyaWRWaWV3MQ9nZAUMR3JpZFZpZXdfY2owD2dkBQtHcmlkVmlld19jag9nZJ3osNiaHFKtpB351twVA++gU7GdyOdYypVlNUYHNaNo'
VIEW_STATE_GENERATOR = 'DCA2160B'

# Exact text the page puts in the Label_SHOW span when the query matched
# nothing.
NO_RECORDS_TEXT = '没有返回记录!'


def _label_text(soup, element_id):
    """Return the text of the <span> with the given id.

    Raises:
        ValueError: if the page contains no such span, which means the
            response is not the expected score page.
    """
    span = soup.find('span', id=element_id)
    if span is None:
        raise ValueError('unexpected page: missing <span id="%s">' % element_id)
    # get_text() yields '' for an empty span, where .string would yield None
    # and break later string concatenation.
    return span.get_text()


def _parse_score_rows(soup):
    """Return one dict per score row, keyed by the table's column titles."""
    header = soup.find('tr', class_='Freezing')
    if header is None:
        raise ValueError('unexpected page: missing score table header')
    titles = [th.get_text() for th in header.find_all('th')]

    rows = soup.find_all(
        'tr', align='left', onmouseout="this.style.backgroundColor=c")
    items = []
    for row in rows:
        # Cells are padded with non-breaking spaces; drop them.
        cells = [td.get_text().replace('\xa0', '')
                 for td in row.find_all('td')]
        # zip() pairs as many columns as the page actually has instead of
        # assuming a fixed count.
        items.append(dict(zip(titles, cells)))
    return items


class AhutScore:
    """Small crawler with a tkinter front end for the AHUT score query page."""

    def __init__(self):
        print('已经启动安工大成绩查询爬虫,咔嚓咔嚓')

    def getAhutScore(self, stuNo, idCard, xn, xq):
        """Query the score page and parse the result.

        Args:
            stuNo: value posted as form field ``TextBox1`` (student number).
            idCard: value posted as form field ``TextBox2`` (ID card number).
            xn: value posted as ``drop_xn`` (academic year, may be '').
            xq: value posted as ``drop_xq`` (term, may be '').

        Returns:
            A tuple ``(items, stuMsg, labelShow)``: ``items`` is a list of
            dicts mapping column title to cell text (empty when the page
            reports no records), ``stuMsg`` is the text of the ``Label1``
            span and ``labelShow`` is the text of the ``Label_SHOW`` span.

        Raises:
            OSError: on network failure or timeout (``urllib.error.URLError``
                is a subclass).
            ValueError: if the response cannot be decoded as UTF-8 or does
                not contain the expected elements.
        """
        postdata = urllib.parse.urlencode({
            '__EVENTVALIDATION': EVENT_VALIDATION,
            '__VIEWSTATE': VIEW_STATE,
            '__VIEWSTATEGENERATOR': VIEW_STATE_GENERATOR,
            'Button_cjcx': '查询',
            'drop_type': '全部成绩',
            'drop_xn': xn,
            'drop_xq': xq,
            'hid_dqszj': '',
            'TextBox1': stuNo,
            'TextBox2': idCard,
        }).encode(encoding='utf-8')

        # Send a browser-like User-Agent so the request looks like a normal
        # visit.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
        }
        req = urllib.request.Request(
            url=SCORE_URL,
            data=postdata,
            headers=headers,
        )

        # The context manager closes the connection even if reading fails.
        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as result:
            unicodePage = result.read().decode('utf-8')

        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(unicodePage, 'html.parser')

        stuMsg = _label_text(soup, 'Label1')
        labelShow = _label_text(soup, 'Label_SHOW')
        if labelShow == NO_RECORDS_TEXT:
            return [], stuMsg, labelShow

        items = _parse_score_rows(soup)
        print(items)
        return items, stuMsg, labelShow

    def getGUI(self):
        """Build the query form and run the tkinter main loop (blocks)."""
        root = Tk()
        root.resizable(width=False, height=False)
        root.title('ahut成绩查询')

        Label(root, text='学号:').grid(row=0, column=0, sticky=W)
        Label(root, text='身份证号:').grid(row=0, column=2, sticky=W)
        Label(root, text='学年:').grid(row=1, column=0, sticky=W)
        Label(root, text='学期:').grid(row=1, column=2, sticky=W)

        stuNo = StringVar()
        idCard = StringVar()
        xn = StringVar()
        xq = StringVar()

        Entry(root, textvariable=stuNo).grid(row=0, column=1)
        Entry(root, textvariable=idCard).grid(row=0, column=3)
        # Masked placeholder values shown when the form opens.
        stuNo.set('11908***')
        idCard.set('34082*************')

        xnBox = ttk.Combobox(root, textvariable=xn, state='readonly')
        xnBox['values'] = ('', '2010-2011', '2011-2012', '2012-2013',
                           '2013-2014', '2014-2015')
        xnBox.set('2014-2015')
        xnBox.grid(row=1, column=1)

        xqBox = ttk.Combobox(root, textvariable=xq, state='readonly')
        xqBox['values'] = ('', '1', '2')
        xqBox.set('1')
        xqBox.grid(row=1, column=3)

        def test():
            """Run the query for the current form values and show the result."""
            try:
                items, stuMsg, labelShow = self.getAhutScore(
                    stuNo.get(), idCard.get(), xn.get(), xq.get())
            except (OSError, ValueError) as err:
                # Without this the failure would only reach stderr and the
                # user would see nothing happen.
                messagebox.showerror('查询失败', '无法获取成绩:' + str(err))
                return

            # A second Tk() would create an independent interpreter; a
            # Toplevel is the supported way to open another window.
            rstk = Toplevel(root)
            rstk.title('成绩查询结果')
            rstk.resizable(width=False, height=False)

            t = Text(rstk)
            # Append at END so rows keep the order the server returned them
            # in; inserting each row at a fixed index reversed them.
            t.insert(END, stuMsg + '\n')
            t.insert(END, '-' * 32 + labelShow + '-' * 32 + '\n')
            if items:
                for item in items:
                    t.insert(END, item.get('课程名', '') + ':'
                             + item.get('总评成绩', '') + '\n')
            else:
                t.insert(END, '暂无信息!\n')
            t.grid(row=5, column=0, columnspan=4)

        Button(root, text='查询', command=test).grid(row=4, columnspan=4)
        root.mainloop()


ahutScore = AhutScore()
ahutScore.getGUI()
相关文章推荐
- python小爬虫—获取学校教务处成绩
- python 爬虫实战--登陆学校教务系统获取成绩信息
- Django+python+BeautifulSoup垂直搜索爬虫
- Python爬虫入门八之Beautiful Soup的用法
- Python爬虫利器二之Beautiful Soup的用法
- Python爬虫利器二之Beautiful Soup的用法
- [转载]Python爬虫入门八之Beautiful Soup的用法
- Python爬虫利器二之Beautiful Soup的用法
- Python爬虫利器二之Beautiful Soup的用法
- Python爬虫利器二之Beautiful Soup的用法
- python BeautifulSoup获取 网页链接的文字内容
- Python爬虫入门八之Beautiful Soup的用法
- python爬虫获取郑大教务在线成绩数据
- 【python小练】图片爬虫之BeautifulSoup4
- Python爬虫利器二之Beautiful Soup的用法【转过来,乱了,从原网址看】
- Python 爬虫—— requests BeautifulSoup
- 使用requests+beautifulsoup模块实现python网络爬虫功能
- Python爬虫利器二之Beautiful Soup的用法