您的位置:首页 > 编程语言 > Python开发

python3+BeautifulSoup+tkinter 爬虫 获取学校成绩

2015-03-10 11:01 211 查看
写的是一个小爬虫,有界面,爬取学校成绩

从最开始只用python3+正则表达式

然后加界面用tkinter

到最后加了BeautifulSoup

现在看起来和谐多了

<pre name="code" class="python">#获取学校成绩


import re,string,urllib.parse,urllib.request
from tkinter import *
from tkinter import ttk
from bs4 import BeautifulSoup
class AhutScore:
    """Scraper plus a small tkinter front end for the AHUT score-query page.

    ``getAhutScore`` posts the query form and parses the returned HTML with
    BeautifulSoup; ``getGUI`` opens a window where the user enters the query
    fields and shows the parsed result in a second window.
    """

    # Text the page puts in the ``Label_SHOW`` span when nothing matched.
    _NO_RECORDS = '没有返回记录!'
    # Seconds to wait for the server, so a dead host cannot hang the GUI forever.
    _TIMEOUT = 15

    def __init__(self):
        print('已经启动安工大成绩查询爬虫,咔嚓咔嚓')

    def getAhutScore(self, stuNo, idCard, xn, xq):
        """Query the score page and return the parsed result.

        Args:
            stuNo: value sent as form field ``TextBox1`` (student number).
            idCard: value sent as form field ``TextBox2`` (ID card number).
            xn: value sent as ``drop_xn`` (school year, e.g. ``'2014-2015'``).
            xq: value sent as ``drop_xq`` (term, e.g. ``'1'``).

        Returns:
            A tuple ``(items, stuMsg, labelShow)``. ``items`` is a list with
            one dict per score row, mapping column-header text to cell text
            (non-breaking spaces removed); it is empty when the page reports
            no records. ``stuMsg`` and ``labelShow`` are the texts of the
            ``Label1`` and ``Label_SHOW`` spans ('' when a span is missing).

        Raises:
            urllib.error.URLError: the request failed or timed out.
            UnicodeDecodeError: the response body is not valid UTF-8.
        """
        # Form fields as captured from a browser session (the original author
        # used HttpWatch). NOTE(review): the ASP.NET state tokens are a
        # snapshot; presumably they must be re-captured if the page changes.
        postdata = urllib.parse.urlencode({
            '__EVENTVALIDATION': '/wEWIQLH/uyCBwLs0bLrBgLs0fbZDALWrMSACwKEx5fABgKFx/uABQKax7/ABwKax6OABgKbx6OABgKYx+dBAsKF4K8GAs2FiJQIAsqF5O0IAsOF8PcLAsCFjO0JAvGV4pUFAv/6yPsJAv76yPsJAvbLmuYBAq7k2jACzqvD4A4CrvycrAcCi+uC+wwCn/nbgQ0C4d349AoC9PbF/AwCrZj0xQsCrZiIoQQC0sqYtwoC6MqwtAcC1srwtQoChobTsw4C1orq2A/lc4cMuGz9/vf0WzeaMjk2B63pi/yD0c3bh6AkZ2usTA==',
            '__VIEWSTATE': '/wEPDwUKLTc5MTY3NzY2OA9kFgICAw9kFg4CBQ8QZBAVDA09Peivt+mAieaLqT09CTIwMTQtMjAxNQkyMDEzLTIwMTQJMjAxMi0yMDEzCTIwMTEtMjAxMwkyMDExLTIwMTIJMjAxMC0yMDExCTIwMDktMjAxMAkyMDA4LTIwMDkJMjAwNy0yMDA4CTIwMDYtMjAwNwkyMDA1LTIwMDYVDAAJMjAxNC0yMDE1CTIwMTMtMjAxNAkyMDEyLTIwMTMJMjAxMS0yMDEzCTIwMTEtMjAxMgkyMDEwLTIwMTEJMjAwOS0yMDEwCTIwMDgtMjAwOQkyMDA3LTIwMDgJMjAwNi0yMDA3CTIwMDUtMjAwNhQrAwxnZ2dnZ2dnZ2dnZ2dkZAIHDxBkEBUDDT096K+36YCJ5oupPT0BMgExFQMAATIBMRQrAwNnZ2dkZAIdD2QWAgIFDzwrABEAZAIfD2QWAgIBDzwrABEAZAIjD2QWAgIJDzwrABEAZAIlD2QWAgIDDxBkZBYBZmQCJw9kFgICAQ88KwARAQEQFgAWABYAZBgEBQlHcmlkVmlldzMPZ2QFCUdyaWRWaWV3MQ9nZAUMR3JpZFZpZXdfY2owD2dkBQtHcmlkVmlld19jag9nZJ3osNiaHFKtpB351twVA++gU7GdyOdYypVlNUYHNaNo',
            '__VIEWSTATEGENERATOR': 'DCA2160B',
            'Button_cjcx': '查询',
            'drop_type': '全部成绩',
            'drop_xn': xn,
            'drop_xq': xq,
            'hid_dqszj': '',
            'TextBox1': stuNo,
            'TextBox2': idCard,
        }).encode(encoding='utf-8')
        # Send a browser-like User-Agent so the request looks like a normal visit.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
        }
        req = urllib.request.Request(
            url='http://211.70.149.134:8080/stud_score/brow_stud_score.aspx',
            data=postdata,
            headers=headers,
        )
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(req, timeout=self._TIMEOUT) as result:
            unicodePage = result.read().decode('utf-8')
        return self._parse_scores(unicodePage)

    @staticmethod
    def _text(tag):
        """Return the text inside *tag*, or '' when the tag was not found."""
        return '' if tag is None else tag.get_text()

    @staticmethod
    def _parse_scores(page):
        """Extract ``(items, stuMsg, labelShow)`` from the result page HTML."""
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(page, 'html.parser')
        stuMsg = AhutScore._text(soup.find('span', id='Label1'))
        labelShow = AhutScore._text(soup.find('span', id='Label_SHOW'))
        items = []
        header_row = soup.find('tr', class_='Freezing')
        # No records, or no header row to name the columns: nothing to parse.
        if labelShow == AhutScore._NO_RECORDS or header_row is None:
            return items, stuMsg, labelShow
        titles = [AhutScore._text(th) for th in header_row('th')]
        score_rows = soup('tr', align='left',
                          onmouseout="this.style.backgroundColor=c")
        for row in score_rows:
            # zip() pairs each header with its cell and tolerates rows that
            # have fewer cells than headers instead of raising IndexError.
            items.append({
                title: AhutScore._text(cell).replace('\xa0', '')
                for title, cell in zip(titles, row('td'))
            })
        return items, stuMsg, labelShow

    @staticmethod
    def _format_report(items, stuMsg, labelShow):
        """Build the text shown in the result window, one entry per line.

        Scores are listed in the same order as *items*.
        """
        lines = [stuMsg or '', '-' * 32 + (labelShow or '') + '-' * 32]
        if items:
            for item in items:
                lines.append(item.get('课程名', '') + ':' + item.get('总评成绩', ''))
        else:
            lines.append('暂无信息!')
        return ''.join(line + '\n' for line in lines)

    def getGUI(self):
        """Open the query window and run the tkinter main loop until it closes."""
        # messagebox is a submodule and is not provided by ``from tkinter import *``.
        from tkinter import messagebox

        def show_result():
            """Button callback: run the query and display it in a child window."""
            try:
                items, stuMsg, labelShow = self.getAhutScore(
                    stuNo.get(), idCard.get(), xn.get(), xq.get())
            except (OSError, ValueError) as err:
                # Network failures (URLError/timeouts are OSError) and decode
                # errors (ValueError) would otherwise only reach stderr.
                messagebox.showerror('成绩查询结果', str(err))
                return
            # A child window of the existing root; a second Tk() would start
            # a separate interpreter.
            rstk = Toplevel(root)
            rstk.title('成绩查询结果')
            rstk.resizable(width=False, height=False)
            t = Text(rstk)
            t.insert(END, self._format_report(items, stuMsg, labelShow))
            t.grid(row=5, column=0, columnspan=4)

        root = Tk()
        root.resizable(width=False, height=False)
        root.title('ahut成绩查询')
        Label(root, text='学号:').grid(row=0, column=0, sticky=W)
        Label(root, text='身份证号:').grid(row=0, column=2, sticky=W)
        Label(root, text='学年:').grid(row=1, column=0, sticky=W)
        Label(root, text='学期:').grid(row=1, column=2, sticky=W)
        stuNo = StringVar()
        idCard = StringVar()
        xn = StringVar()
        xq = StringVar()
        Entry(root, textvariable=stuNo).grid(row=0, column=1)
        Entry(root, textvariable=idCard).grid(row=0, column=3)
        # Masked placeholder values pre-filled in the entry fields.
        stuNo.set('11908***')
        idCard.set('34082*************')
        xnBox = ttk.Combobox(root, textvariable=xn, state='readonly')
        xnBox['values'] = ('', '2010-2011', '2011-2012', '2012-2013', '2013-2014', '2014-2015')
        xnBox.set('2014-2015')
        xnBox.grid(row=1, column=1)
        xqBox = ttk.Combobox(root, textvariable=xq, state='readonly')
        xqBox['values'] = ('', '1', '2')
        xqBox.set('1')
        xqBox.grid(row=1, column=3)
        Button(root, text='查询', command=show_result).grid(row=4, columnspan=4)
        root.mainloop()

# Script entry: build the scraper (its constructor prints a start-up banner),
# then open the query window; getGUI() stays in the tkinter main loop.
ahutScore = AhutScore()
ahutScore.getGUI()




</pre>

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: