Python Post and Get 登录web后台系统并抓取页面
2013-10-29 21:20
513 查看
#!/usr/bin/env python
# coding=utf8
"""Log in to a web back end with a POST, fetch a page with a GET, extract a table.

Workflow when run as a script:

1. loginWiki()       -- POST the login form and store the Set-Cookie header
                        in ``cookie.txt``.
2. catchPage()       -- GET the target page with that cookie and store the
                        body in ``kvpage.html``.
3. getTable()        -- pull the first single-line ``<tbody>...</tbody>``
                        out of the saved page.
4. extractKvEvents() -- split that markup into rows of cell strings.
5. outputToExcel()   -- print the rows.

The module names are resolved for Python 3 first and fall back to the
Python 2 names the script was originally written against.
"""
import io
import re
import socket
import urllib

try:  # Python 3 module layout
    import http.client as httplib
    from urllib.parse import urlencode
except ImportError:  # Python 2 fallback
    import httplib
    urlencode = urllib.urlencode

# Process-wide default for sockets that do not set their own timeout.
timeout = 60
socket.setdefaulttimeout(timeout)

# Connection settings shared by loginWiki() and catchPage().
_HOST = "xxx.com"
_PORT = 8080
_REQUEST_TIMEOUT = 30
_ACCEPT = ("text/html,application/xhtml+xml,application/xml;q=0.9,"
           "image/webp,*/*;q=0.8")

# Compiled once; the pattern strings are unchanged from the original script.
_TBODY_RE = re.compile(r'.*<tbody>(.*?)</tbody>.*')
_ROW_RE = re.compile(r"<tr>(.*?)</tr>")
_CELL_RE = re.compile(r'<td class="confluenceTd">(.*?)</td>')


def getTable(path='kvpage.html'):
    """Return the inner markup of a ``<tbody>`` found in the saved page.

    The file is scanned line by line and each stripped line is matched on its
    own, so only a ``<tbody>`` that opens and closes on the same line is
    recognised.

    Args:
        path: HTML file to scan; defaults to the file catchPage() writes.

    Returns:
        The text between ``<tbody>`` and ``</tbody>`` on the first matching
        line, or ``None`` when no line matches.
    """
    # NOTE(review): the page is assumed to be UTF-8; undecodable bytes are
    # replaced rather than raising -- confirm against the real server.
    with io.open(path, encoding='utf-8', errors='replace') as page:
        for line in page:
            found = _TBODY_RE.match(line.strip())
            if found is not None:
                return found.group(1)
    return None


def extractKvEvents(content):
    """Split table-body markup into a list of rows of cell strings.

    Args:
        content: markup containing ``<tr>...</tr>`` rows, or ``None`` / an
            empty string when no table was found.

    Returns:
        One list per ``<tr>``, holding the contents of each
        ``<td class="confluenceTd">`` cell in that row. A row without such
        cells yields an empty list. Returns ``[]`` for empty or ``None``
        input.
    """
    if not content:
        # getTable() returns None when the page has no table; treat that as
        # "no rows" instead of letting the regex raise TypeError.
        return []
    return [_CELL_RE.findall(row) for row in _ROW_RE.findall(content)]


def outputToExcel(table):
    """Print every row of *table* on its own line.

    Despite the name, nothing is written to a spreadsheet; the rows only go
    to standard output.
    """
    for row in table:
        print(row)


def loginWiki():
    """POST the login form and save the returned cookie to ``cookie.txt``.

    The value of the response's ``Set-Cookie`` header is printed and written
    verbatim to ``cookie.txt``. When the header is absent the file is left
    untouched. Any exception is caught and printed; nothing is raised or
    returned.
    """
    httpClient = None
    try:
        params = urlencode({'os_username': 'xxxx@xxx.com',
                            'os_password': 'xxxx',
                            'login': 'Log In'})
        headers = {"Content-type": "application/x-www-form-urlencoded",
                   "Accept": _ACCEPT}
        httpClient = httplib.HTTPConnection(_HOST, _PORT,
                                            timeout=_REQUEST_TIMEOUT)
        httpClient.request("POST", "/login.action", params, headers)
        response = httpClient.getresponse()

        cookie = response.getheader('Set-Cookie')
        print(cookie)
        if cookie is None:
            # Do not truncate a previously saved cookie with nothing.
            print('no Set-Cookie header in login response; '
                  'cookie.txt left unchanged')
        else:
            # NOTE(review): the raw header value (attributes included) is
            # stored and later replayed as-is by catchPage().
            with open('cookie.txt', 'w') as cookieFile:
                cookieFile.write(cookie)
    except Exception as e:
        print(e)
    finally:
        if httpClient is not None:
            httpClient.close()


def catchPage():
    """GET the target page with the saved cookie and store it on disk.

    Reads the cookie from ``cookie.txt``, requests ``/xxxPath``, prints the
    response status and reason, and writes the response body to
    ``kvpage.html``. Any exception is caught and printed; nothing is raised
    or returned.
    """
    httpClient = None
    try:
        # Read the cookie saved by loginWiki().
        with open('cookie.txt') as cookieFile:
            cookie = cookieFile.read().strip()
        print(cookie)

        headers = {"Content-type": "application/x-www-form-urlencoded",
                   "Accept": _ACCEPT,
                   'Cookie': cookie}

        httpClient = httplib.HTTPConnection(_HOST, _PORT,
                                            timeout=_REQUEST_TIMEOUT)
        httpClient.request('GET', '/xxxPath', headers=headers)

        # response is an HTTPResponse object.
        response = httpClient.getresponse()
        print(response.status)
        print(response.reason)

        # Binary mode: the body is written exactly as received (read()
        # returns bytes on Python 3).
        with open('kvpage.html', 'wb') as htmlPage:
            htmlPage.write(response.read())
    except Exception as e:
        print(e)
    finally:
        if httpClient is not None:
            httpClient.close()


if __name__ == '__main__':
    loginWiki()
    catchPage()
    tablecontent = getTable()
    table = extractKvEvents(tablecontent)
    outputToExcel(table)
相关文章推荐
- Python Post and Get 登陆web后台系统并抓取页面
- 一步一步实现web程序信息管理系统之二----后台框架实现跳转登陆页面
- python模拟浏览器webdriver登陆网站后抓取页面并输出
- python实现的json数据以HTTP GET,POST,PUT,DELETE方式页面请求
- Golang实现web api接口调用及web数据抓取[get post模式] 推荐
- python实现的json数据以HTTP GET,POST,PUT,DELETE方式页面请求
- python网络爬虫学习(一)通过GET和POST方式获取页面内容
- Using Django with GAE Python 后台抓取多个网站的页面全文
- python实现的json数据以HTTP GET,POST,PUT,DELETE方式页面请求
- python实现的json数据以HTTP GET,POST,PUT,DELETE方式页面请求
- Python网络爬虫(Get、Post抓取方式)
- webview链接 get和post请求传值给链接的页面
- XML Web Service初体验: HTTP-GET, HTTP-POST and SOAP的比较
- 黄聪:python访问抓取网页常用命令(保存图片到本地、模拟POST、GET、中文编码问题)
- C# http请求相关的函数 HttpWebRequest: Post , Get ; PostAndRedirect
- java后台get_post页面请求
- Python post、get百度(登陆)
- Python网络爬虫(Get、Post抓取方式)
- C# winform端 通过HttpWebRequest进行post和get请求,数据格式为json,后台java端接收,其中有关传输特殊字符(\t,\r,',\n,n)等处理
- webView 加载页面的两种请求方式: get 和 post