xpath抓取代理IP并检测IP的有效性
2017-03-17 11:43
323 查看
#coding:utf-8
"""Scrape proxy candidates from xicidaili.com and keep the ones that work.

Each listing page is handled by its own worker process.  A proxy is kept
when a GET of ``test_url`` routed through it returns a body longer than
one byte; kept proxies are printed and written to ``proxy1.txt`` as
``PROTOCOL:IP:PORT`` lines.

NOTE(review): the output file is opened at import time and shared with the
workers by inheritance.  Under a multiprocessing start method that
re-imports the main module, every worker would reopen (and truncate) the
file -- confirm the target platform before relying on the output.
"""
import urllib
import urllib2
import requests
import time
from bs4 import BeautifulSoup
from lxml import etree
import multiprocessing

# URL fetched through every candidate proxy to decide whether it is usable.
test_url = 'http://www.baidu.com/'
testStr = "wahaha"

# Browser-like headers sent with the listing-page requests.
_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Host': 'www.xicidaili.com',
    'If-None-Match': 'W/"b077743016dc54409ebe6b86ba7a869b"',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36',
}
_cookies = None

# Proxies accepted by the *current* process.  Each worker process has its
# own copy, so the parent's list is not updated by the workers.
ip_port = []

# Shared output file; every accepted proxy is appended as one line.
file = open('proxy1.txt', 'w')


def process(page):
    """Check every proxy listed on one page and record the working ones.

    Downloads listing page number ``page``, reads the IP, port and protocol
    cells of each table row, and for every HTTP/HTTPS entry tries to fetch
    ``test_url`` through that proxy with a 5 second timeout.  Working
    proxies are printed, appended to ``ip_port`` and written to ``file``.

    Args:
        page: Page number appended to the listing URL.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            downloaded (including the 10 second timeout).
    """
    # A timeout keeps a stalled listing request from hanging the worker
    # (and the parent that waits for it) forever.
    response = requests.get('http://www.xicidaili.com/nn/' + '%s' % page,
                            headers=_headers, timeout=10)
    html = etree.HTML(response.text)
    if html is None:
        # Nothing parseable came back, so there are no rows to inspect.
        return

    ip_cells = html.xpath('(//tr[@class="odd"]|//tr[@class=""])/td[2]')
    port_cells = html.xpath('(//tr[@class="odd"]|//tr[@class=""])/td[3]')
    protocol_cells = html.xpath('(//tr[@class="odd"]|//tr[@class=""])/td[6]')

    for ip_cell, port_cell, protocol_cell in zip(ip_cells, port_cells, protocol_cells):
        ip = ip_cell.text
        port = port_cell.text
        protocol = protocol_cell.text

        if protocol not in ('HTTP', 'HTTPS'):
            continue
        if not ip or not port:
            # A cell without text cannot form a usable proxy address.
            continue

        cookies = urllib2.HTTPCookieProcessor()
        proxy_handler = urllib2.ProxyHandler({"http": r'http://%s:%s' % (ip, port)})
        opener = urllib2.build_opener(cookies, proxy_handler)
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')]

        try:
            reply = opener.open(test_url, timeout=5)
            try:
                body = reply.read()
            finally:
                reply.close()
        except Exception:
            # A dead proxy can fail in many ways (refused, timeout, bad
            # response); all of them just mean "skip it".  Catching
            # Exception rather than everything lets Ctrl-C and SystemExit
            # still stop the worker.
            continue

        if len(body) <= 1:
            continue

        print (ip, port, protocol)
        ip_port.append((ip, port, protocol))
        file.write(protocol + ":" + ip + ":" + port + "\n")
        # Worker processes may exit without flushing buffered file objects,
        # so push each accepted proxy to disk immediately.
        file.flush()
        time.sleep(1)


if __name__ == '__main__':
    workers = []
    for i in xrange(1, 50):
        p = multiprocessing.Process(target=process, args=(i,))
        p.start()
        workers.append(p)
    # Wait for every worker before closing the output file and exiting.
    for p in workers:
        p.join()
    file.close()
相关文章推荐
- xpath抓取代理IP并检测IP的有效性
- 第2.2章 scrapy之多进程检测代理ip的有效性
- 代理IP 有效性检测
- Python3.5抓取代理IP并验证有效性
- php代码检查代理ip的有效性
- python动态抓取代理IP
- python使用ip代理抓取网页
- 多线程爬虫——抓取代理ip
- C#多线程爬虫抓取免费代理IP
- Python中抓取代理IP并测试
- python 自动抓取代理ip
- 高匿代理ip检测
- java 检测代理IP是否准确
- C#多线程爬虫抓取免费代理IP
- Python爬虫抓取代理IP并检验可用性的实例
- C#多线程爬虫抓取免费代理IP
- python3实现网络爬虫(7)-- 使用ip代理抓取网页
- Python实现检测代理IP是否可以翻墙
- Python 抓取可用代理IP
- 设置代理IP 设置完成后可发送抓取数据的请求