您的位置:首页 > 其它

用scrapy获取代理ip地址

2017-05-28 21:05 344 查看

items.py

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html 
import scrapy

class GetproxyItem(scrapy.Item):
    """Container for one proxy entry scraped from a listing page.

    Each attribute is an untyped ``scrapy.Field()``; values are stored as
    whatever the spider assigns to them.
    """

    ip = scrapy.Field()        # proxy host address
    port = scrapy.Field()      # proxy port
    type = scrapy.Field()      # proxy type text as shown in the listing
    location = scrapy.Field()  # location text as shown in the listing
    protocol = scrapy.Field()  # protocol label assigned by the spider
    source = scrapy.Field()    # label of the site the entry came from


pipelines.py

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 
class GetproxyPipeline(object):
    """Append every processed item to a tab-separated UTF-8 text file."""

    # Output file; a relative path is resolved against the working directory.
    file_name = 'proxy.txt'

    # Column order of each written row.
    field_order = ('ip', 'port', 'protocol', 'type', 'location', 'source')

    @staticmethod
    def _as_text(value):
        """Return *value* as whitespace-stripped text, decoding UTF-8 bytes."""
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        return value.strip()

    def process_item(self, item, spider):
        """Write *item* as one tab-separated line and return it unchanged.

        Args:
            item: mapping that provides every key listed in ``field_order``.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item`` object, so later pipeline stages can use it.

        Raises:
            KeyError: if ``item`` lacks one of the expected keys.
        """
        # Local import keeps this block self-contained.
        import io

        # Build the full row before touching the file so that a missing key
        # can never leave a half-written line behind.
        row = u'\t'.join(self._as_text(item[key]) for key in self.field_order)

        # Encode once at the I/O boundary; concatenating encoded bytes with
        # str separators fails on Python 3.
        with io.open(self.file_name, 'a', encoding='utf-8') as fp:
            fp.write(row + u'\n')

        return item


proxy360pider.py

# -*- coding: utf-8 -*-
import scrapy
from getProxy.items import GetproxyItem

class Proxy360piderSpider(scrapy.Spider):
    """Collect proxy entries from the per-region listing pages of proxy360.cn."""

    name = "proxy360pider"
    allowed_domains = ["proxy360.cn"]

    # Region names appended to the listing URL.
    # NOTE(review): 'bahrenin' looks like a misspelling of "Bahrain"; it is
    # kept verbatim because it is part of the requested URL -- confirm
    # against the site before changing it.
    nations = ['Brazil', 'China', 'Taiwan', 'Japan', 'Thailand', 'Vietnam', 'bahrenin']

    # One start URL per region, built without a class-level ``for`` statement.
    start_urls = ['http://www.proxy360.cn/Region/' + nation for nation in nations]

    def parse(self, response):
        """Extract one ``GetproxyItem`` per proxy row found in *response*.

        Rows that lack any of the four expected ``<span>`` texts are skipped
        with a warning instead of raising ``IndexError``, so one malformed
        row cannot discard the items already collected from the page.

        Args:
            response: the downloaded listing page.

        Returns:
            A list of populated ``GetproxyItem`` objects (possibly empty).
        """
        rows = response.xpath('//div[@class="proxylistitem" and @name="list_proxy_ip"]')
        items = []
        for row in rows:
            # extract_first() yields None when the node is absent, where
            # extract()[0] would raise.
            ip = row.xpath('.//span[1]/text()').extract_first()
            port = row.xpath('.//span[2]/text()').extract_first()
            proxy_type = row.xpath('.//span[3]/text()').extract_first()
            location = row.xpath('.//span[4]/text()').extract_first()
            if None in (ip, port, proxy_type, location):
                self.logger.warning('Skipping incomplete proxy row on %s', response.url)
                continue

            item = GetproxyItem()
            item['ip'] = ip
            item['port'] = port
            item['type'] = proxy_type
            item['location'] = location
            item['protocol'] = 'http'
            item['source'] = 'proxy360'
            items.append(item)

        return items


部分抓取到的代理 IP(proxy.txt 输出示例):
210.246.192.149 80  http    高匿  泰国  proxy360
118.175.255.10  80  http    高匿  泰国  proxy360
203.158.167.152 8080    http    高匿  泰国  proxy360
58.147.80.194   3128    http    高匿  泰国  proxy360
122.155.0.244   3128    http    透明  泰国  proxy360
203.151.233.143 80  http    高匿  泰国  proxy360
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: