您的位置:首页 > 编程语言 > Python开发

pythonXXX云所有厂商

2015-08-25 13:29 459 查看
第一步:
#!/usr/bin/env python
# coding=utf-8
# Step-1 script (Python 2): build the list of listing-page URLs.
#
# Prompts for a page count N, writes the URLs <link>1 .. <link>N to
# xh.txt (one per line, echoing each to the console), then prints the
# file's lines back as a list.
import urllib,urllib2,re

number = raw_input('enter a number:\n')
# Convert once up front; int() raises ValueError on non-numeric input.
page_count = int(number)
link = 'http://www.wooyun.org/corps/page/'

# `with` guarantees the file is flushed and closed even if a write fails.
with open('xh.txt', 'w') as newf:
    for s in range(page_count):
        f = link + str(s + 1)  # pages are numbered from 1
        newf.write(f + '\n')
        # The original guarded this echo with `number==0`, but `number` is
        # the raw input string, so that comparison could never be true.
        # Inside the loop the count is necessarily positive, so the echo is
        # unconditional.
        print(f)

# Read the file back to show what was written.
with open('xh.txt', 'r') as door:
    print(door.readlines())
# Step 2:
#!/usr/bin/env python
# coding=utf-8
# Complete crawler (Python 2): fetch every URL listed in xh.txt, pull the
# text of each `_blank">...</a>` anchor out of the page, strip known
# markup/navigation fragments, and write the cleaned text to result.txt.
import io
import string,urllib2,urllib,re

send_headers = {
    'Host':'www.wooyun.org',
    'User-Agent':'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Connection':'keep-alive'
}

# Fragments removed from every match, applied strictly in this order (later
# entries operate on the result of earlier ones). They are unicode literals
# because the page text is decoded to unicode: under Python 2, passing a
# non-ASCII byte string to unicode.replace() triggers an implicit ASCII
# decode and raises UnicodeDecodeError.
_STRIP_FRAGMENTS = (
    u'_blank">',
    u'</a>',
    u'<img src="/images/sae_bottom_logo.png" title="Powered by Sina App Engine"></a-->',
    u'</span>',
    u'<span class="other fright">',
    u'<a href="/impression">行业观点</a>',
    u'乌云招聘',
    u'知识库',
    u'<a href="/impression">行业观点',
    u'http://',
    u'/',
)


def _clean_item(raw):
    """Strip the known markup fragments from one matched anchor.

    After the fragments are removed, every comma is turned into a newline,
    exactly as the original replace chain did.
    """
    for fragment in _STRIP_FRAGMENTS:
        raw = raw.replace(fragment, u'')
    return raw.replace(u',', u'\n')


def jx():
    """Crawl each URL in xh.txt and write the cleaned anchors to result.txt.

    Reads xh.txt line by line, requests each URL with ``send_headers``,
    decodes the response as UTF-8, and for every ``_blank">...</a>`` match
    prints the cleaned text and appends it (plus a newline) to result.txt.
    Errors from urllib2 or from decoding propagate to the caller; both files
    are closed either way.
    """
    # result.txt is opened through io.open with an explicit encoding so the
    # decoded (unicode) text can be written without relying on Python 2's
    # implicit ASCII encoding, which fails on non-ASCII characters.
    with open('xh.txt', 'r') as door, \
            io.open('result.txt', 'w', encoding='utf-8') as newf:
        for m in door:
            # Lines keep their trailing newline; strip it so it is not sent
            # as part of the request URL, and skip blank lines.
            url = m.strip()
            if not url:
                continue
            req = urllib2.Request(url, headers=send_headers)
            r = urllib2.urlopen(req)
            html = r.read()
            unicodepage = html.decode('utf-8')
            # re.S lets `.` span newlines inside an anchor.
            myitems = re.findall('_blank">.*?</a>', unicodepage, re.S)
            for item in myitems:
                cleaned = _clean_item(item)
                print(cleaned)
                newf.write(cleaned + u'\n')


jx()
# Done.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: