您的位置:首页 > Web前端 > HTML

多线程截取html中相应的数据

2013-10-27 15:57 435 查看
多线程截取html中相应的数据
#coding=gbk
import re,urllib,time
import linecache,threading
from bs4 import BeautifulSoup as soup
mlock = threading.Lock()
a = []
def get_content(ip_content):
    """Fetch the Bing ``ip:`` search results for one address and log new hosts.

    Downloads the result page for ``ip:<ip_content>``, takes the part of each
    result's ``<cite>`` text that precedes the first ``/``, and, for every
    value not yet present in the module-level list ``a``, appends it to ``a``
    and to the log file ``c:\\mylog.txt`` (one value per line).

    ip_content -- the IP address to search for; surrounding whitespace
                  (e.g. the newline left over from reading a file) is ignored.
    """
    # Format first so any object is accepted, then drop the trailing newline
    # that would otherwise end up inside the URL.
    ip = "{0}".format(ip_content).strip()

    # The address must go into the actual search term (q=), not only into the
    # "previous query" field, otherwise every call searches the same IP.
    url = ("http://hk.bing.com/search?q=ip%3A{0}&"
           "qs=n&form=QBLH&filt=all&pq=ip%3A{0}&sc=0-2&sp=-1&sk=").format(ip)

    pythoner = urllib.urlopen(url)
    try:
        content = pythoner.read()
    finally:
        # Always release the connection, even if the read fails.
        pythoner.close()

    # Parsing happens outside the lock so threads only serialize on the
    # shared list and the log file.
    results = soup(content).find_all("div", {"class": "sb_meta"})
    hosts = [x.cite.text.split('/')[0] for x in results
             if x.cite is not None]  # skip results without a <cite> tag

    # "with" guarantees the lock is released if anything below raises;
    # a leaked lock would block every other worker thread forever.
    with mlock:
        fresh = []
        for host in hosts:
            if host not in a:
                a.append(host)
                fresh.append(host)
        if fresh:
            # Open the log once per call and close it deterministically.
            with open(r'c:\mylog.txt', 'a') as log:
                for host in fresh:
                    log.write(host + '\n')

def thread_geturl(process, info):
    """Start one thread per item of ``info`` and return the started threads.

    process -- callable invoked in each thread with a single item as its
               only positional argument.
    info    -- iterable of items (here: IP addresses), one thread per item.

    Returns the list of started ``threading.Thread`` objects, in the order of
    ``info``, so that callers may ``join()`` them. Callers that ignore the
    return value are unaffected.
    """
    started = []
    for item in info:
        worker = threading.Thread(target=process, args=(item,))
        worker.start()
        started.append(worker)
    return started

if __name__ == '__main__':
    # The input file holds one IP address per line. linecache.getlines()
    # keeps each line's trailing newline, so strip it, and skip blank lines
    # so that no thread is started for an empty query.
    ip_list = [line.strip()
               for line in linecache.getlines(r'c:\iplist.txt')
               if line.strip()]
    thread_geturl(get_content, ip_list)
IP_list

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  多线程 import content