您的位置:首页 > 编程语言 > Python开发

python多线程与单线程之间的差距

2017-02-19 18:36 141 查看
对于python多线程与单线程之间有多少差距呢?今天用一个小例子比较一下。

说明:先从代理IP网站抓取代理IP列表,再逐个通过这些代理访问某个网站,最后比较两种实现各自的总耗时。

单线程版
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup as bs
import re
import time
def proxy_list(mbUrl):
    """Scrape the proxy listing page and probe each proxy sequentially.

    Downloads ``http://www.xicidaili.com/nn/``, takes the text of cells
    1, 2 and 5 of every matching table row as IP, port and protocol, and
    calls ``proxy_test`` for one row at a time.

    Args:
        mbUrl: Target URL forwarded to ``proxy_test`` for every proxy.
    """
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"}
    url = 'http://www.xicidaili.com/nn/'
    # A timeout keeps a stalled listing server from hanging the script.
    r = requests.get(url=url, headers=headers, timeout=10)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = bs(r.content, 'html.parser')
    # The empty alternative in this pattern matches any string, so the class
    # value itself does not filter anything.
    # NOTE(review): presumably meant to skip the class-less header row - confirm.
    rows = soup.find_all(name='tr', attrs={'class': re.compile('|[^odd]')})
    for row in rows:
        # Search the row tag directly; re-parsing str(row) is unnecessary.
        cells = row.find_all(name='td')
        if len(cells) < 6:
            # Row lacks the expected columns; skip it instead of crashing.
            continue
        ip = str(cells[1].string)
        port = str(cells[2].string)
        types = str(cells[5].string)
        proxy_test(mbUrl, ip, port, types)
def proxy_test(url, ip, port, types):
    """Fetch ``url`` through one proxy and print the proxy if it works.

    The proxy counts as working when the first ``[...]`` group in the
    response text equals ``ip``.

    Args:
        url: Page to request through the proxy.
        ip: Proxy IP address as a string.
        port: Proxy port as a string.
        types: Protocol name; lower-cased and used as the key of the
            ``proxies`` mapping passed to ``requests.get``.
    """
    proxy = {types.lower(): '%s:%s' % (ip, port)}
    try:
        # Every proxy is tried once, with a 3-second timeout.
        r = requests.get(url, proxies=proxy, timeout=3)
        found = re.search(r'\[(.*?)\]', r.text)  # IP reported inside [...]
    except Exception:
        # Deliberate best effort: a dead or misbehaving proxy must never
        # abort the scan, so any failure just marks this proxy as unusable.
        return
    if found is not None and ip == found.group(1):
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(proxy)
#测试一下
if __name__ == '__main__':
    # Time one complete sequential scan of the proxy list.
    time_start = time.time()
    # The URL literal was split across lines by a stray extraction artifact;
    # it is restored here as a single string.
    proxy_list("http://1212.ip138.com/ic.asp")
    time_finished = time.time() - time_start
    print(time_finished)

运行结果:

{'http': '39.85.13.253:9999'}

{'http': '113.124.9.229:8998'}

{'http': '124.88.67.52:843'}

{'http': '123.132.170.185:9999'}

{'http': '123.132.179.153:9999'}

{'http': '106.46.136.145:808'}

{'http': '111.124.245.121:9999'}

{'http': '218.17.43.228:3128'}

{'http': '58.23.103.106:9999'}

{'http': '42.84.103.140:9999'}

{'http': '140.240.245.167:9999'}

{'http': '106.46.136.96:808'}
85.9160001278

-------------------------------------------------------------------

多线程版
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup as bs
import re
import threading,thread
import time
def proxy_list(mbUrl):
    """Scrape the proxy listing page and probe every proxy in its own thread.

    Downloads ``http://www.xicidaili.com/nn/``, takes the text of cells
    1, 2 and 5 of every matching table row as IP, port and protocol,
    creates one ``threading.Thread`` per row targeting ``proxy_test``,
    starts them all, then joins them all before returning.

    Args:
        mbUrl: Target URL forwarded to ``proxy_test`` for every proxy.
    """
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"}
    url = 'http://www.xicidaili.com/nn/'
    # A timeout keeps a stalled listing server from hanging the script.
    r = requests.get(url=url, headers=headers, timeout=10)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = bs(r.content, 'html.parser')
    # The empty alternative in this pattern matches any string, so the class
    # value itself does not filter anything.
    # NOTE(review): presumably meant to skip the class-less header row - confirm.
    rows = soup.find_all(name='tr', attrs={'class': re.compile('|[^odd]')})

    threads = []
    for row in rows:
        # Search the row tag directly; re-parsing str(row) is unnecessary.
        cells = row.find_all(name='td')
        if len(cells) < 6:
            # Row lacks the expected columns; skip it instead of crashing.
            continue
        worker_args = (
            mbUrl,
            str(cells[1].string),
            str(cells[2].string),
            str(cells[5].string),
        )
        threads.append(threading.Thread(target=proxy_test, args=worker_args))

    # Start every worker before joining any, so the network waits overlap.
    for t in threads:
        t.start()
    for t in threads:
        t.join()
def proxy_test(url, ip, port, types):
    """Fetch ``url`` through one proxy and print the proxy if it works.

    The proxy counts as working when the first ``[...]`` group in the
    response text equals ``ip``. Printing is done while holding the
    module-level ``lock`` created in the ``__main__`` section.

    Args:
        url: Page to request through the proxy.
        ip: Proxy IP address as a string.
        port: Proxy port as a string.
        types: Protocol name; lower-cased and used as the key of the
            ``proxies`` mapping passed to ``requests.get``.
    """
    proxy = {types.lower(): '%s:%s' % (ip, port)}
    try:
        # Every proxy is tried once, with a 3-second timeout.
        r = requests.get(url, proxies=proxy, timeout=3)
        found = re.search(r'\[(.*?)\]', r.text)  # IP reported inside [...]
    except Exception:
        # Deliberate best effort: a dead or misbehaving proxy must never
        # abort the scan, so any failure just marks this proxy as unusable.
        return
    if found is not None and ip == found.group(1):
        # The context manager releases the lock even if printing raises.
        with lock:
            # Parenthesised form prints the same text on Python 2 and 3.
            print(proxy)
#测试一下
if __name__ == '__main__':
    # Time one complete threaded scan of the proxy list.
    time_start = time.time()
    # Module-level lock used by proxy_test around its print call.
    # threading.Lock() replaces the Python-2-only thread.allocate_lock().
    lock = threading.Lock()
    proxy_list("http://1212.ip138.com/ic.asp")
    time_finished = time.time() - time_start
    print(time_finished)

运行结果:
{'http': '123.132.170.185:9999'}
{'http': '180.107.249.162:9999'}
{'http': '110.244.202.201:9999'}
{'http': '123.52.220.243:9999'}
{'http': '121.31.143.237:8123'}
{'http': '110.73.2.236:8123'}
{'http': '106.46.136.24:808'}
{'http': '110.72.41.108:8123'}
{'http': '140.240.245.167:9999'}
{'http': '218.17.43.228:3128'}
{'http': '220.166.241.213:8118'}
3.36299991608


多线程版仅用了约3.4秒,而单线程版却用了约86秒,相差约26倍,差距一目了然。需要说明的是,这个例子属于I/O密集型任务(绝大部分时间都花在等待网络响应上),多线程能让这些等待相互重叠,因此提速非常明显;对于CPU密集型任务,受GIL限制,多线程通常不会带来这样的加速。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: