python爬虫笔记(七):实战(三)股票数据定向爬虫
2018-04-01 10:00
591 查看
目标分析及描述
#CrawBaiduStocksA.py
import requests
from bs4 import BeautifulSoup
import traceback
import re
def getHTMLText(url):
try:
r = requests.get(url)
r.raise_for_status()
r.encoding = r.apparent_encoding
return r.text
except:
return ""
def getStockList(lst, stockURL):
html = getHTMLText(stockURL)
soup = BeautifulSoup(html, 'html.parser')
a = soup.find_all('a')
for i in a:
try:
href = i.attrs['href']
lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
except:
continue
def getStockInfo(lst, stockURL, fpath):
for stock in lst:
url = stockURL + stock + ".html"
html = getHTMLText(url)
try:
if html=="":
continue
infoDict = {}
soup = BeautifulSoup(html, 'html.parser')
stockInfo = soup.find('div',attrs={'class':'stock-bets'})
name = stockInfo.find_all(attrs={'class':'bets-name'})[0]
infoDict.update({'股票名称': name.text.split()[0]})
keyList = stockInfo.find_all('dt')
valueList = stockInfo.find_all('dd')
for i in range(len(keyList)):
key = keyList[i].text
val = valueList[i].text
infoDict[key] = val
with open(fpath, 'a', encoding='utf-8') as f:
f.write( str(infoDict) + '\n' )
except:
traceback.print_exc()
continue
def main():
stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
stock_info_url = 'https://gupiao.baidu.com/stock/'
output_file = 'D:/BaiduStockInfo.txt'
slist=[]
getStockList(slist, stock_list_url)
getStockInfo(slist, stock_info_url, output_file)
main()
优化一下:import requests
from bs4 import BeautifulSoup
import traceback
import re
def getHTMLText(url, code="utf-8"):
try:
r = requests.get(url)
r.raise_for_status()
r.encoding = code
return r.text
except:
return ""
def getStockList(lst, stockURL):
html = getHTMLText(stockURL, "GB2312")
soup = BeautifulSoup(html, 'html.parser')
a = soup.find_all('a')
for i in a:
try:
href = i.attrs['href']
lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
except:
continue
def getStockInfo(lst, stockURL, fpath):
count = 0
for stock in lst:
url = stockURL + stock + ".html"
html = getHTMLText(url)
try:
if html=="":
continue
infoDict = {}
soup = BeautifulSoup(html, 'html.parser')
stockInfo = soup.find('div',attrs={'class':'stock-bets'})
name = stockInfo.find_all(attrs={'class':'bets-name'})[0]
infoDict.update({'股票名称': name.text.split()[0]})
keyList = stockInfo.find_all('dt')
valueList = stockInfo.find_all('dd')
for i in range(len(keyList)):
key = keyList[i].text
val = valueList[i].text
infoDict[key] = val
with open(fpath, 'a', encoding='utf-8') as f:
f.write( str(infoDict) + '\n' )
count = count + 1
print("\r当前进度: {:.2f}%".format(count*100/len(lst)),end="")
except:
count = count + 1
print("\r当前进度: {:.2f}%".format(count*100/len(lst)),end="")
continue
def main():
stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
stock_info_url = 'https://gupiao.baidu.com/stock/'
output_file = 'D:/BaiduStockInfo.txt'
slist=[]
getStockList(slist, stock_list_url)
getStockInfo(slist, stock_info_url, output_file)
main()
相关文章推荐
- Python 爬虫实战:股票数据定向爬虫
- python爬虫实战二——股票数据定向爬虫【有补充】
- Python 爬虫实战(2):股票数据定向爬虫
- 爬虫笔记 : Python提取网站数据总概括
- 鱼c笔记——Python爬虫(二):利用urllib.urlopen向有道翻译发送数据获得翻译结果
- python爬虫案例——新浪腾讯股票数据采集
- python 东方财富网&百度股票数据定向爬虫 实例
- 数据科学工程师面试宝典系列之一--Python爬虫实战
- python爬虫案例——东方财富股票数据采集
- python爬虫实战笔记---selenium爬取QQ空间说说并存至本地(上)
- python学习笔记(一)爬虫实战:图片自动下载器
- [hadoop+spark+python]大数据实战随手笔记
- python爬虫实战笔记---以轮子哥为起点Scrapy爬取知乎用户信息
- Python 爬虫和数据分析实战
- 【实战\聚焦Python分布式爬虫必学框架Scrapy 打造搜索引擎项目笔记】第1章 课程介绍
- Python网络爬虫与信息提取-Day14-(实例)股票数据定向爬虫
- python 爬虫 实战(一) —— 抓取学校开课数据
- 分享python3爬虫及数据分析实战视频教程
- python3 [爬虫入门实战]scrapy爬取盘多多五百万数据并存mongoDB
- 【极客学院】-python学习笔记-4-单线程爬虫 (提交表单抓取信息,实战练习)