利用Python中的matplotlib模块抓取yahoo finance里的历史数据并绘图
2012-11-21 20:15
941 查看
如何自动获取各个公司的股票历史数据并绘图是金融文本情感分析项目里的必要部分,诚然这些数据在finance.yahoo.com里可以很方便地看到,但如何利用程序自动获取、实时显示却是个问题。之前一直考虑写爬虫来抓取数据,显然这样做很费力且效率不高,而Python的matplotlib模块中有一个finance模块,能够很便捷地实现这一功能。
finance.py is a collection of modules for collecting, analyzing and plotting financial data. 让我们先看一个利用matplotlib模块获取finance.yahoo.com里的历史数据并绘图的例子,先贴代码:
# Example: fetch MSFT daily quotes from Yahoo Finance and plot the opening
# price against the date.
# NOTE(review): matplotlib.finance is, as far as I know, no longer shipped
# with recent matplotlib releases -- confirm against the installed version.
from pylab import figure, show
from matplotlib.finance import quotes_historical_yahoo
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import datetime

# Start and end of the query window.
date1 = datetime.date(2012, 1, 1)
date2 = datetime.date(2012, 11, 11)

daysFmt = DateFormatter('%m-%d-%Y')

quotes = quotes_historical_yahoo('MSFT', date1, date2)
# quotes_historical_yahoo returns None (not an empty sequence) when nothing
# could be fetched or parsed, so test truthiness instead of calling len().
if not quotes:
    raise SystemExit

# Each quote starts with (date number, open price, ...).
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

fig = figure()
ax = fig.add_subplot(111)
ax.plot_date(dates, opens, '-')

# format the ticks
ax.xaxis.set_major_formatter(daysFmt)
ax.autoscale_view()


# format the coords message box
def price(x):
    """Format a y value as a dollar amount with two decimals."""
    return '$%1.2f' % x


ax.fmt_xdata = DateFormatter('%Y-%m-%d')
ax.fmt_ydata = price
ax.grid(True)

fig.autofmt_xdate()
show()
date1、date2分别是所要查询数据的起止时间,比如这个例子就是要查询微软2012.1.1至2012.11.11之间的历史股价。
quotes_historical_yahoo是一个获取yahoo历史数据的函数,需要输入公司的Ticker Symbol和查询起止日期,输出为解析后的行情数据(下载到的原始数据会先缓存到本地文件),具体代码如下:
def quotes_historical_yahoo(ticker, date1, date2, asobject=False,
                            adjusted=True, cachename=None):
    """
    Get historical data for ticker between date1 and date2.  date1 and
    date2 are datetime instances or (year, month, day) sequences.

    See :func:`parse_yahoo_historical` for explanation of output formats
    and the *asobject* and *adjusted* kwargs.

    Ex:
    sp = f.quotes_historical_yahoo('^GSPC', d1, d2,
                                   asobject=True, adjusted=True)
    returns = (sp.open[1:] - sp.open[:-1])/sp.open[1:]
    [n,bins,patches] = hist(returns, 100)
    mu = mean(returns)
    sigma = std(returns)
    x = normpdf(bins, mu, sigma)
    plot(bins, x, color='red', lw=2)

    cachename is the name of the local file cache.  If None, will
    default to the md5 hash of the url (which incorporates the ticker
    and date range)

    Returns the parsed quotes, or None when no rows were parsed or an
    IOError occurred while reading the data (a warning is issued in the
    latter case).
    """
    # TODO: maybe enable a warning later, as part of a slow transition to
    # using asobject=None instead of asobject=False.

    fh = fetch_historical_yahoo(ticker, date1, date2, cachename)

    try:
        ret = parse_yahoo_historical(fh, asobject=asobject,
                                     adjusted=adjusted)
    except IOError as exc:
        # Report the whole exception: strerror may be None, and indexing
        # it would only yield a single character of the message anyway.
        warnings.warn('fh failure\n%s' % exc)
        return None
    finally:
        # The handle is owned by this function; always release it.
        fh.close()

    if len(ret) == 0:
        return None
    return ret
fetch_historical_yahoo函数返回一个历史数据文件句柄fh,当然也可以用dividends=True让它获取股息数据而非价格数据,代码如下:
def fetch_historical_yahoo(ticker, date1, date2, cachename=None,
                           dividends=False):
    """
    Fetch historical data for ticker between date1 and date2.  date1 and
    date2 are date or datetime instances, or (year, month, day) sequences.

    Ex:
    fh = fetch_historical_yahoo('^GSPC', (2000, 1, 1), (2001, 12, 31))

    cachename is the name of the local file cache.  If None, will
    default to the md5 hash of the url (which incorporates the ticker
    and date range)

    set dividends=True to return dividends instead of price data.  With
    this option set, parse functions will not work

    a file handle is returned; the caller is responsible for closing it
    """
    ticker = ticker.upper()

    # The URL takes each date as (month - 1, day, year).
    if iterable(date1):
        d1 = (date1[1] - 1, date1[2], date1[0])
    else:
        d1 = (date1.month - 1, date1.day, date1.year)
    if iterable(date2):
        d2 = (date2[1] - 1, date2[2], date2[0])
    else:
        d2 = (date2.month - 1, date2.day, date2.year)

    if dividends:
        g = 'v'
        verbose.report('Retrieving dividends instead of prices')
    else:
        g = 'd'

    urlFmt = 'http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv'

    url = urlFmt % (d1[0], d1[1], d1[2],
                    d2[0], d2[1], d2[2], ticker, g)

    if cachename is None:
        # Hash bytes, not text: hashing a text string fails on Python 3,
        # while a Python 2 str is already bytes and is passed through.
        url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
        cachename = os.path.join(cachedir, md5(url_bytes).hexdigest())

    if os.path.exists(cachename):
        fh = open(cachename)
        verbose.report('Using cachefile %s for %s' % (cachename, ticker))
    else:
        mkdirs(cachedir)
        # Read the whole response before touching the cache file, so a
        # failed download cannot leave a truncated file that later calls
        # would mistake for valid cached data.
        urlfh = urlopen(url)
        try:
            data = urlfh.read()
        finally:
            urlfh.close()
        with open(cachename, 'wb') as cachefile:
            cachefile.write(data)
        verbose.report('Saved %s data to cache file %s' % (ticker, cachename))
        fh = open(cachename, 'r')

    return fh
parse_yahoo_historical函数可对历史数据进行解析,读取文件,对文件部分内容进行操作,代码如下:
def parse_yahoo_historical(fh, adjusted=True, asobject=False):
    """
    Parse the historical data in file handle fh from yahoo finance.

    *adjusted*
      If True (default) replace open, close, high, and low prices with
      their adjusted values.  The adjustment is by a scale factor, S =
      adjusted_close/close.  Adjusted prices are actual prices
      multiplied by S.

      Volume is not adjusted as it is already backward split adjusted
      by Yahoo.  If you want to compute dollars traded, multiply volume
      by the adjusted close, regardless of whether you choose adjusted
      = True|False.

    *asobject*
      If False (default for compatibility with earlier versions)
      return a list of tuples containing

        d, open, close, high, low, volume

      If None (preferred alternative to False), return
      a 2-D ndarray corresponding to the list of tuples.

      Otherwise return a numpy recarray with

        date, year, month, day, d, open, close, high, low,
        volume, adjusted_close

      where d is a floating point representation of date,
      as returned by date2num, and date is a python standard
      library datetime.date instance.

      The name of this kwarg is a historical artifact.  Formerly,
      True returned a cbook Bunch
      holding 1-D ndarrays.  The behavior of a numpy recarray is
      very similar to the Bunch.

      NOTE(review): this code reads the adjusted close as field
      'aclose'; confirm the recarray field names against stock_dt.
    """
    lines = fh.readlines()

    results = []

    # The first line is skipped (presumably the CSV header -- confirm).
    for line in lines[1:]:
        vals = line.split(',')
        if len(vals) != 7:
            continue  # not a 7-column row; TODO: add warning?

        datestr = vals[0]
        # Split the date by hand: using time.strptime doubles the run
        # time, and with the present YYYY-MM-DD format we don't need it.
        dt = datetime.date(*[int(val) for val in datestr.split('-')])
        dnum = date2num(dt)
        # Named open_ so the builtin open() is not shadowed.
        open_, high, low, close = [float(val) for val in vals[1:5]]
        volume = float(vals[5])
        aclose = float(vals[6])

        # Stored in output order: close comes before high and low.
        results.append((dt, dt.year, dt.month, dt.day,
                        dnum, open_, close, high, low, volume, aclose))

    # Output rows are in the opposite order to the rows of the file.
    results.reverse()
    d = np.array(results, dtype=stock_dt)

    if adjusted:
        scale = d['aclose'] / d['close']
        # A zero close gives an infinite factor; mark those rows as NaN.
        scale[np.isinf(scale)] = np.nan
        d['open'] *= scale
        d['close'] *= scale
        d['high'] *= scale
        d['low'] *= scale

    if not asobject:
        # 2-D sequence; formerly list of tuples, now ndarray.
        # The builtin float replaces the removed np.float alias.
        ret = np.zeros((len(d), 6), dtype=float)
        ret[:, 0] = d['d']
        ret[:, 1] = d['open']
        ret[:, 2] = d['close']
        ret[:, 3] = d['high']
        ret[:, 4] = d['low']
        ret[:, 5] = d['volume']
        if asobject is None:
            return ret
        return [tuple(row) for row in ret]

    return d.view(np.recarray)  # Close enough to former Bunch return
另外,如果无需操作历史数据,只需下载存储到本地文件可参考下面代码:
# This example downloads the historical data from finance.yahoo and saves
# it to a file on the local computer.
import os

try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

ticker = 'MSFT'  # the Ticker Symbol
date1 = (2012, 1, 1)  # beginning time
date2 = (2012, 11, 11)  # ending time

# The URL takes each date as (month - 1, day, year).
d1 = (date1[1] - 1, date1[2], date1[0])
d2 = (date2[1] - 1, date2[2], date2[0])

g = 'd'

urlFmt = 'http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv'
url = urlFmt % (d1[0], d1[1], d1[2],
                d2[0], d2[1], d2[2], ticker, g)  # the url of historical data
print(url)

path = r'C:\Users\yinyao\Desktop\Pythoncode'  # saving path
# No leading backslash here: a rooted component makes os.path.join drop
# the directories of `path` on Windows, giving C:\ticker.csv instead.
file_name = 'ticker.csv'  # file name
dest_dir = os.path.join(path, file_name)  # located file
urlretrieve(url, dest_dir)  # download the data and put it in the located file
相关文章推荐
- Python实战小程序利用matplotlib模块画图代码分享
- 7057-1.Python数据可视化:利用matplotlib中的subplot进行图中图展示
- python pandas做数据分析视图分析matplotlib,seaborn模块使用
- 解决Windows系统下python利用matplotlib绘图时中文乱码的问题
- 【Matplotlib】利用Python进行绘图
- 【利用python进行数据分析-学习记录】python-matplotlib中Basemap插件的安装
- Python数据可视化利器Matplotlib,绘图入门篇,Pyplot介绍
- Python进阶(三十九)-数据可视化の使用matplotlib进行绘图分析数据
- Python进阶(四十)-数据可视化の使用matplotlib进行绘图
- 使用python中的matplotlib进行绘图分析数据
- python数据分析之(6)简单绘图matplotlib.pyplot
- 获取博客积分排名,存入数据库,读取数据进行绘图(python,selenium,matplotlib)
- python数据挖掘学习笔记】十.Pandas、Matplotlib、PCA绘图实用代码补充
- 使用python中的matplotlib进行绘图分析数据
- Python进阶(三十八)-数据可视化の利用matplotlib 进行折线图,直方图和饼图的绘制
- matplotlib -- 使用python中的matplotlib进行绘图分析数据
- 【python数据挖掘课程】十.Pandas、Matplotlib、PCA绘图实用代码补充
- python matplotlib模块——绘制三维图形、三维数据散点图
- python数据挖掘课程 十.Pandas、Matplotlib、PCA绘图实用代码补充
- python matplotlib模块——绘制三维图形、三维数据散点图