您的位置:首页 > 编程语言 > Python开发

利用Python中的matplotlib模块抓取yahoo finance里的历史数据并绘图

2012-11-21 20:15 941 查看

如何自动获取各个公司的股票历史数据并绘图是金融文本情感分析项目里的必要部分。诚然,这些数据在 finance.yahoo.com 里可以很方便地看到,但如何利用程序自动获取、实时显示却是个问题。之前一直考虑写爬虫来抓取数据,显然这样做很费力且效率不高,而 Python 的 matplotlib 模块中有一个 finance 子模块,能够很便捷地实现这一功能。

finance.py is a collection of modules for collecting, analysing and plotting financial data. 让我们先看一个 example,了解如何利用 matplotlib 模块获取 finance.yahoo.com 里的历史数据并绘图,先贴代码:

from pylab import figure, show
from matplotlib.finance import quotes_historical_yahoo
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import datetime

# Query window: MSFT quotes from 2012-01-01 through 2012-11-11.
date1 = datetime.date(2012, 1, 1)
date2 = datetime.date(2012, 11, 11)

daysFmt = DateFormatter('%m-%d-%Y')

quotes = quotes_historical_yahoo('MSFT', date1, date2)
# quotes_historical_yahoo returns None (not an empty list) when there is
# nothing to return, so test truthiness instead of calling len() on it.
if not quotes:
    raise SystemExit

# Each quote is a tuple whose first item is the date number and whose
# second item is the opening price.
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

fig = figure()
ax = fig.add_subplot(111)
ax.plot_date(dates, opens, '-')

# format the ticks
ax.xaxis.set_major_formatter(daysFmt)
ax.autoscale_view()


# format the coords message box
def price(x):
    """Format a y value as a dollar amount with two decimals."""
    return '$%1.2f' % x


ax.fmt_xdata = DateFormatter('%Y-%m-%d')
ax.fmt_ydata = price
ax.grid(True)

fig.autofmt_xdate()
show()


date1、date2分别是所要查询数据的起止时间,比如这个例子就是要查询微软2012.1.1至2012.11.11之间的历史股价。

quotes_historical_yahoo 是一个获取 yahoo 历史数据的函数,需要输入公司的 Ticker Symbol 和查询起止日期;它先通过 fetch_historical_yahoo 取得(带本地缓存的)数据文件,再交给 parse_yahoo_historical 解析并返回解析后的行情数据,具体代码如下:

def quotes_historical_yahoo(ticker, date1, date2, asobject=False,
                            adjusted=True, cachename=None):
    """
    Get historical data for ticker between date1 and date2.  date1 and
    date2 are datetime instances or (year, month, day) sequences.

    See :func:`parse_yahoo_historical` for explanation of output formats
    and the *asobject* and *adjusted* kwargs.

    Ex:
    sp = f.quotes_historical_yahoo('^GSPC', d1, d2,
                                   asobject=True, adjusted=True)
    returns = (sp.open[1:] - sp.open[:-1]) / sp.open[1:]
    [n, bins, patches] = hist(returns, 100)
    mu = mean(returns)
    sigma = std(returns)
    x = normpdf(bins, mu, sigma)
    plot(bins, x, color='red', lw=2)

    cachename is the name of the local file cache.  If None, will
    default to the md5 hash of the url (which incorporates the ticker
    and date range)

    Returns the parsed quotes, or None when parsing yields no rows or
    fails with an IOError (in which case a warning is issued).
    """
    # Maybe enable a warning later as part of a slow transition
    # to using None instead of False.
    # if asobject is False:
    #     warnings.warn("Recommend changing to asobject=None")

    fh = fetch_historical_yahoo(ticker, date1, date2, cachename)

    try:
        ret = parse_yahoo_historical(fh, asobject=asobject,
                                     adjusted=adjusted)
    except IOError as exc:
        # Report the exception itself: indexing exc.strerror would yield a
        # single character, or fail outright when strerror is None.
        warnings.warn('fh failure\n%s' % (exc,))
        return None
    finally:
        # The parser has consumed the whole file; release the handle.
        fh.close()

    if len(ret) == 0:
        return None
    return ret


fetch_historical_yahoo 函数返回一个历史数据文件的句柄 fh,当然也可以用 http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv 手动下载,具体数值计算参见代码。

def _yahoo_date_fields(date):
    """Return (month - 1, day, year) for a date object or (y, m, d) sequence."""
    if iterable(date):
        return (date[1] - 1, date[2], date[0])
    return (date.month - 1, date.day, date.year)


def fetch_historical_yahoo(ticker, date1, date2, cachename=None,
                           dividends=False):
    """
    Fetch historical data for ticker between date1 and date2.  date1 and
    date2 are date or datetime instances, or (year, month, day) sequences.

    Ex:
    fh = fetch_historical_yahoo('^GSPC', (2000, 1, 1), (2001, 12, 31))

    cachename is the name of the local file cache.  If None, will
    default to the md5 hash of the url (which incorporates the ticker
    and date range)

    set dividends=True to return dividends instead of price data.  With
    this option set, parse functions will not work

    a file handle is returned
    """

    ticker = ticker.upper()

    # The URL template takes a zero-based month, then day, then year.
    d1 = _yahoo_date_fields(date1)
    d2 = _yahoo_date_fields(date2)

    if dividends:
        g = 'v'
        verbose.report('Retrieving dividends instead of prices')
    else:
        g = 'd'

    urlFmt = 'http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv'

    url = urlFmt % (d1[0], d1[1], d1[2],
                    d2[0], d2[1], d2[2], ticker, g)

    if cachename is None:
        cachename = os.path.join(cachedir, md5(url).hexdigest())
    if os.path.exists(cachename):
        fh = open(cachename)
        verbose.report('Using cachefile %s for %s' % (cachename, ticker))
    else:
        mkdirs(cachedir)
        urlfh = urlopen(url)
        # Read the whole response before creating the cache file so that a
        # failed download cannot leave an empty file that later calls
        # would mistake for valid cached data.
        try:
            data = urlfh.read()
        finally:
            urlfh.close()

        with open(cachename, 'wb') as cache:
            cache.write(data)
        verbose.report('Saved %s data to cachefile %s' % (ticker, cachename))
        fh = open(cachename, 'r')

    return fh


parse_yahoo_historical函数可对历史数据进行解析,读取文件,对文件部分内容进行操作,代码如下:

def parse_yahoo_historical(fh, adjusted=True, asobject=False):
    """
    Parse the historical data in file handle fh from yahoo finance.

    *adjusted*
      If True (default) replace open, close, high, and low prices with
      their adjusted values.  The adjustment is by a scale factor, S =
      adjusted_close/close.  Adjusted prices are actual prices
      multiplied by S.

      Volume is not adjusted as it is already backward split adjusted
      by Yahoo.  If you want to compute dollars traded, multiply volume
      by the adjusted close, regardless of whether you choose adjusted
      = True|False.

    *asobject*
      If False (default for compatibility with earlier versions)
      return a list of tuples containing

        d, open, close, high, low, volume

      If None (preferred alternative to False), return
      a 2-D ndarray corresponding to the list of tuples.

      Otherwise return a numpy recarray with

        date, year, month, day, d, open, close, high, low,
        volume, adjusted_close

      where d is a floating point representation of date,
      as returned by date2num, and date is a python standard
      library datetime.date instance.

      The name of this kwarg is a historical artifact.  Formerly,
      True returned a cbook Bunch
      holding 1-D ndarrays.  The behavior of a numpy recarray is
      very similar to the Bunch.

    """

    lines = fh.readlines()

    results = []

    # The first line is skipped (presumably the CSV header row).
    for line in lines[1:]:

        vals = line.split(',')
        if len(vals) != 7:
            continue   # add warning?
        datestr = vals[0]
        # The date is split by hand rather than parsed with strptime,
        # which doubled the runtime; the fixed Y-M-D layout makes that safe.
        dt = datetime.date(*[int(val) for val in datestr.split('-')])
        dnum = date2num(dt)
        # File column order is open, high, low, close; the record below
        # stores them as open, close, high, low.
        open_, high, low, close = [float(val) for val in vals[1:5]]
        volume = float(vals[5])
        aclose = float(vals[6])

        results.append((dt, dt.year, dt.month, dt.day,
                        dnum, open_, close, high, low, volume, aclose))
    # Reverse the row order of the file.
    results.reverse()
    d = np.array(results, dtype=stock_dt)
    if adjusted:
        scale = d['aclose'] / d['close']
        # A zero close gives an infinite factor; mark it as missing.
        scale[np.isinf(scale)] = np.nan
        d['open'] *= scale
        d['close'] *= scale
        d['high'] *= scale
        d['low'] *= scale

    if not asobject:
        # 2-D sequence; formerly list of tuples, now ndarray
        # dtype=float replaces the np.float alias, which newer NumPy
        # releases no longer provide.
        ret = np.zeros((len(d), 6), dtype=float)
        ret[:, 0] = d['d']
        ret[:, 1] = d['open']
        ret[:, 2] = d['close']
        ret[:, 3] = d['high']
        ret[:, 4] = d['low']
        ret[:, 5] = d['volume']
        if asobject is None:
            return ret
        return [tuple(row) for row in ret]

    return d.view(np.recarray)  # Close enough to former Bunch return


另外,如果无需操作历史数据,只需下载存储到本地文件可参考下面代码:

# This example downloads the data from finance.yahoo and saves it on the
# local computer.

import os
import urllib2
import urllib

ticker = 'MSFT'  # the Ticker Symbol
date1 = (2012, 1, 1)  # beginning time
date2 = (2012, 11, 11)  # ending time

# Reorder (year, month, day) into the (month - 1, day, year) fields that
# the URL template below consumes.
d1 = (date1[1] - 1, date1[2], date1[0])

d2 = (date2[1] - 1, date2[2], date2[0])

g = 'd'

urlFmt = 'http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv'
url = urlFmt % (d1[0], d1[1], d1[2],
                d2[0], d2[1], d2[2], ticker, g)  # the url of historical data
print(url)

path = r'C:\Users\yinyao\Desktop\Pythoncode'  # Saving path
# No leading backslash: a rooted component makes os.path.join discard the
# directory part of `path` on Windows and save to the drive root instead.
file_name = 'ticker.csv'  # file name
dest_dir = os.path.join(path, file_name)  # located file
urllib.urlretrieve(url, dest_dir)  # download the data and put in located file

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: