您的位置:首页 > 编程语言 > Python开发

Pandas 10分钟入门(官方说明+个人小测试)

2017-10-28 21:53 369 查看

Pandas10分钟入门

代码下载地址[http://download.csdn.net/download/sirwill/10043185]In[19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

ObjectCreation

In[20]:
s = pd.Series([1, 2, 3, np.nan, 5, 6])  # series类型数组。
s
Out[20]:
01.0
12.0
23.0
3NaN
45.0
56.0
dtype:float64
In[21]:
dates = pd.date_range("20170112", periods=6)  # Creating a DataFrame by passing a numpy array, with a datetime index and labeled columns
dates
Out[21]:
DatetimeIndex(['2017-01-12','2017-01-13','2017-01-14','2017-01-15',
'2017-01-16','2017-01-17'],
dtype='datetime64[ns]',freq='D')
In[22]:
list(dates)
dates.date
Out[22]:
array([datetime.date(2017,1,12),datetime.date(2017,1,13),
datetime.date(2017,1,14),datetime.date(2017,1,15),
datetime.date(2017,1,16),datetime.date(2017,1,17)],dtype=object)
In[23]:
list(dates.date)
Out[23]:
[datetime.date(2017,1,12),
datetime.date(2017,1,13),
datetime.date(2017,1,14),
datetime.date(2017,1,15),
datetime.date(2017,1,16),
datetime.date(2017,1,17)]
In[24]:
dates.year
Out[24]:
Int64Index([2017,2017,2017,2017,2017,2017],dtype='int64')
In[25]:
list(dates.year)
Out[25]:
[2017,2017,2017,2017,2017,2017]
In[26]:
list(dates.day)
Out[26]:
[12,13,14,15,16,17]
In[27]:
str(dates.date)
Out[27]:
'[datetime.date(2017,1,12)datetime.date(2017,1,13)\ndatetime.date(2017,1,14)datetime.date(2017,1,15)\ndatetime.date(2017,1,16)datetime.date(2017,1,17)]'
In[28]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df
Out[28]:
ABCD
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460
In[29]:
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'})  # Creating a DataFrame by passing a dict of objects that can be converted to series-like.
df2
Out[29]:
ABCDEF
01.02013-01-021.03testfoo
11.02013-01-021.03trainfoo
21.02013-01-021.03testfoo
31.02013-01-021.03trainfoo
In[30]:
df2.dtypes
Out[30]:
Afloat64
Bdatetime64[ns]
Cfloat32
Dint32
Ecategory
Fobject
dtype:object
In[31]:
df.dtypes
Out[31]:
Afloat64
Bfloat64
Cfloat64
Dfloat64
dtype:object
In[32]:
df2.<TAB>#使用jupyter时按tab键,可以看到代码提示。
File"<ipython-input-32-9c4c8dafe199>",line1
df2.<TAB>#Ifyou’reusingIPython,tabcompletionforcolumnnames(aswellaspublicattributes)isautomaticallyenabled.
^
SyntaxError:invalidsyntax

ViewingData

In[36]:
df.head()
Out[36]:
ABCD
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
In[37]:
df.index
Out[37]:
DatetimeIndex(['2017-01-12','2017-01-13','2017-01-14','2017-01-15',
'2017-01-16','2017-01-17'],
dtype='datetime64[ns]',freq='D')
In[38]:
df.columns
Out[38]:
Index(['A','B','C','D'],dtype='object')
In[39]:
df.values
Out[39]:
array([[-2.2581213,2.45619592,0.77856734,-2.030407],
[-0.65834822,0.62249451,0.38862467,0.07358728],
[0.58921899,1.39279193,0.60554535,1.23153815],
[-0.1519579,-0.65524863,-2.1147252,-0.66983949],
[-1.32330447,3.14365936,0.63899562,0.89868346],
[-0.02493461,0.3858107,-1.57718486,-0.0214603]])
In[40]:
df.describe()
Out[40]:
ABCD
count6.0000006.0000006.0000006.000000
mean-0.6379081.224284-0.213363-0.086316
std1.0210781.4019871.2820791.171045
min-2.258121-0.655249-2.114725-2.030407
25%-1.1570650.444982-1.085732-0.507745
50%-0.4051531.0076430.4970850.026063
75%-0.0566902.1903450.6306330.692409
max0.5892193.1436590.7785671.231538
In[41]:
df
Out[41]:
ABCD
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460
In[42]:
df.T
Out[42]:
2017-01-1200:00:002017-01-1300:00:002017-01-1400:00:002017-01-1500:00:002017-01-1600:00:002017-01-1700:00:00
A-2.258121-0.6583480.589219-0.151958-1.323304-0.024935
B2.4561960.6224951.392792-0.6552493.1436590.385811
C0.7785670.3886250.605545-2.1147250.638996-1.577185
D-2.0304070.0735871.231538-0.6698390.898683-0.021460
In[43]:
df.sort_index(axis=1, ascending=False)  # Sorting by an axis 排序。
Out[43]:
DCBA
2017-01-12-2.0304070.7785672.456196-2.258121
2017-01-130.0735870.3886250.622495-0.658348
2017-01-141.2315380.6055451.3927920.589219
2017-01-15-0.669839-2.114725-0.655249-0.151958
2017-01-160.8986830.6389963.143659-1.323304
2017-01-17-0.021460-1.5771850.385811-0.024935
In[44]:
df.sort_values(by="B")#Sortingbyvalues
Out[44]:
ABCD
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-17-0.0249350.385811-1.577185-0.021460
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-16-1.3233043.1436590.6389960.898683
In[45]:
df
Out[45]:
ABCD
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460

Selection

Getting

In[46]:
df["A"]#Selectingasinglecolumn,whichyieldsaSeries,equivalenttodf.A
Out[46]:
2017-01-12-2.258121
2017-01-13-0.658348
2017-01-140.589219
2017-01-15-0.151958
2017-01-16-1.323304
2017-01-17-0.024935
Freq:D,Name:A,dtype:float64
In[47]:
df.A
Out[47]:
2017-01-12-2.258121
2017-01-13-0.658348
2017-01-140.589219
2017-01-15-0.151958
2017-01-16-1.323304
2017-01-17-0.024935
Freq:D,Name:A,dtype:float64
In[48]:
df[0:3]#Selectingvia[],whichslicestherows.
Out[48]:
ABCD
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
In[49]:
df["2017-01-13":"2017-01-17"]
Out[49]:
ABCD
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460

SelectionbyLabel

In[50]:
dates
Out[50]:
DatetimeIndex(['2017-01-12','2017-01-13','2017-01-14','2017-01-15',
'2017-01-16','2017-01-17'],
dtype='datetime64[ns]',freq='D')
In[51]:
df.loc[dates[0]]#Forgettingacrosssectionusingalabel
Out[51]:
A-2.258121
B2.456196
C0.778567
D-2.030407
Name:2017-01-1200:00:00,dtype:float64
In[52]:
df.loc[:,["A","B"]]
Out[52]:
AB
2017-01-12-2.2581212.456196
2017-01-13-0.6583480.622495
2017-01-140.5892191.392792
2017-01-15-0.151958-0.655249
2017-01-16-1.3233043.143659
2017-01-17-0.0249350.385811
In[53]:
df.loc['20170112':'20170116',['A','B']]#Showinglabelslicing,bothendpointsareincluded
Out[53]:
AB
2017-01-12-2.2581212.456196
2017-01-13-0.6583480.622495
2017-01-140.5892191.392792
2017-01-15-0.151958-0.655249
2017-01-16-1.3233043.143659
In[54]:
df.loc["20170115",["A","B"]]
Out[54]:
A-0.151958
B-0.655249
Name:2017-01-1500:00:00,dtype:float64
In[55]:
df.loc[dates[3],"D"]#Forgettingascalarvalue
Out[55]:
-0.6698394854437093
In[56]:
df.at[dates[3],"D"]#Forgettingfastaccesstoascalar(equivtothepriormethod)
Out[56]:
-0.6698394854437093

SelectionbyPosition

In[57]:
df.iloc[3]#Selectviathepositionofthepassedintegers
Out[57]:
A-0.151958
B-0.655249
C-2.114725
D-0.669839
Name:2017-01-1500:00:00,dtype:float64
In[58]:
df.iloc[2:5,0:2]#Byintegerslices,actingsimilartonumpy/python
Out[58]:
AB
2017-01-140.5892191.392792
2017-01-15-0.151958-0.655249
2017-01-16-1.3233043.143659
In[59]:
df.iloc[[1,3,4],[0,2]]#Bylistsofintegerpositionlocations,similartothenumpy/pythonstyle
Out[59]:
AC
2017-01-13-0.6583480.388625
2017-01-15-0.151958-2.114725
2017-01-16-1.3233040.638996
In[60]:
df.iloc[1:3,:]
Out[60]:
ABCD
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
In[61]:
df.iloc[:,1:3]
Out[61]:
BC
2017-01-122.4561960.778567
2017-01-130.6224950.388625
2017-01-141.3927920.605545
2017-01-15-0.655249-2.114725
2017-01-163.1436590.638996
2017-01-170.385811-1.577185
In[62]:
df.iloc[1,1]#Forgettingavalueexplicitly
Out[62]:
0.62249451281708756
In[63]:
df.iat[1,1]#Forgettingfastaccesstoascalar(equivtothepriormethod)
Out[63]:
0.62249451281708756

BooleanIndexing

In[64]:
df[df.A>0]#Usingasinglecolumn’svaluestoselectdata
Out[64]:
ABCD
2017-01-140.5892191.3927920.6055451.231538
In[65]:
df[df>0]#SelectingvaluesfromaDataFramewhereabooleanconditionismet
Out[65]:
ABCD
2017-01-12NaN2.4561960.778567NaN
2017-01-13NaN0.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15NaNNaNNaNNaN
2017-01-16NaN3.1436590.6389960.898683
2017-01-17NaN0.385811NaNNaN
In[66]:
df2
Out[66]:
ABCDEF
01.02013-01-021.03testfoo
11.02013-01-021.03trainfoo
21.02013-01-021.03testfoo
31.02013-01-021.03trainfoo
In[67]:
df
Out[67]:
ABCD
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460
In[68]:
df2 = df.copy()
df2
Out[68]:
ABCD
2017-01-12-2.2581212.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460
In[69]:
df.equals(df2)
Out[69]:
True
In[70]:
df==df2
Out[70]:
ABCD
2017-01-12TrueTrueTrueTrue
2017-01-13TrueTrueTrueTrue
2017-01-14TrueTrueTrueTrue
2017-01-15TrueTrueTrueTrue
2017-01-16TrueTrueTrueTrue
2017-01-17TrueTrueTrueTrue
In[71]:
df is df2
Out[71]:
False
In[72]:
df2["E"] = ["one", "one", "two", "three", "four", "three"]
df2
Out[72]:
ABCDE
2017-01-12-2.2581212.4561960.778567-2.030407one
2017-01-13-0.6583480.6224950.3886250.073587one
2017-01-140.5892191.3927920.6055451.231538two
2017-01-15-0.151958-0.655249-2.114725-0.669839three
2017-01-16-1.3233043.1436590.6389960.898683four
2017-01-17-0.0249350.385811-1.577185-0.021460three
In[73]:
df2[df2.E.isin(["two","four"])]
Out[73]:
ABCDE
2017-01-140.5892191.3927920.6055451.231538two
2017-01-16-1.3233043.1436590.6389960.898683four
In[74]:
df2[df2["E"].isin(["two","four"])]
Out[74]:
ABCDE
2017-01-140.5892191.3927920.6055451.231538two
2017-01-16-1.3233043.1436590.6389960.898683four

Setting

In[75]:
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range("20171016", periods=6))  # Setting a new column automatically aligns the data by the indexes
s1
Out[75]:
2017-10-161
2017-10-172
2017-10-183
2017-10-194
2017-10-205
2017-10-216
Freq:D,dtype:int64
In[76]:
df.at[dates[0],"A"]=0#Settingvaluesbylabel
In[77]:
df
Out[77]:
ABCD
2017-01-120.0000002.4561960.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460
In[78]:
df.iat[0, 1] = 0
df
Out[78]:
ABCD
2017-01-120.0000000.0000000.778567-2.030407
2017-01-13-0.6583480.6224950.3886250.073587
2017-01-140.5892191.3927920.6055451.231538
2017-01-15-0.151958-0.655249-2.114725-0.669839
2017-01-16-1.3233043.1436590.6389960.898683
2017-01-17-0.0249350.385811-1.577185-0.021460
In[79]:
df.loc[:, "D"] = np.array([5] * len(df))  # Setting by assigning with a numpy array
df
Out[79]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224950.3886255
2017-01-140.5892191.3927920.6055455
2017-01-15-0.151958-0.655249-2.1147255
2017-01-16-1.3233043.1436590.6389965
2017-01-17-0.0249350.385811-1.5771855
In[80]:
df2 = df.copy()
df2
Out[80]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224950.3886255
2017-01-140.5892191.3927920.6055455
2017-01-15-0.151958-0.655249-2.1147255
2017-01-16-1.3233043.1436590.6389965
2017-01-17-0.0249350.385811-1.5771855
In[81]:
df2[df2 > 0] = -df2
df2
Out[81]:
ABCD
2017-01-120.0000000.000000-0.778567-5
2017-01-13-0.658348-0.622495-0.388625-5
2017-01-14-0.589219-1.392792-0.605545-5
2017-01-15-0.151958-0.655249-2.114725-5
2017-01-16-1.323304-3.143659-0.638996-5
2017-01-17-0.024935-0.385811-1.577185-5

MissingData

In[83]:
df
Out[83]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224950.3886255
2017-01-140.5892191.3927920.6055455
2017-01-15-0.151958-0.655249-2.1147255
2017-01-16-1.3233043.1436590.6389965
2017-01-17-0.0249350.385811-1.5771855
In[84]:
df1=df.reindex(index=dates[0:4],columns=list(df.columns)+['E'])
df1.loc[dates[0]:dates[1],'E']=1
df1
Out[84]:
ABCDE
2017-01-120.0000000.0000000.77856751.0
2017-01-13-0.6583480.6224950.38862551.0
2017-01-140.5892191.3927920.6055455NaN
2017-01-15-0.151958-0.655249-2.1147255NaN
In[85]:
df1.dropna(how="any")#Todropanyrowsthathavemissingdata
Out[85]:
ABCDE
2017-01-120.0000000.0000000.77856751.0
2017-01-13-0.6583480.6224950.38862551.0
In[86]:
df1.fillna(value=5)#Fillingmissingdata
Out[86]:
ABCDE
2017-01-120.0000000.0000000.77856751.0
2017-01-13-0.6583480.6224950.38862551.0
2017-01-140.5892191.3927920.60554555.0
2017-01-15-0.151958-0.655249-2.11472555.0
In[87]:
df1
Out[87]:
ABCDE
2017-01-120.0000000.0000000.77856751.0
2017-01-13-0.6583480.6224950.38862551.0
2017-01-140.5892191.3927920.6055455NaN
2017-01-15-0.151958-0.655249-2.1147255NaN
In[88]:
pd.isnull(df1)
Out[88]:
ABCDE
2017-01-12FalseFalseFalseFalseFalse
2017-01-13FalseFalseFalseFalseFalse
2017-01-14FalseFalseFalseFalseTrue
2017-01-15FalseFalseFalseFalseTrue
In[89]:
df1.isnull()
Out[89]:
ABCDE
2017-01-12FalseFalseFalseFalseFalse
2017-01-13FalseFalseFalseFalseFalse
2017-01-14FalseFalseFalseFalseTrue
2017-01-15FalseFalseFalseFalseTrue
In[90]:
df1.isna()#没有这个方法~~
---------------------------------------------------------------------------AttributeErrorTraceback(mostrecentcalllast)<ipython-input-90-9dd6d031e095>in<module>()---->1df1.isna()#没有这个方法~~D:\Users\asus\Anaconda3\lib\site-packages\pandas\core\generic.pyin__getattr__(self,name)2968ifnameinself._info_axis:2969returnself[name]->2970returnobject.__getattribute__(self,name)29712972def__setattr__(self,name,value):AttributeError:'DataFrame'objecthasnoattribute'isna'

Options

Stats

Operations in general exclude missing data. Performing a descriptive statistic: In [91]:
df
Out[91]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224950.3886255
2017-01-140.5892191.3927920.6055455
2017-01-15-0.151958-0.655249-2.1147255
2017-01-16-1.3233043.1436590.6389965
2017-01-17-0.0249350.385811-1.5771855
In[92]:
df.mean()
Out[92]:
A-0.261554
B0.814918
C-0.213363
D5.000000
dtype:float64
In[93]:
df.mean(1)#Sameoperationontheotheraxis
Out[93]:
2017-01-121.444642
2017-01-131.338193
2017-01-141.896889
2017-01-150.519517
2017-01-161.864838
2017-01-170.945923
Freq:D,dtype:float64
In[94]:
s=pd.Series([1,2,3,np.nan,4,5],index=dates).shift(2)
#Operatingwithobjectsthathavedifferentdimensionalityandneedalignment.Inaddition,pandasautomaticallybroadcastsalongthespecifieddimension.
s
Out[94]:
2017-01-12NaN
2017-01-13NaN
2017-01-141.0
2017-01-152.0
2017-01-163.0
2017-01-17NaN
Freq:D,dtype:float64
In[95]:
df
Out[95]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224950.3886255
2017-01-140.5892191.3927920.6055455
2017-01-15-0.151958-0.655249-2.1147255
2017-01-16-1.3233043.1436590.6389965
2017-01-17-0.0249350.385811-1.5771855
In[96]:
df.sub(s,axis="index")#dataFrame与series的减法
Out[96]:
ABCD
2017-01-12NaNNaNNaNNaN
2017-01-13NaNNaNNaNNaN
2017-01-14-0.4107810.392792-0.3944554.0
2017-01-15-2.151958-2.655249-4.1147253.0
2017-01-16-4.3233040.143659-2.3610042.0
2017-01-17NaNNaNNaNNaN

Apply

In[97]:
df
Out[97]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224950.3886255
2017-01-140.5892191.3927920.6055455
2017-01-15-0.151958-0.655249-2.1147255
2017-01-16-1.3233043.1436590.6389965
2017-01-17-0.0249350.385811-1.5771855
In[98]:
df.apply(np.cumsum)#行叠加。
Out[98]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224951.16719210
2017-01-14-0.0691292.0152861.77273715
2017-01-15-0.2210871.360038-0.34198820
2017-01-16-1.5443924.5036970.29700825
2017-01-17-1.5693264.889508-1.28017730
In[99]:
df.apply(lambdax:x.max()-x.min())
Out[99]:
A1.912523
B3.798908
C2.893293
D0.000000
dtype:float64

Histogramming

In[100]:
s=pd.Series(np.random.randint(0,7,size=10))
s
Out[100]:
04
15
22
30
45
53
64
73
83
90
dtype:int32
In[101]:
s.value_counts()
Out[101]:
33
52
42
02
21
dtype:int64

StringMethods

Series is equipped with a set of string processing methods in the str attribute that make it easy to operate on each element of the array, as in the code snippet below. Note that pattern-matching in str generally uses regular expressions by default (and in some cases always uses them). See more at Vectorized String Methods. In [102]:
s=pd.Series(['A','B','C','Aaba','Baca',np.nan,'CABA','dog','cat'])
s.str.lower()
Out[102]:
0a
1b
2c
3aaba
4baca
5NaN
6caba
7dog
8cat
dtype:object
In[103]:
s
Out[103]:
0A
1B
2C
3Aaba
4Baca
5NaN
6CABA
7dog
8cat
dtype:object

Merge合并

Concat

pandas provides various facilities for easily combining together Series, DataFrame, and Panel objects with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations. See the Merging section. Concatenating pandas objects together with concat(): In [104]:
df
Out[104]:
ABCD
2017-01-120.0000000.0000000.7785675
2017-01-13-0.6583480.6224950.3886255
2017-01-140.5892191.3927920.6055455
2017-01-15-0.151958-0.655249-2.1147255
2017-01-16-1.3233043.1436590.6389965
2017-01-17-0.0249350.385811-1.5771855
In[105]:
df = pd.DataFrame(np.random.randn(10, 4))
df
Out[105]:
0123
00.111766-0.5051252.1560290.419152
11.0688701.1805870.3613451.090554
20.4889970.281507-0.738345-0.242974
3-1.8467091.686173-0.202319-1.151983
40.573012-1.9791891.5447681.594595
5-0.954571-0.6967880.270959-2.296720
6-1.5119461.7961130.3994930.412664
70.089844-0.545153-0.315653-0.235828
8-0.7471401.222900-1.6508120.292432
90.6598550.5012650.3639781.722914
In[106]:
# break it into pieces
pieces = [df[:3], df[3:7], df[7:]]
pd.concat(pieces)
Out[106]:
0123
00.111766-0.5051252.1560290.419152
11.0688701.1805870.3613451.090554
20.4889970.281507-0.738345-0.242974
3-1.8467091.686173-0.202319-1.151983
40.573012-1.9791891.5447681.594595
5-0.954571-0.6967880.270959-2.296720
6-1.5119461.7961130.3994930.412664
70.089844-0.545153-0.315653-0.235828
8-0.7471401.222900-1.6508120.292432
90.6598550.5012650.3639781.722914
In[107]:
pieces
Out[107]:
[0123
00.111766-0.5051252.1560290.419152
11.0688701.1805870.3613451.090554
20.4889970.281507-0.738345-0.242974,
0123
3-1.8467091.686173-0.202319-1.151983
40.573012-1.9791891.5447681.594595
5-0.954571-0.6967880.270959-2.296720
6-1.5119461.7961130.3994930.412664,
0123
70.089844-0.545153-0.315653-0.235828
8-0.7471401.222900-1.6508120.292432
90.6598550.5012650.3639781.722914]

Join

SQL style merges. See the Database style joining section. In [108]:
left=pd.DataFrame({"key":["foo","foo"],"lval":[1,2]})
right=pd.DataFrame({'key':['foo','foo'],'rval':[4,5]})
In[109]:
left
Out[109]:
keylval
0foo1
1foo2
In[110]:
right
Out[110]:
keyrval
0foo4
1foo5
In[111]:
pd.merge(left,right,on="key")
Out[111]:
keylvalrval
0foo14
1foo15
2foo24
3foo25
In[112]:
left=pd.DataFrame({'key':['foo','bar'],'lval':[1,2]})
right=pd.DataFrame({'key':['foo','bar'],'rval':[4,5]})
In[113]:
left
Out[113]:
keylval
0foo1
1bar2
In[114]:
right
Out[114]:
keyrval
0foo4
1bar5
In[115]:
pd.merge(left,right,on="key")
Out[115]:
keylvalrval
0foo14
1bar25

Append

In[116]:
df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
df
Out[116]:
ABCD
0-0.8524511.074357-0.5918920.950982
1-0.9775801.6563740.6936570.718832
20.303269-0.881728-1.5093211.219849
30.6557511.2356601.7290381.074948
40.658413-1.215348-1.1396230.753772
51.3451151.420212-0.124543-0.099265
61.1296230.597484-0.804759-0.568266
7-0.7705700.540917-0.261607-0.083751
In[117]:
s = df.iloc[3]
s
Out[117]:
A0.655751
B1.235660
C1.729038
D1.074948
Name:3,dtype:float64
In[118]:
df.append(s,ignore_index=True)
Out[118]:
ABCD
0-0.8524511.074357-0.5918920.950982
1-0.9775801.6563740.6936570.718832
20.303269-0.881728-1.5093211.219849
30.6557511.2356601.7290381.074948
40.658413-1.215348-1.1396230.753772
51.3451151.420212-0.124543-0.099265
61.1296230.597484-0.804759-0.568266
7-0.7705700.540917-0.261607-0.083751
80.6557511.2356601.7290381.074948

Grouping

By "group by" we are referring to a process involving one or more of the following steps: • Splitting the data into groups based on some criteria • Applying a function to each group independently • Combining the results into a data structure. In [119]:
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
                   'C': np.random.randn(8),
                   'D': np.random.randn(8)})
df
Out[119]:
ABCD
0fooone-0.523738-1.363519
1barone-0.071920-2.618027
2footwo-2.712421-0.407372
3barthree-0.635898-1.942854
4footwo0.952073-0.546110
5bartwo1.474296-0.982238
6fooone-0.529788-0.213397
7foothree0.877394-0.791663
In[120]:
df.groupby("A").sum()
Out[120]:
CD
A
bar0.766479-5.543120
foo-1.936480-3.322062
In[121]:
df.groupby(["A","B"]).sum()#Groupingbymultiplecolumnsformsahierarchicalindex,whichwethenapplythefunction.
Out[121]:
CD
AB
barone-0.071920-2.618027
three-0.635898-1.942854
two1.474296-0.982238
fooone-1.053527-1.576917
three0.877394-0.791663
two-1.760347-0.953482

Reshaping

Stack

In[122]:
tuples = list(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]))
tuples
Out[122]:
[(['bar','bar','baz','baz','foo','foo','qux','qux'],),
(['one','two','one','two','one','two','one','two'],)]
In[123]:
tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]))
tuples
Out[123]:
[('bar','one'),
('bar','two'),
('baz','one'),
('baz','two'),
('foo','one'),
('foo','two'),
('qux','one'),
('qux','two')]
In[124]:
index=pd.MultiIndex.from_tuples(tuples,names=["first","second"])
index
Out[124]:
MultiIndex(levels=[['bar','baz','foo','qux'],['one','two']],
labels=[[0,0,1,1,2,2,3,3],[0,1,0,1,0,1,0,1]],
names=['first','second'])
In[125]:
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
df
Out[125]:
AB
firstsecond
barone-1.101051-1.126231
two-0.395652-0.313567
bazone1.378579-1.637869
two0.665960-0.259749
fooone-0.2561811.260131
two-0.9947200.506272
quxone-0.4226490.191402
two-0.1020850.975210
In[126]:
df2 = df[:4]
df2
Out[126]:
AB
firstsecond
barone-1.101051-1.126231
two-0.395652-0.313567
bazone1.378579-1.637869
two0.665960-0.259749
In[127]:
stacked=df2.stack()
stacked
Out[127]:
firstsecond
baroneA-1.101051
B-1.126231
twoA-0.395652
B-0.313567
bazoneA1.378579
B-1.637869
twoA0.665960
B-0.259749
dtype:float64
With a "stacked" DataFrame or Series (having a MultiIndex as the index), the inverse operation of stack() is unstack(), which by default unstacks the last level: In [128]:
stacked.unstack()
Out[128]:
AB
firstsecond
barone-1.101051-1.126231
two-0.395652-0.313567
bazone1.378579-1.637869
two0.665960-0.259749
In[129]:
stacked.unstack(1)
Out[129]:
secondonetwo
first
barA-1.101051-0.395652
B-1.126231-0.313567
bazA1.3785790.665960
B-1.637869-0.259749
In[130]:
stacked.unstack(0)
Out[130]:
firstbarbaz
second
oneA-1.1010511.378579
B-1.126231-1.637869
twoA-0.3956520.665960
B-0.313567-0.259749

PivotTables

In[131]:
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
                   'B': ['A', 'B', 'C'] * 4,
                   'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D': np.random.randn(12),
                   'E': np.random.randn(12)})
df
Out[131]:
ABCDE
0oneAfoo0.0392300.134261
1oneBfoo0.952890-0.499183
2twoCfoo-0.778814-0.655735
3threeAbar0.7988640.025109
4oneBbar-0.580050-1.711672
5oneCbar0.004300-0.433591
6twoAfoo0.229248-2.648814
7threeBfoo0.5064880.630373
8oneCfoo-0.3156670.031764
9oneAbar-1.5474100.743825
10twoBbar-0.4809580.365255
11threeCbar1.7429480.692884
In[4]:
pd.pivot_table(df,values="D",index=["A","B"],columns=["C"])
Out[4]:
Cbarfoo
AB
oneA0.932814-1.440079
B0.0602521.071877
C2.8797790.355274
threeA-0.328442NaN
BNaN-2.544812
C-1.879058NaN
twoANaN-1.987377
B0.220517NaN
CNaN-0.082820

TimeSeries

pandas has simple, powerful, and efficient functionality for performing resampling operations during frequency conversion (e.g., converting secondly data into 5-minutely data). This is extremely common in, but not limited to, financial applications. In [132]:
rng=pd.date_range("1/2/2017",periods=100,freq="S")
rng
Out[132]:
DatetimeIndex(['2017-01-0200:00:00','2017-01-0200:00:01',
'2017-01-0200:00:02','2017-01-0200:00:03',
'2017-01-0200:00:04','2017-01-0200:00:05',
'2017-01-0200:00:06','2017-01-0200:00:07',
'2017-01-0200:00:08','2017-01-0200:00:09',
'2017-01-0200:00:10','2017-01-0200:00:11',
'2017-01-0200:00:12','2017-01-0200:00:13',
'2017-01-0200:00:14','2017-01-0200:00:15',
'2017-01-0200:00:16','2017-01-0200:00:17',
'2017-01-0200:00:18','2017-01-0200:00:19',
'2017-01-0200:00:20','2017-01-0200:00:21',
'2017-01-0200:00:22','2017-01-0200:00:23',
'2017-01-0200:00:24','2017-01-0200:00:25',
'2017-01-0200:00:26','2017-01-0200:00:27',
'2017-01-0200:00:28','2017-01-0200:00:29',
'2017-01-0200:00:30','2017-01-0200:00:31',
'2017-01-0200:00:32','2017-01-0200:00:33',
'2017-01-0200:00:34','2017-01-0200:00:35',
'2017-01-0200:00:36','2017-01-0200:00:37',
'2017-01-0200:00:38','2017-01-0200:00:39',
'2017-01-0200:00:40','2017-01-0200:00:41',
'2017-01-0200:00:42','2017-01-0200:00:43',
'2017-01-0200:00:44','2017-01-0200:00:45',
'2017-01-0200:00:46','2017-01-0200:00:47',
'2017-01-0200:00:48','2017-01-0200:00:49',
'2017-01-0200:00:50','2017-01-0200:00:51',
'2017-01-0200:00:52','2017-01-0200:00:53',
'2017-01-0200:00:54','2017-01-0200:00:55',
'2017-01-0200:00:56','2017-01-0200:00:57',
'2017-01-0200:00:58','2017-01-0200:00:59',
'2017-01-0200:01:00','2017-01-0200:01:01',
'2017-01-0200:01:02','2017-01-0200:01:03',
'2017-01-0200:01:04','2017-01-0200:01:05',
'2017-01-0200:01:06','2017-01-0200:01:07',
'2017-01-0200:01:08','2017-01-0200:01:09',
'2017-01-0200:01:10','2017-01-0200:01:11',
'2017-01-0200:01:12','2017-01-0200:01:13',
'2017-01-0200:01:14','2017-01-0200:01:15',
'2017-01-0200:01:16','2017-01-0200:01:17',
'2017-01-0200:01:18','2017-01-0200:01:19',
'2017-01-0200:01:20','2017-01-0200:01:21',
'2017-01-0200:01:22','2017-01-0200:01:23',
'2017-01-0200:01:24','2017-01-0200:01:25',
'2017-01-0200:01:26','2017-01-0200:01:27',
'2017-01-0200:01:28','2017-01-0200:01:29',
'2017-01-0200:01:30','2017-01-0200:01:31',
'2017-01-0200:01:32','2017-01-0200:01:33',
'2017-01-0200:01:34','2017-01-0200:01:35',
'2017-01-0200:01:36','2017-01-0200:01:37',
'2017-01-0200:01:38','2017-01-0200:01:39'],
dtype='datetime64[ns]',freq='S')
In[133]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts
Out[133]:
2017-01-0200:00:00251
2017-01-0200:00:0163
2017-01-0200:00:02108
2017-01-0200:00:03288
2017-01-0200:00:04491
2017-01-0200:00:05490
2017-01-0200:00:06343
2017-01-0200:00:07357
2017-01-0200:00:0872
2017-01-0200:00:09171
2017-01-0200:00:10324
2017-01-0200:00:11281
2017-01-0200:00:12176
2017-01-0200:00:1314
2017-01-0200:00:14495
2017-01-0200:00:15150
2017-01-0200:00:1669
2017-01-0200:00:17144
2017-01-0200:00:18126
2017-01-0200:00:19368
2017-01-0200:00:20129
2017-01-0200:00:21386
2017-01-0200:00:22228
2017-01-0200:00:23458
2017-01-0200:00:2498
2017-01-0200:00:25244
2017-01-0200:00:26206
2017-01-0200:00:2798
2017-01-0200:00:2892
2017-01-0200:00:29259
...
2017-01-0200:01:10127
2017-01-0200:01:11342
2017-01-0200:01:12185
2017-01-0200:01:13123
2017-01-0200:01:1473
2017-01-0200:01:15132
2017-01-0200:01:16462
2017-01-0200:01:17317
2017-01-0200:01:18180
2017-01-0200:01:19247
2017-01-0200:01:2097
2017-01-0200:01:21401
2017-01-0200:01:22342
2017-01-0200:01:23382
2017-01-0200:01:24304
2017-01-0200:01:2547
2017-01-0200:01:26193
2017-01-0200:01:27334
2017-01-0200:01:28196
2017-01-0200:01:29297
2017-01-0200:01:30195
2017-01-0200:01:31236
2017-01-0200:01:32200
2017-01-0200:01:33490
2017-01-0200:01:34196
2017-01-0200:01:35201
2017-01-0200:01:36397
2017-01-0200:01:37494
2017-01-0200:01:38482
2017-01-0200:01:39267
Freq:S,Length:100,dtype:int32
In[7]:
ts.resample("5Min").sum()
Out[7]:
2017-01-0222939
Freq:5T,dtype:int32
In[9]:
ts.resample("1Min").sum()
Out[9]:
2017-01-0200:00:0013896
2017-01-0200:01:009043
Freq:T,dtype:int32
Timezonerepresentation.零时区UTC表示。In[10]:
rng=pd.date_range("2/1/201700:00",periods=5,freq="D")
rng
Out[10]:
DatetimeIndex(['2017-02-01','2017-02-02','2017-02-03','2017-02-04',
'2017-02-05'],
dtype='datetime64[ns]',freq='D')
In[12]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
Out[12]:
2017-02-010.329594
2017-02-022.097319
2017-02-031.852023
2017-02-04-0.213452
2017-02-050.160873
Freq:D,dtype:float64
In[13]:
tsUtc=ts.tz_localize("UTC")
tsUtc
Out[13]:
2017-02-0100:00:00+00:000.329594
2017-02-0200:00:00+00:002.097319
2017-02-0300:00:00+00:001.852023
2017-02-0400:00:00+00:00-0.213452
2017-02-0500:00:00+00:000.160873
Freq:D,dtype:float64
Converttoanothertimezone.时区转换。In[14]:
tsUtc.tz_convert("US/Eastern")
Out[14]:
2017-01-3119:00:00-05:000.329594
2017-02-0119:00:00-05:002.097319
2017-02-0219:00:00-05:001.852023
2017-02-0319:00:00-05:00-0.213452
2017-02-0419:00:00-05:000.160873
Freq:D,dtype:float64
In[15]:
tsUtc
Out[15]:
2017-02-0100:00:00+00:000.329594
2017-02-0200:00:00+00:002.097319
2017-02-0300:00:00+00:001.852023
2017-02-0400:00:00+00:00-0.213452
2017-02-0500:00:00+00:000.160873
Freq:D,dtype:float64
ConvertingbetweentimespanrepresentationsIn[16]:
rng=pd.date_range("1/8/2017",periods=5,freq="M")
rng
Out[16]:
DatetimeIndex(['2017-01-31','2017-02-28','2017-03-31','2017-04-30',
'2017-05-31'],
dtype='datetime64[ns]',freq='M')
In[18]:
ts = pd.Series(np.random.randn(len(rng)), rng)
ts
Out[18]:
2017-01-310.904523
2017-02-28-0.470144
2017-03-31-0.373244
2017-04-300.860448
2017-05-310.176226
Freq:M,dtype:float64
In[20]:
ps = ts.to_period()
ps
Out[20]:
2017-010.904523
2017-02-0.470144
2017-03-0.373244
2017-040.860448
2017-050.176226
Freq:M,dtype:float64
In[21]:
ps.to_timestamp()
Out[21]:
2017-01-010.904523
2017-02-01-0.470144
2017-03-01-0.373244
2017-04-010.860448
2017-05-010.176226
Freq:MS,dtype:float64
In[22]:
ps
Out[22]:
2017-010.904523
2017-02-0.470144
2017-03-0.373244
2017-040.860448
2017-050.176226
Freq:M,dtype:float64
Converting between period and timestamp enables some convenient arithmetic functions to be used. In the following example, we convert a quarterly frequency with year ending in November to 9am of the end of the month following the quarter end: In [23]:
prng=pd.period_range("1990Q1","2017Q4",freq="Q-NOV")
prng
Out[23]:
PeriodIndex(['1990Q1','1990Q2','1990Q3','1990Q4','1991Q1','1991Q2',
'1991Q3','1991Q4','1992Q1','1992Q2',
...
'2015Q3','2015Q4','2016Q1','2016Q2','2016Q3','2016Q4',
'2017Q1','2017Q2','2017Q3','2017Q4'],
dtype='period[Q-NOV]',length=112,freq='Q-NOV')
In[25]:
ts=pd.Series(np.random.randn(len(prng)),prng)
ts.head()
Out[25]:
1990Q11.193031
1990Q20.621627
1990Q3-0.235553
1990Q40.642938
1991Q10.247024
Freq:Q-NOV,dtype:float64
In[26]:
ts.index=(prng.asfreq("M","e")+1).asfreq("H","s")+9
ts.head()
Out[26]:
1990-03-0109:001.193031
1990-06-0109:000.621627
1990-09-0109:00-0.235553
1990-12-0109:000.642938
1991-03-0109:000.247024
Freq:H,dtype:float64

Categoricals

In[34]:
df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "a", "c", "b", "b", "f"]})
df
Out[34]:
idraw_grade
01a
12a
23c
34b
45b
56f
Converttherawgradestoacategoricaldatatype.In[35]:
df["grade"] = df.raw_grade.astype("category")
df
Out[35]:
idraw_gradegrade
01aa
12aa
23cc
34bb
45bb
56ff
In[36]:
df.grade#Converttherawgradestoacategoricaldatatype
Out[36]:
0a
1a
2c
3b
4b
5f
Name:grade,dtype:category
Categories(4,object):[a,b,c,f]
In[37]:
# Rename the categories to more meaningful names (assigning to Series.cat.categories is inplace!)
df.grade.cat.categories = ["verygood", "good", "nomal", "bad"]
df
Out[37]:
idraw_gradegrade
01averygood
12averygood
23cnomal
34bgood
45bgood
56fbad
In[38]:
#Reorderthecategoriesandsimultaneouslyaddthemissingcategories(methodsunderSeries.catreturnanewSeriesperdefault).

df.grade=df.grade.cat.set_categories(["verybad","bad","medium","good","verygood"])
df.grade
Out[38]:
0verygood
1verygood
2NaN
3good
4good
5bad
Name:grade,dtype:category
Categories(5,object):[verybad,bad,medium,good,verygood]
In[39]:
df
Out[39]:
idraw_gradegrade
01averygood
12averygood
23cNaN
34bgood
45bgood
56fbad
Sortingisperorderinthecategories,notlexicalorderIn[40]:
df.sort_values(by="grade")
Out[40]:
idraw_gradegrade
23cNaN
56fbad
34bgood
45bgood
01averygood
12averygood
GroupingbyacategoricalcolumnshowsalsoemptycategoriesIn[41]:
df.groupby("grade").size()
Out[41]:
grade
verybad0
bad1
medium0
good2
verygood2
dtype:int64

Plotting

In[43]:
ts=pd.Series(np.random.randn(1000),index=pd.date_range("1/1/2017",periods=1000))
ts.head()
Out[43]:
2017-01-01-0.745067
2017-01-02-0.070895
2017-01-030.233542
2017-01-04-0.206597
2017-01-050.891064
Freq:D,dtype:float64
In[45]:
ts=ts.cumsum()
ts.head()
Out[45]:
2017-01-01-0.745067
2017-01-02-1.561029
2017-01-03-2.143449
2017-01-04-2.932466
2017-01-05-2.830418
Freq:D,dtype:float64
In[48]:
ts.plot()
Out[48]:
<matplotlib.axes._subplots.AxesSubplotat0x19bf6a6e278>
In[50]:
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=["A", "B", "C", "D"])
df.head()
Out[50]:
ABCD
2017-01-01-1.940139-0.476590-0.1540661.692812
2017-01-020.3998910.2689760.596209-0.484979
2017-01-030.814519-0.142193-0.084394-0.687342
2017-01-040.385848-1.230059-0.093327-0.096652
2017-01-050.407435-0.8493470.3791920.172933
In[51]:
df=df.cumsum()
In[53]:
plt.figure()
df.plot()
plt.legend(loc="best")
plt.show()
<matplotlib.figure.Figureat0x19bf8855da0>
<matplotlib.figure.Figureat0x19bf897dc88>

GettingDataIn/Out

CSV

In[]:
df.to_csv("foo.csv")
In[]:
pd.read_csv("foo.csv")

HDF5

In[]:
df.to_hdf("foo.h5","df")
In[]:
pd.read_hdf("foo.h5","df")

Excel

In[]:
df.to_excel('foo.xlsx',sheet_name='Sheet1')
In[]:
pd.read_excel('foo.xlsx','Sheet1',index_col=None,na_values=['NA'])
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:
In[]:

                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python pandas