您的位置:首页 > 其它

pandas:从汇总和计算描述统计到层次化索引

2017-05-18 15:52 513 查看
# Names used throughout this session; nothing else in the file brings them
# into scope.
import numpy as np
import pandas as pd
from pandas import DataFrame, MultiIndex, Series

# --- Summarizing and computing descriptive statistics ---
# Small frame containing missing values, used to show how reductions
# behave with and without skipna.
df = DataFrame(
    [[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    index=['a', 'b', 'c', 'd'],
    columns=['one', 'two'],
)
df
df.sum()                       # per-column sums
df.sum(axis=1)                 # per-row sums
df.sum(axis=1, skipna=False)   # a row with any NaN now sums to NaN
df.idxmax()                    # index label of each column's maximum
df.cumsum()                    # running totals down each column
df.describe()                  # summary statistics for numeric columns

# describe() on non-numeric data reports count/unique/top/freq instead.
obj = Series(['a', 'a', 'b', 'c'] * 4)
obj
obj.describe()
# NOTE(review): pandas.io.data was removed from pandas itself; the
# get_data_yahoo API is now provided by the separate pandas-datareader
# package. The import is left as written so this block does not introduce
# a new third-party dependency -- confirm which package the environment has.
import pandas.io.data as web

# Download daily price data for each ticker into a dict keyed by symbol.
# This requires network access. The date range is the one from the last
# fetch in the session, which is the data all_data ends up holding.
all_data = {}
for ticker in ['AAPL', 'IBM', "MSFT", 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2016', '1/1/2017')

all_data
# --- Unique values, value counts, and membership ---
obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
uniques = obj.unique()   # distinct values, in order of first appearance
uniques
obj.value_counts()       # frequency of each value, most frequent first
# Count values of a plain array without sorting by frequency. The method
# form is used because the top-level pd.value_counts function is deprecated.
Series(obj.values).value_counts(sort=False)

# Boolean mask selecting the entries equal to 'b' or 'c'.
mask = obj.isin(['b', 'c'])
mask
obj[mask]

data = DataFrame({'Qu1': [1, 3, 4, 3, 4],
                  'Qu2': [2, 3, 1, 2, 3],
                  'Qu3': [1, 5, 2, 4, 4]})
data
# Per-column histogram: rows are the distinct values found in any column,
# cells are how often that value occurs in the column (NaN if never).
data.apply(Series.value_counts)
data.apply(Series.value_counts).fillna(0)   # absent values count as 0
string_data=Series(['aardvark','artichoke',np.nan,'avocado'])
string_data
string_data.isnull()
string_data[0]=None
string_data.isnull()
from numpy import nan as NA
data=Series([1,NA,3.5,NA,7])
data
data.dropna()
data
data[data.notnull()]
data=DataFrame([[1,6.5,3],[1.,NA,NA],[NA,NA,NA],[NA,6.5,3.]])
data
cleaned =data.dropna()
cleaned
cleaned =data.dropna(how='all')
cleaned
data[4]=NA
data
data.append(NA)
data.append(333)
data
cleaned =data.dropna(how='all',axis=1)
cleaned
# --- Filtering on and filling in missing data ---
df = DataFrame(np.random.randn(7, 3))
df
# Label-based slices are inclusive: rows 0..4 of column 1 and rows 0..2 of
# column 2 become NaN. (.loc replaces the removed .ix indexer.)
df.loc[:4, 1] = NA
df.loc[:2, 2] = NA
df
df.dropna(thresh=3)           # keep rows with at least 3 non-NaN values
df.fillna(0)                  # returns a new object; df is unchanged
df.fillna({1: 0.5, 3: -1})    # per-column fill values keyed by column label
_ = df.fillna(0, inplace=True)   # modifies df itself
df

df = DataFrame(np.random.randn(6, 3))
df.loc[2:, 1] = NA
df.loc[4:, 2] = NA
df
# Forward-fill: propagate the last valid observation downwards, optionally
# limiting how many consecutive NaNs are filled.
df.ffill()
df.ffill(limit=2)

# Fill missing entries with a statistic computed from the data itself.
data = Series([1., NA, 3.5, NA, 7])
data
data.fillna(data.mean())
# --- Hierarchical indexing ---
# Passing a list of two arrays as the index builds a two-level MultiIndex.
data = Series(
    np.random.randn(10),
    index=[
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)
data
data.index
data['b']                # partial indexing on the outer level
data['b':'c']            # label slice on the outer level
data.loc[['b', 'd']]     # several outer labels (.loc replaces removed .ix)
data.loc[:, 2]           # select on the inner level across all outer labels
data.unstack()           # pivot the inner level into columns
data.unstack().stack()   # and back to a Series

# A DataFrame may have hierarchical labels on both axes.
frame = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
)
frame
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame
frame['Ohio']            # partial column indexing selects a column group

# The same column index built explicitly so that it can be reused.
MultiIndex.from_arrays(
    [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
    names=['state', 'color'],
)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: