您的位置:首页 > 理论基础 > 数据结构算法

Python科学计算工具Pandas(三) —— 数据结构Dataframe的基本操作

2018-03-12 23:55 453 查看

Python科学计算工具Pandas(三) —— 数据结构Dataframe的基本操作

dataframe的基本操作

查看和转置

# Inspecting and transposing a DataFrame

# 16 uniform random numbers reshaped to 8 rows x 2 columns, scaled by 100
samples = np.random.rand(16).reshape(8, 2) * 100
df = pd.DataFrame(samples, columns=['a', 'b'])

# head(n) / tail(n) return the first / last n rows
# (5 rows are shown by default when n is omitted)
top_rows = df.head(3)
bottom_rows = df.tail(2)
print(top_rows)
print(bottom_rows)
print('---------')

# Transpose: the row index and the column labels swap places
print(df.transpose())


a          b
0  50.973343  19.208659
1  16.702960  98.460643
2  30.013108   8.446435
a          b
6   8.339462  69.308221
7  28.236923  53.976490
---------
0          1          2          3          4          5  \
a  50.973343  16.702960  30.013108   1.118677  34.971555  51.606055
b  19.208659  98.460643   8.446435  35.377823  64.811959  46.874331

6          7
a   8.339462  28.236923
b  69.308221  53.976490


添加和修改

# Adding and modifying data

samples = np.random.rand(16).reshape(4, 4) * 100
df = pd.DataFrame(samples, columns=['a', 'b', 'c', 'd'])
print(df)
print('-----------')

# Assigning to a label that does not exist yet creates it:
# a new column 'e' and a new row labelled 4, each filled with a single scalar
df['e'] = 200
df.loc[4] = 300
print(df)
print('------------')

# Assigning to labels that already exist overwrites the stored values
df['e'] = 12
df.loc[4] = 345
print(df)


a          b          c          d
0  36.629277  97.240619  36.812864   2.828474
1  62.595577  23.721840  67.603865  59.825555
2  76.220139  15.454486  44.905106  90.229156
3  33.157141  65.396197  14.282099  63.127910
-----------
a           b           c           d    e
0   36.629277   97.240619   36.812864    2.828474  200
1   62.595577   23.721840   67.603865   59.825555  200
2   76.220139   15.454486   44.905106   90.229156  200
3   33.157141   65.396197   14.282099   63.127910  200
4  300.000000  300.000000  300.000000  300.000000  300
------------
a           b           c           d    e
0   36.629277   97.240619   36.812864    2.828474   12
1   62.595577   23.721840   67.603865   59.825555   12
2   76.220139   15.454486   44.905106   90.229156   12
3   33.157141   65.396197   14.282099   63.127910   12
4  345.000000  345.000000  345.000000  345.000000  345


删除

# Deleting with del / drop()

samples = np.random.rand(16).reshape(4, 4) * 100
df = pd.DataFrame(samples, columns=['a', 'b', 'c', 'd'])
print(df)
print('-------')

# del removes column 'a' from df itself
del df['a']
print(df)
print('-------')

# drop() with row labels: inplace defaults to False, so a new DataFrame
# is returned and df keeps all of its rows
without_row_0 = df.drop(0)
without_rows_1_2 = df.drop([1, 2])
print(without_row_0)
print(without_rows_1_2)
print('-----')

# drop() needs axis=1 to remove columns instead of rows; with inplace=False
# the result is again a new DataFrame, which the second print confirms
without_col_d = df.drop(['d'], axis=1)
print(without_col_d)
print(df)


a          b          c          d
0  51.330969  84.874605  84.096457  89.067999
1  77.191343  25.731875  69.804351  76.268104
2  80.739107  29.660919  14.462301  60.620529
3  15.128326  74.858322  29.683391  15.247288
-------
b          c          d
0  84.874605  84.096457  89.067999
1  25.731875  69.804351  76.268104
2  29.660919  14.462301  60.620529
3  74.858322  29.683391  15.247288
-------
b          c          d
1  25.731875  69.804351  76.268104
2  29.660919  14.462301  60.620529
3  74.858322  29.683391  15.247288
b          c          d
0  84.874605  84.096457  89.067999
3  74.858322  29.683391  15.247288
-----
b          c
0  84.874605  84.096457
1  25.731875  69.804351
2  29.660919  14.462301
3  74.858322  29.683391
b          c          d
0  84.874605  84.096457  89.067999
1  25.731875  69.804351  76.268104
2  29.660919  14.462301  60.620529
3  74.858322  29.683391  15.247288


对齐

# Alignment

# Two frames of different shape: 10 rows x columns A-D, and 7 rows x columns A-C
df1 = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])
df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A', 'B', 'C'])

# Arithmetic between DataFrames aligns automatically on column labels and on
# the row index; positions present in only one frame come out as NaN
aligned_sum = df1.add(df2)
print(aligned_sum)


A         B         C   D
0 -0.094771  2.624659  0.165298 NaN
1 -1.570840  0.253699  2.503872 NaN
2  1.728068 -0.075762 -0.271735 NaN
3  0.917945 -0.473601 -0.333300 NaN
4 -0.532056 -0.635843 -2.094396 NaN
5  2.216553 -1.130143 -2.527663 NaN
6 -2.083056  1.572935  1.792193 NaN
7       NaN       NaN       NaN NaN
8       NaN       NaN       NaN NaN
9       NaN       NaN       NaN NaN


排序

# Sorting 1 - by value with .sort_values
# (works for Series as well)

samples = np.random.rand(16).reshape(4, 4) * 100
df1 = pd.DataFrame(samples, columns=['a', 'b', 'c', 'd'])
print(df1)
print('------')

# Single-column sort; `ascending` selects the direction and defaults to ascending
by_a_ascending = df1.sort_values(['a'], ascending=True)
by_a_descending = df1.sort_values(['a'], ascending=False)
print(by_a_ascending)
print(by_a_descending)
print('--------')

raw_columns = {
    'a': [1, 1, 1, 1, 2, 2, 2, 2],
    'b': list(range(8)),
    'c': list(range(8, 0, -1)),
}
df2 = pd.DataFrame(raw_columns)
print(df2)
# Multi-column sort: rows are ordered by 'a' first, ties are broken by 'c'
print(df2.sort_values(['a', 'c']))


a          b          c          d
0  18.114943   5.292488   2.779998   2.032498
1  91.696799  53.759458  86.748461  26.343732
2  53.317151  40.150072  30.818664  58.626604
3   0.965075  93.237873  24.491812  91.080167
------
a          b          c          d
3   0.965075  93.237873  24.491812  91.080167
0  18.114943   5.292488   2.779998   2.032498
2  53.317151  40.150072  30.818664  58.626604
1  91.696799  53.759458  86.748461  26.343732
a          b          c          d
1  91.696799  53.759458  86.748461  26.343732
2  53.317151  40.150072  30.818664  58.626604
0  18.114943   5.292488   2.779998   2.032498
3   0.965075  93.237873  24.491812  91.080167
--------
a  b  c
0  1  0  8
1  1  1  7
2  1  2  6
3  1  3  5
4  2  4  4
5  2  5  3
6  2  6  2
7  2  7  1
a  b  c
3  1  3  5
2  1  2  6
1  1  1  7
0  1  0  8
7  2  7  1
6  2  6  2
5  2  5  3
4  2  4  4


# Sorting 2 - by index with .sort_index

col_labels = ['a', 'b', 'c', 'd']
df1 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=[5, 4, 3, 2],
    columns=col_labels,
)
df2 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=['h', 's', 'x', 'g'],
    columns=col_labels,
)

# Show each frame as built, then ordered by its index labels.
# sort_index defaults: ascending=True, inplace=False
for frame in (df1, df2):
    print(frame)
    print(frame.sort_index())


a          b          c          d
5  49.947718  86.021101  71.165114  51.096578
4  18.429933  71.700521   6.128441  86.213433
3  73.232182   0.455578   8.924621  18.621305
2  68.493486  77.333558  74.934523  74.317793
a          b          c          d
2  68.493486  77.333558  74.934523  74.317793
3  73.232182   0.455578   8.924621  18.621305
4  18.429933  71.700521   6.128441  86.213433
5  49.947718  86.021101  71.165114  51.096578
a          b          c          d
h  38.674702  48.975600   0.346447  13.743176
s  67.086746  67.218026  54.882057  67.597863
x  70.300753  41.170163  25.061863  96.519508
g  60.767159   6.414095  58.085732  44.014739
a          b          c          d
g  60.767159   6.414095  58.085732  44.014739
h  38.674702  48.975600   0.346447  13.743176
s  67.086746  67.218026  54.882057  67.597863
x  70.300753  41.170163  25.061863  96.519508


下篇文章讲pandas时间序列
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: