您的位置:首页 > 编程语言 > Python开发

python-pandas的基本用法11

2017-08-07 08:52 537 查看

pandas的基本用法11-层次化索引

# -*- coding: utf-8 -*-

import numpy as np
from pandas import Series, DataFrame, MultiIndex

# --- Hierarchical (multi-level) index on a Series -------------------------
# NOTE: print is called with a single parenthesised argument throughout so
# the script parses on both Python 2 and Python 3.
print('Series的层次索引')

# Passing two parallel lists as ``index`` builds a two-level index:
# the outer level holds the letters, the inner level holds the numbers.
data = Series(
    [1, 3, 56, 2, 88, 32, 43, 12, 65, 90],
    index=[
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)

print(data)
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90

# The index is made of (letter, number) pairs: ('a', 1), ('a', 2), ..., ('d', 3).
print(data.index)

# A plain slice selects the first two rows.
print(data[:2])
# a  1    1
#    2    3

# unstack() moves the inner index level into the columns; (letter, number)
# combinations that do not exist in ``data`` appear as NaN.
print(data.unstack())
#     1   2   3
# a   1   3  56
# b   2  88  32
# c  43  12 NaN
# d NaN  65  90

# stack() moves the columns back into an inner index level.
# Sample output as recorded in the original notes; the exact dtype and
# NaN handling may differ between pandas versions.
print(data.unstack().stack())
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90

# --- Hierarchical index on both axes of a DataFrame -----------------------
# NOTE: print is called with a single parenthesised argument throughout so
# the script parses on both Python 2 and Python 3.
print('DataFrame的层次索引')

# Rows and columns each get a two-level index built from parallel lists.
frame = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
)

print(frame)
#       Ohio       Colorado
#      Green  Red     Green
# a 1      0    1         2
#   2      3    4         5
# b 1      6    7         8
#   2      9   10        11

# Name the levels so they can be referred to by name further down.
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
print(frame)
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
#      2         3    4         5
# b    1         6    7         8
#      2         9   10        11

# ``.ix`` was deprecated and later removed from pandas; ``.loc`` with an
# explicit (key1, key2) row tuple selects the same data without ambiguity.
print(frame.loc[('a', 1), :])
# state     color
# Ohio      Green    0
#           Red      1
# Colorado  Green    2
print(frame.loc[('a', 2), 'Colorado'])
# color
# Green    5
print(frame.loc[('a', 2), ('Ohio', 'Red')])
# 4

print('直接用MultiIndex创建层次索引结构')
# Fixed the 'Gree' typo so the labels match the columns of ``frame`` above.
print(MultiIndex.from_arrays(
    [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
    names=['state', 'color'],
))

print('索引层级交换')
frame_swapped = frame.swaplevel('key1', 'key2')
print(frame_swapped)
# state       Ohio       Colorado
# color      Green  Red     Green
# key2 key1
# 1    a         0    1         2
# 2    a         3    4         5
# 1    b         6    7         8
# 2    b         9   10        11
print(frame_swapped.swaplevel(0, 1))  # levels may also be given by position
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
#      2         3    4         5
# b    1         6    7         8
#      2         9   10        11

print('根据索引排序')
# ``sortlevel`` is no longer available in pandas; ``sort_index(level=...)``
# sorts by the given level first and then by the remaining levels.
print(frame.sort_index(level='key2'))
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
# b    1         6    7         8
# a    2         3    4         5
# b    2         9   10        11
print(frame.swaplevel(0, 1).sort_index(level=0))
# state       Ohio       Colorado
# color      Green  Red     Green
# key2 key1
# 1    a         0    1         2
#      b         6    7         8
# 2    a         3    4         5
#      b         9   10        11

print('根据指定的key计算统计信息')
# ``sum(level=...)`` is no longer available in pandas; grouping by the index
# level and summing produces the same per-key totals.
print(frame.groupby(level='key2').sum())
# state   Ohio       Colorado
# color  Green  Red     Green
# key2
# 1          6    8        10
# 2         12   14        16

# --- Building a hierarchical index from existing columns ------------------
# NOTE: print is called with a single parenthesised argument throughout so
# the script parses on both Python 2 and Python 3.
print('使用列生成层次索引')
frame = DataFrame({
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
    'd': [0, 1, 2, 0, 1, 2, 3],
})
print(frame)
#    a  b    c  d
# 0  0  7  one  0
# 1  1  6  one  1
# 2  2  5  one  2
# 3  3  4  two  0
# 4  4  3  two  1
# 5  5  2  two  2
# 6  6  1  two  3

# set_index moves columns c and d into a two-level row index.
print(frame.set_index(['c', 'd']))
#        a  b
# c   d
# one 0  0  7
#     1  1  6
#     2  2  5
# two 0  3  4
#     1  4  3
#     2  5  2
#     3  6  1

# drop=False keeps c and d as ordinary columns in addition to the index.
print(frame.set_index(['c', 'd'], drop=False))
# c   d
# one 0  0  7  one  0
#     1  1  6  one  1
#     2  2  5  one  2
# two 0  3  4  two  0
#     1  4  3  two  1
#     2  5  2  two  2
#     3  6  1  two  3

frame2 = frame.set_index(['c', 'd'])
# reset_index undoes set_index: the index levels become columns again.
print(frame2.reset_index())
#      c  d  a  b
# 0  one  0  0  7
# 1  one  1  1  6
# 2  one  2  2  5
# 3  two  0  3  4
# 4  two  1  4  3
# 5  two  2  5  2
# 6  two  3  6  1
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  pandas