您的位置:首页 > 编程语言 > Python开发

python-pandas的基本用法02

2017-08-05 16:51 288 查看

pandas的基本用法02-DataFrame基础

#coding:utf-8
import numpy as np
from pandas import Series,DataFrame

# Walk-through of basic DataFrame usage.  Each print() call is followed by a
# commented sample of what it prints; exact column spacing and dtype text can
# differ between pandas versions.
#
# print(...) is always called with a single argument, so the statements are
# valid syntax on both Python 2 and Python 3.

# --- Build a DataFrame from a dict: each key becomes a column name. ---
print('用字典生成DataFrame,key为列的名字。')
data = {
    'city': ['Beijing', 'Shanghai', 'Shenzheng', 'Nanjing', 'Hangzhou'],
    'gdp': [8000, 9000, 3000, 4000, 4500],
    'pop': [2500, 3500, 500, 1500, 1000],
}
print(DataFrame(data))
#         city   gdp   pop
# 0    Beijing  8000  2500
# 1   Shanghai  9000  3500
# 2  Shenzheng  3000   500
# 3    Nanjing  4000  1500
# 4   Hangzhou  4500  1000

# --- `columns=` fixes the column order explicitly. ---
print('指定列顺序:')
print(DataFrame(data, columns=['city', 'pop', 'gdp']))
#         city   pop   gdp
# 0    Beijing  2500  8000
# 1   Shanghai  3500  9000
# 2  Shenzheng   500  3000
# 3    Nanjing  1500  4000
# 4   Hangzhou  1000  4500

# --- Custom row labels; a requested column that is not in the dict ('env')
# --- is created and filled with NaN. ---
print('指定索引,在列中指定不存在的列,默认数据用NaN')
data2 = DataFrame(
    data,
    columns=['city', 'pop', 'gdp', 'env'],
    index=['one', 'two', 'three', 'four', 'five'],
)

print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000  NaN
# two     Shanghai  3500  9000  NaN
# three  Shenzheng   500  3000  NaN
# four     Nanjing  1500  4000  NaN
# five    Hangzhou  1000  4500  NaN

# A column can be read as an attribute or with [] -- both give the same Series.
# Attribute access only works when the column name does not clash with a
# DataFrame attribute/method (e.g. `data2.pop` is the DataFrame.pop method,
# not the 'pop' column), so [] is the form that always works.
print(data2.city)
# one        Beijing
# two       Shanghai
# three    Shenzheng
# four       Nanjing
# five      Hangzhou
# Name: city, dtype: object
print(data2['city'])
# one        Beijing
# two       Shanghai
# three    Shenzheng
# four       Nanjing
# five      Hangzhou
# Name: city, dtype: object

# Select a single row by its label.  `.loc` is the label-based indexer; the
# older `.ix` indexer was deprecated in pandas 0.20 and removed in 1.0.
print(data2.loc['three'])
# city    Shenzheng
# pop           500
# gdp          3000
# env           NaN
# Name: three, dtype: object

# Overwrite the whole 'env' column with an array of matching length.
# Assign through [] rather than `data2.env = ...`: attribute assignment only
# updates a column that already exists and never creates a new one.
data2['env'] = np.arange(5)
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000    0
# two     Shanghai  3500  9000    1
# three  Shenzheng   500  3000    2
# four     Nanjing  1500  4000    3
# five    Hangzhou  1000  4500    4

# --- Assigning a Series aligns it on the index labels; any row label missing
# --- from the Series would receive NaN (here every label is supplied). ---
print('用Series指定要修改的索引及其对应的值,没有指定的默认数据用NaN。')
val = Series([5, 3, 1, 3, 2], index=['one', 'two', 'three', 'four', 'five'])
data2['env'] = val
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000    5
# two     Shanghai  3500  9000    3
# three  Shenzheng   500  3000    1
# four     Nanjing  1500  4000    3
# five    Hangzhou  1000  4500    2

# --- Assigning to a column name that does not exist yet appends a new column
# --- (here a boolean mask). ---
print('赋值给新列')
data2['suit'] = (data2.city == 'Shenzheng')
print(data2)
#             city   pop   gdp  env   suit
# one      Beijing  2500  8000    5  False
# two     Shanghai  3500  9000    3  False
# three  Shenzheng   500  3000    1   True
# four     Nanjing  1500  4000    3  False
# five    Hangzhou  1000  4500    2  False
print(data2.columns)
# Index(['city', 'pop', 'gdp', 'env', 'suit'], dtype='object')

# --- Transpose: rows become columns and vice versa. ---
print('DataFrame转置')
print(data2.T)
#           one       two      three     four      five
# city  Beijing  Shanghai  Shenzheng  Nanjing  Hangzhou
# pop      2500      3500        500     1500      1000
# gdp      8000      9000       3000     4000      4500
# env         5         3          1        3         2
# suit    False     False       True    False     False

# --- Replace the row labels in place, then slice a column positionally. ---
print('指定索引顺序,以及使用切片初始化数据。')
data2.index = [1, 2, 3, 4, 5]

# [:-1] is a positional slice on the Series: everything except the last row.
print(data2['city'][:-1])
# 1      Beijing
# 2     Shanghai
# 3    Shenzheng
# 4      Nanjing
# Name: city, dtype: object

# --- Neither axis was given a name, so both of these print None. ---
print('打印索引和列的名称')
print(data2.index.name)
print(data2.columns.name)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  pandas