您的位置:首页 > 其它

pandas数据预处理与透视表

2017-03-13 09:42 183 查看

以下代码基于 Python 3.5.0 编写。

import pandas as pd
import numpy as np
titanic_survival = pd.read_csv("titanic_train.csv")

# ---------------- Count the missing entries in the Age column ----------------
age = titanic_survival["Age"]
# Boolean mask: True wherever the age is missing.
age_is_null = age.isnull()
# Keep only the missing entries, then count them.
age_null_true = age.loc[age_is_null]
age_null_count = age_null_true.shape[0]
print(age_null_count)

# ---------------- Mean age, method 1: filter by hand ----------------
# Keep only the ages that are present by inverting the null mask.
good_ages = titanic_survival["Age"][~age_is_null]
correct_mean_age = sum(good_ages) / good_ages.shape[0]
print(correct_mean_age)

# ---------------- Mean age, method 2: let pandas do it ----------------
# Series.mean() skips missing values by default, so no manual filtering is needed.
correct_mean_age = titanic_survival["Age"].mean()
print(correct_mean_age)

# ---------------- Average fare for each passenger class ----------------
passenger_classes = [1, 2, 3]
fares_by_class = {}
for this_class in passenger_classes:
    # Rows whose Pclass equals the class currently being processed.
    pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
    # Fare column of just those rows.
    pclass_fares = pclass_rows["Fare"]
    fare_for_class = pclass_fares.mean()
    # Map class number -> mean fare.
    fares_by_class[this_class] = fare_for_class
print(fares_by_class)

# ---------------- pivot_table: aggregate columns per group ----------------
# Aggregations are given by name ("mean", "sum") rather than as NumPy
# callables, because recent pandas versions emit a FutureWarning when
# np.mean / np.sum are passed as aggfunc.

# Mean of Survived for each Pclass value.
passenger_survival = titanic_survival.pivot_table(index="Pclass", values="Survived", aggfunc="mean")
print(passenger_survival)

# Mean of Age for each Pclass value.
passenger_age = titanic_survival.pivot_table(index="Pclass", values="Age", aggfunc="mean")
print(passenger_age)

# Sum of Fare and sum of Survived for each Embarked value.
port_stats = titanic_survival.pivot_table(index="Embarked", values=["Fare", "Survived"], aggfunc="sum")
print(port_stats)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  pandas