您的位置:首页 > 编程语言 > Python开发

python 数据分析与挖掘实战

2017-11-14 10:58 1296 查看
第六章

对数据进行拉格朗日插值:

import pandas as pd

from scipy.interpolate  import lagrange

# Load the table that contains the missing values to be interpolated.
# NOTE(review): the original call was cut off after the path (unclosed
# parenthesis). header=None is an assumption (file has no header row) --
# confirm against the actual spreadsheet.
miss = pd.read_excel('/home/yao/data/chapter6/demo/data/missing_data.xls',
                     header=None)

def p(s, n, k=5):
    """Estimate the value at label ``n`` of ``s`` by Lagrange interpolation.

    The interpolating polynomial is built from up to ``k`` labels before and
    ``k`` labels after ``n`` (``n`` itself is excluded); null neighbours are
    dropped before fitting.

    Args:
        s: pandas Series whose index contains the integer labels around ``n``.
        n: label of the value to estimate.
        k: number of neighbours to look at on each side of ``n``.

    Returns:
        The interpolating polynomial evaluated at ``n``.
    """
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels outside the series (near either end)
    # instead of raising KeyError the way s[window] does on current pandas.
    y = s.reindex(window)
    y = y[y.notnull()]
    return lagrange(list(y.index), list(y))(n)

# Fill every missing cell, column by column, with its interpolated value.
for i in miss.columns:
    for j in range(len(miss)):
        if miss[i].isnull()[j]:
            # Assign through .loc: the chained form miss[i][j] = ... may
            # write to a temporary copy and leave `miss` unchanged.
            miss.loc[j, i] = p(miss[i], j)

使用随机函数分为训练数据和测试数据

import numpy as np
from random import shuffle  # no longer used below; import kept so the name stays available

model = pd.read_excel('/home/yao/data/chapter6/model.xls', header=0)

# random.shuffle() swaps rows of a 2-D NumPy array through views, which
# duplicates some rows and loses others. np.random.permutation() returns a
# correctly shuffled copy along the first axis. `.values` replaces
# DataFrame.as_matrix(), which current pandas no longer provides.
model = np.random.permutation(model.values)

# First 80% of the shuffled rows for training, the rest for testing.
split_at = int(len(model) * 0.8)
train = model[:split_at, :]
test = model[split_at:, :]

#创建cm_plot模块

def cm_plot(y, yp):
    """Draw a confusion matrix as a heat map with the count in each cell.

    Args:
        y: true labels.
        yp: predicted labels.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can chain ``.show()``.
    """
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    cm = confusion_matrix(y, yp)
    plt.matshow(cm, cmap=plt.cm.Greens)
    plt.colorbar()

    # matshow() draws cm[row, col] at x=col, y=row, so each count must be
    # annotated at xy=(col, row); (row, col) would transpose the numbers.
    for row in range(len(cm)):
        for col in range(len(cm)):
            plt.annotate(cm[row, col], xy=(col, row),
                         horizontalalignment='center',
                         verticalalignment='center')

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return plt

#构建LM神经网络模型

from keras.models import Sequential
from keras.layers.core import Dense, Activation

# The first three columns are the inputs, the fourth is the 0/1 target.
train_inputs = train[:, :3]
train_labels = train[:, 3]

# NOTE(review): output_dim / nb_epoch / predict_classes are Keras 1.x-era
# names -- confirm they exist in the installed Keras version.
net = Sequential()
for layer in (
    Dense(input_dim=3, output_dim=10),
    Activation('relu'),
    Dense(input_dim=10, output_dim=1),
    Activation('sigmoid'),
):
    net.add(layer)

net.compile(loss='binary_crossentropy', optimizer='adam')
net.fit(train_inputs, train_labels, nb_epoch=100, batch_size=1)

# Class predictions on the training set, flattened to one value per row.
predict_result = net.predict_classes(train_inputs).reshape(len(train))

from cm_plot import *

cm_plot(train_labels, predict_result).show()

# ROC curve of the neural network on the held-out test rows.
from sklearn.metrics import roc_curve  # was used here before it was imported
import matplotlib.pyplot as plt

predict_result = net.predict(test[:, :3]).reshape(len(test))
fpr, tpr, thresholds = roc_curve(test[:, 3], predict_result, pos_label=1)

plt.plot(fpr, tpr, linewidth=2, label='Roc of lm')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim(0, 1.05)
plt.ylim(0, 1.05)
plt.legend(loc=4)  # without a legend the curve label is never displayed
plt.show()

#创建 决策树CART模型

from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the same three input columns / one target column.
tree = DecisionTreeClassifier()
tree.fit(train[:, :3], train[:, 3])

# sklearn.externals.joblib is absent from newer scikit-learn releases;
# fall back to the standalone joblib package that scikit-learn depends on.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib
# joblib.dump(tree, treefile)  # left disabled: `treefile` is not defined in this listing

from cm_plot import *

# Confusion matrix on the training set (the original misspelled `train`).
cm_plot(train[:, 3], tree.predict(train[:, :3])).show()

from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Column 1 of predict_proba is used as the score for the positive class.
fpr, tpr, thresholds = roc_curve(
    test[:, 3], tree.predict_proba(test[:, :3])[:, 1], pos_label=1)

plt.plot(fpr, tpr, linewidth=2, label='Roc of cart', color='green')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim(0, 1.05)
plt.ylim(0, 1.05)
plt.legend(loc=4)  # without a legend the curve label is never displayed
plt.show()

第七章: 航空公司客户价值分析

#数据分析

import pandas as pd

air = pd.read_csv('/home/yao/data/chapter7/air_data.csv')

# Summary statistics with one row per column (result is not stored).
air.describe().transpose()

# Same summary over all column types and with no extra percentiles, plus a
# 'null' column: total row count minus the non-null count of each column.
explore = air.describe(percentiles=[], include='all').transpose()
total_rows = len(air)
explore['null'] = total_rows - explore['count']

explore.to_csv('/home/yao/data/chapter7/explore.csv')

#数据预处理

# Keep only rows where both yearly fare columns are present.
data = air[air['SUM_YR_1'].notnull() & air['SUM_YR_2'].notnull()]

# Row filters: non-zero fare in either year, or zero mileage together with
# a zero average discount.
index1 = data['SUM_YR_1'] != 0
index2 = data['SUM_YR_2'] != 0
# Each comparison needs its own parentheses: `&` binds tighter than `==`,
# so `a == 0 & b == 0` is parsed as the chained comparison
# `a == (0 & b) == 0` and fails on a Series.
index3 = (data['SEG_KM_SUM'] == 0) & (data['avg_discount'] == 0)

data2 = data[index1 | index2 | index3]

#数据标准化

zs = pd.read_excel('/home/yao/data/chapter7/zscoredata.xls')

# Z-score each column: subtract the column mean, divide by the column
# standard deviation.
column_means = zs.mean(axis=0)
column_stds = zs.std(axis=0)
zsn = (zs - column_means) / column_stds

# K-means clustering
from sklearn.cluster import KMeans

kmodel = KMeans(n_clusters=5)
kmodel.fit(zsn)

kmodel.cluster_centers_  # cluster centres
kmodel.labels_  # cluster label assigned to each sample

#画图

import numpy as np
import matplotlib.pyplot as plt

# Radar chart: one polygon per cluster centre, one axis per feature.
labels = zsn.columns  # was `zsn.column`, which is not a DataFrame attribute
plot_data = kmodel.cluster_centers_
k = plot_data.shape[1]  # number of axes = number of features (was hard-coded 5)
color = ['b', 'g', 'r', 'c', 'y']

angles = np.linspace(0, 2 * np.pi, k, endpoint=False)

# Repeat the first point at the end of both the data and the angles so each
# polygon closes; closing only the data makes ax.plot() fail on mismatched
# lengths.
plot_data = np.concatenate((plot_data, plot_data[:, [0]]), axis=1)
closed_angles = np.concatenate((angles, angles[:1]))

fig = plt.figure()
ax = fig.add_subplot(111, polar=True)  # the duplicate add_subplot call was dropped

for i, centre in enumerate(plot_data):
    # Cycle through the palette so more than len(color) clusters still plot.
    ax.plot(closed_angles, centre, 'o-', color=color[i % len(color)],
            label=u'客户群' + str(i), linewidth=2)

ax.set_rgrids(np.arange(0.01, 3.5, 0.5), np.arange(-1, 2.5, 0.5),
              fontproperties="SimHei")
# Tick labels use the k un-closed angles so they match the k feature names.
ax.set_thetagrids(angles * 180 / np.pi, labels, fontproperties='SimHei')

plt.show()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: