您的位置:首页 > 编程语言 > Python开发

小白学习机器学习---第三章(2):对数几率回归python实现

2018-03-27 19:47 1041 查看
上代码: ### 梯度下降法实现对数几率回归
def loadDataSet():
    """Return the built-in 16-sample, two-feature data set in augmented form.

    Every raw sample is ``[feature_1, feature_2, label]``. A constant 1.0 is
    prepended to each feature row so the bias term can be learned as the
    first weight: ``y = w0*1 + w1*x1 + w2*x2``. (The plotting code in this
    file labels the two features as density and sugar content.)

    Returns:
        A ``(dataMatrix, labelMatrix)`` tuple: a list of
        ``[1.0, feature_1, feature_2]`` rows and the parallel list of
        integer class labels (1 for the first eight samples, 0 for the rest).
    """
    samples = [
        [0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1],
        [0.608, 0.318, 1], [0.556, 0.215, 1], [0.403, 0.211, 1],
        [0.481, 0.149, 1], [0.437, 0.211, 1],
        [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0],
        [0.343, 0.099, 0], [0.639, 0.161, 0], [0.657, 0.198, 0],
        [0.360, 0.370, 0], [0.593, 0.042, 0],
    ]
    # Augment X to (1, X) so that W = (b; W) and the model is simply X * W.
    dataMatrix = [[1.0, float(x1), float(x2)] for x1, x2, _ in samples]
    labelMatrix = [int(label) for _, _, label in samples]
    return dataMatrix, labelMatrix

def logistic(x):
    """Evaluate the logistic (sigmoid) function ``1 / (1 + e**-x)``.

    Args:
        x: A number or a NumPy array/matrix; arrays are evaluated
            element-wise.

    Returns:
        The sigmoid of ``x``; array input yields a result of the same shape.
    """
    # Function-scope import: no module-level numpy import is visible in
    # this file, so the block brings its own dependency into scope.
    import numpy as np

    # exp(-x) overflows to inf for large negative x. The quotient is still
    # the correct limit (0.0), so only the spurious overflow warning is
    # silenced here.
    with np.errstate(over="ignore"):
        return 1.0 / (1.0 + np.exp(-x))

def gradAscent(dataIn, classLabels, alpha=0.001, maxCycle=500):
    """Fit logistic-regression weights with batch gradient descent.

    The update ``w <- w - alpha * X^T (h - y)`` descends the negative
    log-likelihood, which is the same step as gradient *ascent* on the
    log-likelihood (hence the function name).

    Args:
        dataIn: 2-D sequence of samples, one row per sample, each row
            already augmented as ``[1.0, feature_1, feature_2, ...]``.
        classLabels: Sequence of 0/1 labels, one per row of ``dataIn``.
        alpha: Step size of each update (default 0.001).
        maxCycle: Number of full-batch updates to perform (default 500).

    Returns:
        An ``(n, 1)`` ``numpy.matrix`` of weights, where ``n`` is the number
        of columns in ``dataIn``. The matrix type is kept so callers that
        index the result as ``weights[i]`` see the same 2-D behaviour as
        before.

    Raises:
        ValueError: If the number of labels differs from the number of rows.
    """
    # Function-scope import: no module-level numpy import is visible in
    # this file.
    import numpy as np

    samples = np.atleast_2d(np.asarray(dataIn, dtype=float))
    # Labels as a column vector so they line up with the predictions.
    targets = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    m, n = samples.shape
    if targets.shape[0] != m:
        raise ValueError(
            "classLabels has %d entries but dataIn has %d rows"
            % (targets.shape[0], m)
        )

    weights = np.ones((n, 1))  # start from an all-ones weight vector
    for _ in range(maxCycle):
        predictions = np.asarray(logistic(samples.dot(weights)))
        error = predictions - targets
        # X^T * error is the gradient of the cost with respect to the weights.
        weights = weights - alpha * samples.T.dot(error)
    return np.asmatrix(weights)

def plotBestFit(weights, labelMatrix, dataIn=None):
    """Scatter-plot the samples by class and draw the fitted decision line.

    The line is where ``w0 + w1*x1 + w2*x2 = 0``, i.e. where the logistic
    output equals 0.5.

    Args:
        weights: The three fitted weights ``(w0, w1, w2)``; any array-like
            that flattens to at least three values (an ``(n, 1)`` matrix,
            an ndarray or a plain list).
        labelMatrix: Sequence of class labels, one per sample; samples whose
            label equals 1 are drawn as red squares, all others in green.
        dataIn: Optional augmented samples ``[1.0, x1, x2]``, one row per
            sample. When omitted, the module-level ``dataMatrix`` global is
            used, which is what this function originally read.

    Raises:
        ValueError: If the number of labels differs from the number of rows.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    if dataIn is None:
        # Backward compatibility: the original implementation read the
        # module-level `dataMatrix` instead of taking the data as an argument.
        dataIn = dataMatrix
    points = np.asarray(dataIn, dtype=float)
    labels = np.asarray(labelMatrix).ravel().astype(int)
    if labels.shape[0] != points.shape[0]:
        raise ValueError(
            "labelMatrix has %d entries but the data has %d rows"
            % (labels.shape[0], points.shape[0])
        )
    # Flatten so the computation does not depend on `weights` being an
    # np.matrix (the original indexing only worked for that type).
    w0, w1, w2 = np.asarray(weights, dtype=float).ravel()[:3]

    positive = labels == 1
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Column 0 is the constant 1.0, so the features live in columns 1 and 2.
    ax.scatter(points[positive, 1], points[positive, 2], s=30, c='red', marker='s')
    ax.scatter(points[~positive, 1], points[~positive, 2], s=30, c='green')
    # NOTE(review): hard-coded extra point; presumably a sample to classify
    # by eye - its origin is not shown in this file.
    ax.scatter(0.719, 0.103, s=30, c='blue')

    x = np.arange(0, 1, 0.001)
    if w2 != 0:
        ax.plot(x, (-w0 - w1 * x) / w2)
    elif w1 != 0:
        # Boundary does not depend on x2: draw it as a vertical line rather
        # than dividing by zero.
        ax.axvline(-w0 / w1)
    # NOTE(review): the axis labels are Chinese ("density" / "sugar
    # content"); confirm matplotlib is configured with a font that has
    # these glyphs.
    plt.xlabel('密度')
    plt.ylabel('含糖率')
    plt.show()
if __name__ == "__main__":
    # Run the demo only when executed as a script, so importing this module
    # does not train a model or open a blocking plot window.
    # The names are still bound at module level, so code that reads the
    # `dataMatrix` global keeps working.
    dataMatrix, labelMatrix = loadDataSet()
    weights = gradAscent(dataMatrix, labelMatrix)
    plotBestFit(weights, labelMatrix)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: