
Machine Learning in Action (4): Logistic Regression (Python Implementation)

2018-03-22 21:42
These are the hands-on implementations I wrote while working through the book Machine Learning in Action, to get a more intuitive feel for each algorithm. Simple or not, I type every line out myself.
Full source code and data: https://pan.baidu.com/s/1G2S2pb5gfBnxGNNTFgTkEA (password: fov0)
Below is the main program logRegress.py, together with notes on the problems I ran into while actually working through it.

# -*- coding: utf-8 -*-
# author: Yufeng Song
import math
from numpy import *
import matplotlib.pyplot as plt

def loadDataSet():
    dataMat = []
    labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
        labelMat.append(int(lineArr[2]))
    return dataMat, labelMat

def sigmoid(inX):
    return 1.0 / (1 + exp(-inX))  # exp comes from numpy
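
# Note (my own addition, not from the book): for inputs that are large in
# magnitude, exp(-inX) can overflow float64 and numpy emits a RuntimeWarning.
# A minimal overflow-safe variant is sketched below; the name sigmoidStable is
# hypothetical and nothing else in this file depends on it.
def sigmoidStable(inX):
    return 1.0 / (1 + exp(-clip(inX, -500, 500)))  # clip keeps exp() within float64 range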

def gradAscent(dataMatIn, classLabels):
    dataMatrix = mat(dataMatIn)              # m x n data matrix
    labelMat = mat(classLabels).transpose()  # m x 1 column vector of labels
    m, n = shape(dataMatrix)
    alpha = 0.001      # step size
    maxCycles = 500    # number of iterations
    weights = ones((n, 1))
    for k in range(maxCycles):
        h = sigmoid(dataMatrix * weights)    # m x 1 vector of predictions
        error = (labelMat - h)
        weights = weights + alpha * dataMatrix.transpose() * error  # at the end of the day it all comes down to the math
    return weights
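
# Note on the update line above (my own summary, not text from the book): batch
# gradient ascent on the logistic log-likelihood gives the rule
#     weights := weights + alpha * X^T * (y - sigmoid(X * weights)),
# which is exactly what the matrix expression computes for all m samples at once.
# A minimal usage sketch, assuming testSet.txt sits in the working directory:
#
#   dataArr, labelMat = loadDataSet()
#   w = gradAscent(dataArr, labelMat)    # 3x1 numpy matrix of fitted weights
#   plotBestFit(w.getA())                # .getA() turns the matrix into an ndarray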

def plotBestFit(weights):
    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []   # class 1 points
    xcord2 = []; ycord2 = []   # class 0 points
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    # the decision boundary is where w0 + w1*x1 + w2*x2 = 0, so solve for x2
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.01
    weights = ones(n)
    for i in range(m):                                # one pass over the data set
        h = sigmoid(sum(dataMatrix[i] * weights))     # h is a scalar here
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatrix[i]
    return weights
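
# Note: unlike gradAscent, stocGradAscent0 expects a plain numpy array and
# updates the weights once per sample rather than once per full pass, so each
# update is much cheaper but noisier. A hedged usage sketch of my own:
#
#   dataArr, labelMat = loadDataSet()
#   w = stocGradAscent0(array(dataArr), labelMat)
#   plotBestFit(w)    # w is already an ndarray, no getA() needed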

def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    m, n = shape(dataMatrix)
    weights = ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.0001   # alpha decreases each iteration but never reaches 0
            randIndex = int(random.uniform(0, len(dataIndex)))  # pick a sample at random
            h = sigmoid(sum(dataMatrix[randIndex] * weights))
            error = classLabels[randIndex] - h
            weights = weights + alpha * error * dataMatrix[randIndex]
            del(dataIndex[randIndex])
    return weights
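
# Two notes of my own on stocGradAscent1 (not claims from the book's text):
# 1. The decaying alpha means early updates are large and later ones small,
#    which damps the oscillation you see with stocGradAscent0.
# 2. randIndex is drawn from range(len(dataIndex)) but then indexes dataMatrix
#    directly, so the del(dataIndex[randIndex]) bookkeeping does not give true
#    sampling without replacement; dataIndex[randIndex] would be the stricter
#    fix. The code above is kept as typed because it still runs and converges.
# Usage sketch:
#
#   dataArr, labelMat = loadDataSet()
#   w = stocGradAscent1(array(dataArr), labelMat, numIter=500)
#   plotBestFit(w)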

def classifyVector(inX, weights):
    prob = sigmoid(sum(inX * weights))
    return 1.0 if prob > 0.5 else 0.0

def colicTest():
    frTrain = open('horseColicTraining.txt')
    frTest = open('horseColicTest.txt')
    trainingSet = []
    trainingLabels = []
    for line in frTrain.readlines():
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):                              # 21 features per sample
            lineArr.append(float(currLine[i]))
        trainingSet.append(lineArr)
        trainingLabels.append(float(currLine[21]))       # last column is the label
    trainWeights = stocGradAscent1(array(trainingSet), trainingLabels, 1000)
    errorCount = 0
    numTestVec = 0.0
    for line in frTest.readlines():
        numTestVec += 1.0
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        if int(classifyVector(array(lineArr), trainWeights)) != int(currLine[21]):
            errorCount += 1
    errorRate = (float(errorCount) / numTestVec)   # careful: this belongs outside the for loop, a mistake I made at first
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
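
# colicTest() reads horseColicTraining.txt and horseColicTest.txt from the
# working directory (both are included in the Baidu link above). If I remember
# the book's preprocessing discussion correctly, missing feature values in this
# data set were replaced with 0, which suits this update rule: a 0-valued
# feature leaves its weight unchanged during the update.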

def multiTest():
    numTests = 10
    errorSum = 0.0
    for k in range(numTests):
        errorSum += colicTest()
    print('after %d iterations the average error rate is: %f' % (numTests, errorSum / float(numTests)))
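
# Because stocGradAscent1 picks samples at random, each call to colicTest()
# trains on a different update order and the error rate varies from run to run;
# multiTest() simply averages 10 runs to get a steadier estimate.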

if __name__ == '__main__':
    # dataArr, labelMat = loadDataSet()
    # weights = gradAscent(dataArr, labelMat)
    # plotBestFit(weights.getA())
    multiTest()