您的位置:首页 > 其它

《机器学习实战》学习笔记-[9]-回归-加权最小二乘LWLR

2017-07-30 12:14 337 查看
线性回归求的是具有最小均方误差的无偏估计,因此可能出现欠拟合现象。在估计中引入一些偏差,可以降低预测的均方误差。



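局部加权线性回归(LWLR)给待预测点 $x$ 附近的每个样本点 $x^{(i)}$ 赋予一个权重,回归系数为 $\hat{w} = (X^{T}WX)^{-1}X^{T}Wy$,其中 $W$ 为对角权重矩阵,使用高斯核:$w(i,i) = \exp\left(-\dfrac{\lVert x^{(i)} - x \rVert^{2}}{2k^{2}}\right)$。

x与x(i)越接近,则样本x(i)的权重w(i,i)越大,也即x与x(i)越强相关;参数k决定了权重随距离衰减的快慢。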







【备注】

每预测一个点都需要用到整个数据集来计算权重;如上图所示,当k=0.01时大部分点的权重已经接近0,若避免计算这些值可减少计算时间。

'''
机器学习实战-回归
'''

from numpy import *

def loadDataSet(fileName):
    """Load a tab-separated numeric data file.

    Every non-blank line holds the feature values followed by the target
    value in the last column. The number of feature columns is taken from
    the first line of the file.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of feature
        rows (lists of floats) and ``labelMat`` is the list of targets.
        Both lists are empty when the file is empty.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer fields than the first line.
    """
    dataMat = []
    labelMat = []
    # Read the file once and let the context manager close the handle.
    with open(fileName) as fr:
        lines = fr.readlines()
    if not lines:
        return dataMat, labelMat
    # All columns except the last one are features.
    numFeat = len(lines[0].split('\t')) - 1
    for line in lines:
        curLine = line.strip().split('\t')
        if curLine == ['']:
            # Skip blank lines (e.g. a trailing empty line).
            continue
        dataMat.append([float(curLine[i]) for i in range(numFeat)])  # features
        labelMat.append(float(curLine[-1]))  # target
    return dataMat, labelMat

def lwlr(testPoit, xArr, yArr, k=1.0):
    """Predict one point with locally weighted linear regression.

    Each training sample gets a Gaussian-kernel weight
    ``exp(-||x_j - testPoit||^2 / (2 * k**2))`` and the weighted normal
    equations ``(X^T W X) ws = X^T W y`` are solved for ``ws``.

    Args:
        testPoit: Feature row of the point to predict.
        xArr: Training feature rows (m x n).
        yArr: Training targets (length m).
        k: Kernel bandwidth; smaller values make the weights fall off
            faster with distance.

    Returns:
        A 1x1 matrix holding the prediction ``testPoit * ws``, or ``None``
        (after printing a message) when ``X^T W X`` has zero determinant.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    m = shape(xMat)[0]
    weights = mat(eye(m))  # diagonal weight matrix, filled in below
    for j in range(m):
        diffMat = testPoit - xMat[j, :]
        # diffMat * diffMat.T is a 1x1 matrix; pull out the scalar so the
        # element assignment does not rely on implicit array-to-scalar
        # conversion.
        sqDist = (diffMat * diffMat.T)[0, 0]
        weights[j, j] = exp(sqDist / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * (weights * yMat))
    return testPoit * ws

def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run ``lwlr`` for every row of ``testArr``.

    Args:
        testArr: Feature rows of the points to predict.
        xArr: Training feature rows.
        yArr: Training targets.
        k: Kernel bandwidth passed through to ``lwlr``.

    Returns:
        A 1-D float array with one prediction per row of ``testArr``.
        Entries are NaN where ``lwlr`` returned ``None``.
    """
    m = shape(testArr)[0]
    yHat = zeros(m)
    for i in range(m):
        pred = lwlr(testArr[i], xArr, yArr, k)
        # lwlr yields a 1x1 matrix; store its scalar value explicitly
        # instead of relying on implicit array-to-scalar conversion.
        yHat[i] = nan if pred is None else asarray(pred).item()
    return yHat

#=======测试文件
import os

from numpy import *

import matplotlib.pyplot as plt

#导入训练数据集
from ML_Learn.com.ML.Regression.BasicRegressionLWLR import lwlr

# Load the training data (tab-separated, target in the last column).
dataPath = os.getcwd() + '/resource/ex0.txt'
xArr, yArr = lwlr.loadDataSet(dataPath)

xMat = mat(xArr)
yMat = mat(yArr)

# Scatter the raw samples: column 1 of the features against the targets.
fig = plt.figure()
ax = fig.add_subplot(111)
rawX = xMat[:, 1].flatten().A[0]
rawY = yMat.T[:, 0].flatten().A[0]
ax.scatter(rawX, rawY, s=2, c='red')

# Fit every training point with bandwidth k = 0.003 and draw the curve.
# To compare, swap in other bandwidths such as 1.0 or 0.01 for the last
# argument of lwlrTest.
yHat = lwlr.lwlrTest(xArr, xArr, yArr, 0.003)
# Sort by the plotted feature so the line is drawn left to right.
srtInd = xMat[:, 1].argsort(0)
xSort = xMat[srtInd][:, 0, :]
ax.plot(xSort[:, 1], yHat[srtInd])

plt.show()





 当k太小时,模型会考虑太多的噪声,出现过拟合
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  机器学习