
Linear Regression: numpy normal equation & tensorflow gradient descent

2016-12-18 20:55

Linear Regression

normal equation

The simplest form of linear regression finds w and b such that y = w x + b has minimal mean squared error against the true values.

So we can simply set the derivative of the cost function to zero and solve for the closed-form solution.
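
For reference, writing X for the design matrix and y for the target vector, the derivation is the standard one:

J(w) = ||X w - y||^2
dJ/dw = 2 X^T (X w - y) = 0
=>  w = (X^T X)^{-1} X^T y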

The only thing to note is that b is absorbed into w by appending a column of ones, i.e. X = [X, 1] and y = X w, which simplifies the computation:

train_x = np.arange(10000) / 100                      # 10000 input points in [0, 100)
train_x = np.column_stack((train_x, np.ones(10000)))  # append a column of ones to absorb b


The core code is then very short:

def linearRegression(self):
    # Normal equation: w = (X^T X)^{-1} X^T y
    xTx = self.train_x.T.dot(self.train_x)
    if np.linalg.det(xTx) == 0.0:
        print("singular")
        return
    w = np.linalg.inv(xTx).dot(self.train_x.T).dot(self.train_y)
    pre_y = self.test_x.dot(w)
    return pre_y


Note, however, that you must first check that xTx is non-singular before inverting it and continuing.
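
If xTx is singular (or nearly so), an alternative is to let numpy solve the least-squares problem directly instead of forming the explicit inverse. A minimal sketch of that variant (np.linalg.lstsq is standard numpy; the method name is mine, not from the original post):

def linearRegressionLstsq(self):
    # Solves min_w ||X w - y||^2 without inverting X^T X,
    # which also works when X^T X is singular or ill-conditioned.
    w, residuals, rank, sv = np.linalg.lstsq(self.train_x, self.train_y, rcond=None)
    return self.test_x.dot(w)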

The full script (with a bias-variance decomposition added along the way):

import numpy as np
import matplotlib.pyplot as plt

def f(x):
    return x + x * 2 * np.sin(x)

class regression:
    def __init__(self, train_x, train_y, test_x, test_y):
        self.train_x = train_x
        self.train_y = train_y
        self.test_x = test_x
        self.test_y = test_y

    def linearRegression(self):
        # Normal equation: w = (X^T X)^{-1} X^T y
        xTx = self.train_x.T.dot(self.train_x)
        if np.linalg.det(xTx) == 0.0:
            print("singular")
            return
        w = np.linalg.inv(xTx).dot(self.train_x.T).dot(self.train_y)
        pre_y = self.test_x.dot(w)
        return pre_y

    def bias_variance(self, pre_y):
        # Rough decomposition around the mean of the prediction vector
        var = np.mean((pre_y - np.mean(pre_y)) ** 2)
        bias2 = np.mean(np.mean(pre_y) - self.test_y) ** 2
        mse = np.mean((pre_y - self.test_y) ** 2)   # total test error ("noise" in the original post)
        print("var = ", var)
        print("bias2 = ", bias2)
        print("mse = ", mse)

    def plotAns(self, pre_y):
        plt.plot(self.test_y, 'ro', label='Original data')
        plt.plot(pre_y, label='Fitted line')
        plt.legend()
        plt.show()

if __name__ == "__main__":
    train_x = np.arange(10000) / 100
    train_y = f(train_x)
    test_x = train_x + np.random.random()               # shift all test inputs by one random offset
    train_x = np.column_stack((train_x, np.ones(10000)))

    test_x = np.column_stack((test_x, np.ones(10000)))
    test_y = f(test_x[:, 0])

    regressor = regression(train_x, train_y, test_x, test_y)
    pre_y = regressor.linearRegression()
    regressor.bias_variance(pre_y)
    # regression(train_x, train_y, test_x, test_y).LWLR(0.1)  # LWLR is not defined in this class
    regressor.plotAns(pre_y)


(Plot: test data as red dots with the fitted line overlaid.)
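
As an aside, the bias_variance method above decomposes the error around the mean of a single prediction vector. A more textbook estimate averages over several models fit to independently noised copies of the training targets; a minimal sketch of that idea (the noise level 0.1 and the 20 resamples are arbitrary illustration values, not from the original post):

def bias_variance_resampled(train_x, test_x, test_y, n_models=20, noise_std=0.1):
    # Fit one least-squares model per noisy copy of the training targets,
    # then average per-point squared bias and variance of the predictions.
    preds = []
    for _ in range(n_models):
        noisy_y = f(train_x[:, 0]) + np.random.normal(0, noise_std, train_x.shape[0])
        w = np.linalg.lstsq(train_x, noisy_y, rcond=None)[0]
        preds.append(test_x.dot(w))
    preds = np.array(preds)                      # shape: (n_models, n_test)
    mean_pred = preds.mean(axis=0)
    bias2 = np.mean((mean_pred - test_y) ** 2)   # squared bias, averaged over test points
    var = np.mean(preds.var(axis=0))             # variance, averaged over test points
    return bias2, var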



gradient descent

With TensorFlow this is straightforward: just minimize the mean squared error directly with gradient descent.

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from time import time
rng = np.random

# Training data
train_size = 2000
train_X = np.linspace(0, 10, train_size)
test_Y = train_X + 2 * np.sin(1.5 * train_X)
train_Y = train_X + 2 * np.sin(1.5 * train_X) + rng.normal(0, 0.2, train_size)  # per-point Gaussian noise

learning_rate = 0.01
training_epochs = 2000
display_step = 50

class linearRegression:

    @staticmethod
    def VectorLR(train_X, train_Y, learning_rate, training_epochs, display_step):
        n_samples = train_X.shape[0]
        X = tf.placeholder("float", [n_samples])
        Y = tf.placeholder("float", [n_samples])

        W = tf.Variable(tf.random_normal([1]), name="weight")
        b = tf.Variable(tf.random_normal([1]), name="bias")

        activation = X * W + b                                               # predicted y

        cost = tf.reduce_sum(tf.pow(activation - Y, 2)) / (2 * n_samples)    # mean squared error
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

        init = tf.initialize_all_variables()

        sess = tf.InteractiveSession()
        sess.run(init)
        for epoch in range(training_epochs):
            sess.run(optimizer, feed_dict={X: train_X, Y: train_Y})
        return sess.run(W) * train_X + sess.run(b)
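
The snippet above defines VectorLR but never calls it; a minimal usage sketch under the same TF 1.x API (plot labels mirror the numpy version) could be:

pre_Y = linearRegression.VectorLR(train_X, train_Y, learning_rate, training_epochs, display_step)
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.plot(train_X, pre_Y, label='Fitted line')
plt.legend()
plt.show()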