斯坦福CS231n 课程学习笔记--线性分类器(Assignment1代码实现)
2016-09-25 12:15
330 查看
最近学习了斯坦福的CS231n(winter 2016)系列课程,收获很大,作为深度学习以及卷积神经网络学习的入门很是完美。学习过程中,主要参考了知乎上几位同学的课程翻译,做得很好,在这里也对他们表示感谢,跟课程相关的很多资源都可以在该专栏中找到。推荐大家把每个笔记的翻译都完整地看一下。该课程视频的中文字幕也在翻译中,目前第一集已经翻译完成,感兴趣的同学也可以看看并参与进去。此外,完成课程视频和笔记的阅读之后,我也对课程的3个Assignment进行了实现。接下来主要以对课程笔记总结和Assignment代码实现相结合的方式完成这一系列的博客。
本篇博客主要对Assignment1中的linear_svm、softmax、linear_classifier三个任务进行实现。该页面提供了任务初始代码的下载,下载的代码中已经有了相关文件。我们只需要按照svm.ipynb和softmax.ipynb两个文件来一步步地做。关于numpy中一些数组、向量、矩阵的操作可以参见该教程(介绍了如何从矩阵中选取特定位置处的元素等接下来会用到的操作)。
这里我直接贴出自己的代码,相关注释已经在下载的文件之中:
1,linear_svm.py
2,softmax.py
3,linear_classifier.py
notebook上面的代码,其实主要是实现超参数调优的功能。
贴上几张运行过程中的截图:
本篇博客主要对Assignment1中的linear_svm、softmax、linear_classifier三个任务进行实现。该页面提供了任务初始代码的下载,下载的代码中已经有了相关文件。我们只需要按照svm.ipynb和softmax.ipynb两个文件来一步步地做。关于numpy中一些数组、向量、矩阵的操作可以参见该教程(介绍了如何从矩阵中选取特定位置处的元素等接下来会用到的操作)。
这里我直接贴出自己的代码,相关注释已经在下载的文件之中:
1,linear_svm.py
import numpy as np


def svm_loss_naive(W, X, y, reg):
    """Multiclass SVM (hinge) loss and its gradient, computed with loops.

    Args:
        W: weight array; columns index classes (W.shape[1] classes).
        X: data array; rows index samples (X.shape[0] samples).
        y: per-sample class indices, used to index the score vector.
        reg: L2 regularization strength.

    Returns:
        A tuple (loss, dW): the averaged hinge loss plus the regularization
        term 0.5 * reg * sum(W * W), and the gradient, shaped like W.
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0

    # range (not xrange) so the loop runs on Python 2 and Python 3 alike.
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                dW[:, y[i]] -= X[i, :]  # gradient w.r.t. the correct class
                dW[:, j] += X[i, :]  # gradient w.r.t. the violating class

    # The accumulated values are sums over the samples; turn them into means.
    loss /= num_train
    dW /= num_train

    # Add the regularization term to the loss and to the gradient.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W
    return loss, dW


def svm_loss_vectorized(W, X, y, reg):
    """Multiclass SVM (hinge) loss and its gradient, without Python loops.

    Takes the same arguments and returns the same (loss, dW) pair as
    svm_loss_naive.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(W)  # N by C
    # Score of the correct class of every sample, as a column (N by 1).
    correct = scores[rows, y].reshape(num_train, 1)

    # Hinge margins with delta = 1; the correct class contributes nothing.
    margins = np.maximum(0.0, scores - correct + 1.0)
    margins[rows, y] = 0.0

    loss = np.sum(margins) / num_train + 0.5 * reg * np.sum(W * W)

    # Every violated margin adds +x_i to its own class column and -x_i to
    # the column of the correct class.
    coeff = (margins > 0).astype(float)
    coeff[rows, y] = -np.sum(coeff, axis=1)
    dW = np.dot(X.T, coeff) / num_train + reg * W  # D by C
    return loss, dW
2,softmax.py
import numpy as np


def softmax_loss_naive(W, X, y, reg):
    """Softmax cross-entropy loss and its gradient, computed with loops.

    Args:
        W: weight array; columns index classes (W.shape[1] classes).
        X: data array; rows index samples (X.shape[0] samples).
        y: per-sample class indices.
        reg: L2 regularization strength.

    Returns:
        A tuple (loss, dW): the averaged cross-entropy loss plus the
        regularization term 0.5 * reg * sum(W * W), and the gradient,
        shaped like W.
    """
    loss = 0.0
    dW = np.zeros_like(W)  # D by C
    num_train = X.shape[0]
    num_class = W.shape[1]

    for i in range(num_train):
        scores = X[i].dot(W)
        # Numeric stability: shift so that the largest score is zero.
        scores = scores - np.max(scores)
        # The sum contains exp(0) == 1, so its log is always finite.
        log_norm = np.log(np.sum(np.exp(scores)))
        # Read the true-class log-probability directly. Multiplying a one-hot
        # mask by log(prob) yields 0 * -inf == nan whenever some other class
        # probability underflows to zero.
        loss -= scores[y[i]] - log_norm
        prob = np.exp(scores - log_norm)
        for j in range(num_class):
            indicator = 1.0 if j == y[i] else 0.0
            dW[:, j] += (prob[j] - indicator) * X[i, :]

    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW /= num_train
    dW += reg * W
    return loss, dW


def softmax_loss_vectorized(W, X, y, reg):
    """Softmax cross-entropy loss and its gradient, without Python loops.

    Takes the same arguments and returns the same (loss, dW) pair as
    softmax_loss_naive.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    f = X.dot(W)  # N by C
    # Numeric stability: shift every row so that its largest score is zero.
    shifted = f - np.max(f, axis=1, keepdims=True)
    # Log-probabilities via log-sum-exp; finite even when a probability
    # underflows, unlike log(prob) taken after normalizing.
    log_prob = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    loss = -np.sum(log_prob[rows, y]) / num_train + 0.5 * reg * np.sum(W * W)

    # Gradient of the loss w.r.t. the scores is (prob - one_hot(y)).
    dscores = np.exp(log_prob)
    dscores[rows, y] -= 1.0
    dW = np.dot(X.T, dscores) / num_train + reg * W
    return loss, dW
3,linear_classifier.py
import numpy as np

from linear_svm import *
from softmax import *


class LinearClassifier(object):
    """Linear classifier trained with mini-batch stochastic gradient descent.

    Subclasses supply the objective by overriding loss().
    """

    def __init__(self):
        # Weight array; created lazily by train() on first use.
        self.W = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=True):
        """Fit self.W by mini-batch stochastic gradient descent.

        Args:
            X: 2-D training data; rows index samples.
            y: training labels; assumed to take values 0...K-1 where K is
                the number of classes.
            learning_rate: step size of each parameter update.
            reg: regularization strength forwarded to loss().
            num_iters: number of update steps.
            batch_size: number of samples drawn per step.
            verbose: if true, print the loss every 100 iterations.

        Returns:
            A list holding the loss value of every iteration.
        """
        num_train, dim = X.shape
        # assume y takes values 0...K-1 where K is number of classes
        num_classes = np.max(y) + 1
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)  # D by C

        # Draw without replacement whenever the data set is large enough.
        # Fall back to sampling with replacement when the batch is larger
        # than the data set, which would otherwise raise ValueError.
        replace = batch_size > num_train

        # Run stochastic gradient descent (mini-batch) to optimize W.
        loss_history = []
        for it in range(num_iters):
            sample_index = np.random.choice(num_train, batch_size,
                                            replace=replace)
            X_batch = X[sample_index, :]  # batch_size by D
            y_batch = y[sample_index]  # 1 by batch_size

            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # perform parameter update
            self.W += -learning_rate * grad

            if verbose and it % 100 == 0:
                # Single parenthesized argument: valid on Python 2 and 3.
                print('Iteration %d / %d: loss %f' % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """Return, for every row of X, the index of the highest-scoring class."""
        return np.argmax(np.dot(self.W.T, X.T), axis=0)

    def loss(self, X_batch, y_batch, reg):
        """Objective hook; subclasses override it to return (loss, gradient).

        This base implementation does nothing and returns None.
        """
        pass


class LinearSVM(LinearClassifier):
    """ A subclass that uses the Multiclass SVM loss function """

    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)


class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
notebook上面的代码,其实主要是实现超参数调优的功能。
# softmax.ipynb
# NOTE(review): np, X_train, y_train, X_val and y_val are not defined in this
# snippet; presumably they come from earlier notebook cells -- confirm.
from cs231n.classifiers import Softmax

results = {}
best_val = -1
best_softmax = None
learning_rates = [5e-6, 1e-7, 5e-7]
regularization_strengths = [1e4, 5e4, 1e5]

# Every (learning rate, regularization strength) combination. The comprehension
# variables are named lr/reg (not x/y) because Python 2 leaks them into the
# notebook's global namespace.
params = [(lr, reg) for lr in learning_rates for reg in regularization_strengths]

for lrate, regular in params:
    softmax = Softmax()
    loss_hist = softmax.train(X_train, y_train, learning_rate=lrate,
                              reg=regular, num_iters=700, verbose=True)
    y_train_pred = softmax.predict(X_train)
    accuracy_train = np.mean(y_train == y_train_pred)
    y_val_pred = softmax.predict(X_val)
    accuracy_val = np.mean(y_val == y_val_pred)
    results[(lrate, regular)] = (accuracy_train, accuracy_val)
    # Keep the classifier with the best validation accuracy so far.
    if best_val < accuracy_val:
        best_val = accuracy_val
        best_softmax = softmax

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved during cross-validation: %f' % best_val)

# svm.ipynb
from cs231n.classifiers import LinearSVM

learning_rates = [1e-7, 5e-5]
regularization_strengths = [5e4, 1e5]

results = {}
best_val = -1  # The highest validation accuracy that we have seen so far.
best_svm = None  # The LinearSVM object that achieved the highest validation rate.
iters = 1000

for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
        y_train_pred = svm.predict(X_train)
        acc_train = np.mean(y_train == y_train_pred)
        y_val_pred = svm.predict(X_val)
        acc_val = np.mean(y_val == y_val_pred)
        results[(lr, rs)] = (acc_train, acc_val)
        # Keep the classifier with the best validation accuracy so far.
        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved during cross-validation: %f' % best_val)
贴上几张运行过程中的截图:
相关文章推荐
- 斯坦福CS231n 课程学习笔记--线性分类器(笔记篇)
- 【Java数据结构学习笔记之一】线性表的存储结构及其代码实现
- 斯坦福CS231n课程笔记:线性分类器(中)
- PHP100课程学习笔记1--PHP环境搭配和代码调试
- C/C++中strlen(),strcpy(),strcat()以及strcmp()的代码实现--学习笔记
- 学习笔记 --- LINNUX 使用异步通讯机制实现按键驱动代码分析
- WPF and Silverlight 学习笔记(二十二):使用代码实现绑定、绑定数据的验证
- J2ME 3D学习笔记——实现简单的界面(附代码)
- 学习笔记之 自定义连接池实现代码
- web前端学习笔记-瀑布流的算法分析与代码实现
- Udacity cs344-Introduction to Parallel Programming学习笔记-如何在VS环境下编译课程习题代码
- 线性表学习笔记 -顺序表实现(1)
- entlib 5.0学习笔记 通过代码方式实现注入
- lucene学习笔记之索引创建、内容检索、分类器实现
- 【Stage3D学习笔记续】山寨Starling(四):渲染代码实现及测试程序
- 一起学习水晶报表之拉模式【如何通过代码实现水晶报表显示数据】(课程2)
- jQuery学习笔记之控制页面实现代码
- WPF and Silverlight 学习笔记(二十二):使用代码实现绑定、绑定数据的验证
- 编程珠玑学习笔记 Aha算法 思考以及一些代码实现
- 学习笔记之 Servlet容器服务器原理 简易实现代码