【deeplearning.ai】第二门课:提升深层神经网络——正则化的编程作业
2017-10-10 09:59
603 查看
正则化的编程作业,包括无正则化情况、L2正则化、Dropout的编程实现,编程中用到的相关理论和公式请参考上一篇博文。问题描述:原问题是判断足球运动员是否头球,在此省略问题背景,其实就是二分类问题。有以下类型的数据,蓝点为一类,红点为一类。导入需要的扩展包(reg_utils.py及数据集在此下载):
import numpy as np import matplotlib.pyplot as plt from reg_utils import sigmoid, relu, plot_decision_boundary, initialize_parameters, load_2D_dataset, predict_dec from reg_utils import compute_cost, predict, forward_propagation, backward_propagation, update_parameters import sklearn import sklearn.datasets import scipy.io from testCases import * %matplotlib inline plt.rcParams['figure.figsize'] = (7.0, 4.0) # set default size of plots plt.rcParams['image.interpolation'] = 'nearest' plt.rcParams['image.cmap'] = 'gray'
# ---------------------------------------------------------------------------
# Part 1: model without regularization (shared training loop)
# ---------------------------------------------------------------------------

def model(X, Y, learning_rate=0.3, num_iterations=30000, print_cost=True,
          lambd=0, keep_prob=1):
    """Train the network with layer sizes [X.shape[0], 20, 3, 1] by gradient descent.

    Exactly one regularization technique may be active at a time: either
    L2 regularization (``lambd != 0``) or dropout (``keep_prob < 1``).

    Arguments:
    X -- input data; X.shape[1] is the number of examples
    Y -- labels, 1 for blue points and 0 for red points
    learning_rate -- learning rate of the gradient-descent update
    num_iterations -- number of gradient-descent iterations
    print_cost -- if True, print the cost every 10000 iterations and record
                  it every 1000 iterations for the final plot
    lambd -- L2 regularization hyperparameter (0 disables L2)
    keep_prob -- dropout keep probability, must lie in (0, 1] (1 disables dropout)

    Returns:
    parameters -- parameters learned by the model

    Raises:
    ValueError -- if keep_prob is outside (0, 1]
    AssertionError -- if L2 regularization and dropout are requested together
    """
    # Reject keep_prob values for which no forward-propagation branch applies
    # (> 1) or for which the dropout rescaling would divide by zero (<= 0).
    if not 0 < keep_prob <= 1:
        raise ValueError("keep_prob must be in the interval (0, 1]")

    # It is possible to use both L2 regularization and dropout, but this
    # assignment only explores one at a time. The check does not depend on
    # the iteration, so it is done once before training starts.
    assert lambd == 0 or keep_prob == 1

    costs = []                              # recorded costs, used for the plot
    layers_dims = [X.shape[0], 20, 3, 1]    # network architecture
    use_dropout = keep_prob < 1

    parameters = initialize_parameters(layers_dims)

    # Gradient-descent loop.
    for i in range(num_iterations):
        # Forward propagation.
        if use_dropout:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)
        else:
            a3, cache = forward_propagation(X, parameters)

        # Cost function.
        if lambd == 0:
            cost = compute_cost(a3, Y)
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)

        # Backward propagation; must match the forward pass that built `cache`.
        if lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif use_dropout:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)
        else:
            grads = backward_propagation(X, Y, cache)

        # Parameter update.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the cost every 10000 iterations, record it every 1000.
        if print_cost and i % 10000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
        if print_cost and i % 1000 == 0:
            costs.append(cost)

    # Plot the cost.
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (x1,000)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters


if __name__ == "__main__":
    train_X, train_Y, test_X, test_Y = load_2D_dataset()  # load the data

    # Train the model without any regularization.
    parameters = model(train_X, train_Y)
    print("On the training set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)

    plt.title("Model without regularization")
    axes = plt.gca()
    axes.set_xlim([-0.75, 0.40])
    axes.set_ylim([-0.75, 0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)

# Results reported by the original post (the cost curve and decision-boundary
# figures are not part of this file):
#   training-set accuracy: 0.947867298578
#   test-set accuracy:     0.915
# Without regularization, training overfits the data.


# ---------------------------------------------------------------------------
# Part 2: L2 regularization
# ---------------------------------------------------------------------------

def compute_cost_with_regularization(A3, Y, parameters, lambd):
    """Compute the cost with an added L2 penalty on W1, W2 and W3.

    Arguments:
    A3 -- output of forward propagation
    Y -- true labels; Y.shape[1] is used as the number of examples
    parameters -- model parameters; must contain "W1", "W2" and "W3"
    lambd -- regularization hyperparameter

    Returns:
    cost -- value of the regularized loss function
    """
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]

    # Cost without the regularization term.
    cross_entropy_cost = compute_cost(A3, Y)

    # Regularization term: lambd / (2 * m) times the sum of squared weights.
    L2_regularization_cost = (np.sum(np.square(W1))
                              + np.sum(np.square(W2))
                              + np.sum(np.square(W3))) * lambd / (2 * m)

    return cross_entropy_cost + L2_regularization_cost


def backward_propagation_with_regularization(X, Y, cache, lambd):
    """Backward propagation for the model trained with the L2-regularized cost.

    Arguments:
    X -- input data; X.shape[1] is the number of examples
    Y -- true labels
    cache -- cache output of forward_propagation(), unpacked as
             (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)
    lambd -- regularization hyperparameter

    Returns:
    gradients -- dict with the gradients of every weight, bias, activation
                 and pre-activation ("dZ3", "dW3", "db3", "dA2", ...)
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    # Each dW gets the extra term (lambd / m) * W from the L2 penalty.
    dW3 = 1./m * np.dot(dZ3, A2.T) + W3 * lambd/m
    db3 = 1./m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    # Gradient only flows through units with a positive activation
    # (presumably ReLU layers; forward_propagation() is defined in reg_utils).
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1./m * np.dot(dZ2, A1.T) + W2 * lambd/m
    db2 = 1./m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1./m * np.dot(dZ1, X.T) + W1 * lambd/m
    db1 = 1./m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3, "dA2": dA2,
                 "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1,
                 "dZ1": dZ1, "dW1": dW1, "db1": db1}

    return gradients

# Results reported by the original post after training this model (the cost
# curve and decision-boundary figures are not part of this file):
#   training-set accuracy: 0.938388625592
#   test-set accuracy:     0.93


# ---------------------------------------------------------------------------
# Part 3: Dropout
# ---------------------------------------------------------------------------

def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """Forward propagation (RELU -> RELU -> SIGMOID) with dropout on both hidden layers.

    Arguments:
    X -- input data
    parameters -- weights and biases "W1", "b1", "W2", "b2", "W3", "b3"
    keep_prob -- probability of keeping a neuron active

    Returns:
    A3 -- output of the network
    cache -- tuple (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
             needed by backward_propagation_with_dropout()
    """
    # NOTE(review): reseeding the global NumPy RNG on every call means each
    # forward pass draws the same masks D1/D2. Kept unchanged because the
    # results quoted in this post were presumably produced with it.
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Dropout on layer 1.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = (D1 < keep_prob)                          # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = np.multiply(A1, D1)                       # Step 3: shut down some neurons of A1
    A1 = A1/keep_prob                              # Step 4: scale the value of neurons that haven't been shut down

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    # Dropout on layer 2.
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = (D2 < keep_prob)                          # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = np.multiply(A2, D2)                       # Step 3: shut down some neurons of A2
    A2 = A2/keep_prob                              # Step 4: scale the value of neurons that haven't been shut down

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache


def backward_propagation_with_dropout(X, Y, cache, keep_prob):
    """Backward propagation for the model trained with dropout.

    Arguments:
    X -- input data; X.shape[1] is the number of examples
    Y -- true labels
    cache -- cache output of forward_propagation_with_dropout(), unpacked as
             (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    keep_prob -- probability of keeping a neuron active

    Returns:
    gradients -- dict with the gradients of every weight, bias, activation
                 and pre-activation ("dZ3", "dW3", "db3", "dA2", ...)
    """
    m = X.shape[1]
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    dW3 = 1./m * np.dot(dZ3, A2.T)
    db3 = 1./m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dA2 = np.multiply(dA2, D2)  # Step 1: Apply mask D2 to shut down the same neurons as during the forward propagation
    dA2 = dA2/keep_prob         # Step 2: Scale the value of neurons that haven't been shut down
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1./m * np.dot(dZ2, A1.T)
    db2 = 1./m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dA1 = np.multiply(dA1, D1)  # Step 1: Apply mask D1 to shut down the same neurons as during the forward propagation
    dA1 = dA1/keep_prob         # Step 2: Scale the value of neurons that haven't been shut down
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1./m * np.dot(dZ1, X.T)
    db1 = 1./m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3, "dA2": dA2,
                 "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1,
                 "dZ1": dZ1, "dW1": dW1, "db1": db1}

    return gradients

# Results reported by the original post after training this model (the cost
# curve and decision-boundary figures are not part of this file):
#   training-set accuracy: 0.928909952607
#   test-set accuracy:     0.95
# As the results above show, regularization lowers the training accuracy,
# because it limits the network's capacity to fit the data, but it improves
# the test-set accuracy.
相关文章推荐
- 【deeplearning.ai】第二门课:提升深层神经网络——正则化
- 【deeplearning.ai】第二门课:提升深层神经网络——权重初始化
- 【deeplearning.ai】第二门课:提升深层神经网络——偏差和方差
- DeepLearning.ai学习笔记(二)改善深层神经网络:超参数调试、正则化以及优化--week3 超参数调试、Batch正则化和程序框架
- [DeeplearningAI笔记]改善深层神经网络1.4_1.8深度学习实用层面_正则化Regularization与改善过拟合
- DeepLearning.ai学习笔记(二)改善深层神经网络:超参数调试、正则化以及优化--Week2优化算法
- DeepLearning.ai学习笔记(二)改善深层神经网络:超参数调试、正则化以及优化--Week1深度学习的实用层面
- [DeeplearningAI笔记]改善深层神经网络2.3_2.5_带修正偏差的指数加权平均
- [DeeplearningAI笔记]改善深层神经网络_优化算法2.1_2.2_mini-batch梯度下降法
- 【deeplearning.ai】Neural Networks and Deep Learning——深层神经网络
- [DeeplearningAI笔记]改善深层神经网络1.1_1.3深度学习实用层面_偏差/方差/欠拟合/过拟合/训练集/验证集/测试集
- Deeplearning.ai学习笔记-改善深层神经网络(二)-降低方差
- deeplearning.ai - 深层神经网络(Deep Neural Networks)
- [DeeplearningAI笔记]改善深层神经网络_深度学习的实用层面1.10_1.12/梯度消失/梯度爆炸/权重初始化
- Coursera deeplearning.ai 深度学习笔记2-1-Practical aspects of deep learning-神经网络实际问题分析(初始化&正则化&训练效率)与代码实现
- [DeeplearningAI笔记]改善深层神经网络_深度学习的实用层面1.9_归一化normalization
- [DeeplearningAI笔记]改善深层神经网络_优化算法2.6_2.9Momentum/RMSprop/Adam优化算法
- Coursera deeplearning.ai 深度学习笔记1-3-Shallow Neural Networks-浅层神经网络原理推导与代码实现
- Deep Learning Specialization课程笔记——神经网络编程基础
- Coursera 吴恩达 Deep Learning 第二课 改善神经网络 Improving Deep Neural Networks 第二周 编程作业代码Optimization methods