
[deeplearning.ai] Course 2 (Improving Deep Neural Networks): Programming Assignment on Regularization

2017-10-10 09:59
This is the programming assignment on regularization. It covers three cases: no regularization, L2 regularization, and dropout. For the theory and formulas used here, see the previous post.

Problem description: the original task is to predict whether a football player will head the ball. The background is omitted here, since it is simply a binary classification problem. The data look as follows: blue dots belong to one class, red dots to the other. First, import the required packages; reg_utils.py and the dataset can be downloaded here.
import numpy as np
import matplotlib.pyplot as plt
from reg_utils import sigmoid, relu, plot_decision_boundary, initialize_parameters, load_2D_dataset, predict_dec
from reg_utils import compute_cost, predict, forward_propagation, backward_propagation, update_parameters
import sklearn
import sklearn.datasets
import scipy.io
from testCases import *

%matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
train_X, train_Y, test_X, test_Y = load_2D_dataset()	# load the dataset
1. Model without regularization
def model(X, Y, learning_rate = 0.3, num_iterations = 30000, print_cost = True, lambd = 0, keep_prob = 1):
    """
    Arguments:
    X -- input data
    Y -- labels, 1 for blue dots, 0 for red dots
    learning_rate -- learning rate
    num_iterations -- number of iterations
    print_cost -- if True, print the cost every 10000 iterations
    lambd -- regularization hyperparameter
    keep_prob -- dropout hyperparameter (probability of keeping a neuron)

    Returns:
    parameters -- parameters learned by the model
    """
    grads = {}
    costs = []                            # to keep track of the cost
    m = X.shape[1]                        # number of examples
    layers_dims = [X.shape[0], 20, 3, 1]  # network architecture

    # Initialize parameters
    parameters = initialize_parameters(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):

        # Forward propagation
        if keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)                          # forward propagation without dropout
        elif keep_prob < 1:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)  # forward propagation with dropout

        # Cost function
        if lambd == 0:
            cost = compute_cost(a3, Y)                                              # cost without regularization
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)       # cost with L2 regularization

        # Backward propagation
        assert(lambd == 0 or keep_prob == 1)    # it is possible to use both L2 regularization and dropout,
                                                # but this assignment will only explore one at a time
        if lambd == 0 and keep_prob == 1:
            grads = backward_propagation(X, Y, cache)
        elif lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif keep_prob < 1:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the cost every 10000 iterations
        if print_cost and i % 10000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
        if print_cost and i % 1000 == 0:
            costs.append(cost)

    # plot the cost
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (x1,000)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters
Train the model without any regularization:
parameters = model(train_X, train_Y)
print ("On the training set:")
predictions_train = predict(train_X, train_Y, parameters)
print ("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)
plt.title("Model without regularization")axes = plt.gca()axes.set_xlim([-0.75,0.40])axes.set_ylim([-0.75,0.65])plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
The cost curve is shown below. The accuracy is 0.947867298578 on the training set and 0.915 on the test set. The resulting decision boundary is plotted below: without regularization, the model overfits the training data.

2. L2 regularization
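As a quick reminder of the formula from the previous post (assuming the standard binary cross-entropy cost used in this assignment), L2 regularization adds a squared-weight penalty to the cost:

$$ J_{regularized} = \underbrace{-\frac{1}{m}\sum_{i=1}^{m}\Big(y^{(i)}\log a^{[3](i)} + (1-y^{(i)})\log\big(1-a^{[3](i)}\big)\Big)}_{\text{cross-entropy cost}} + \underbrace{\frac{\lambda}{2m}\sum_{l}\sum_{k}\sum_{j}\big(W^{[l]}_{k,j}\big)^{2}}_{\text{L2 regularization cost}} $$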
def compute_cost_with_regularization(A3, Y, parameters, lambd):
    """
    Arguments:
    A3 -- output of the forward propagation
    Y -- true labels
    parameters -- model parameters

    Returns:
    cost -- value of the regularized loss function
    """
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]

    # cost without the regularization term
    cross_entropy_cost = compute_cost(A3, Y)

    # L2 regularization term
    L2_regularization_cost = (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3))) * lambd / (2 * m)

    # regularized cost
    cost = cross_entropy_cost + L2_regularization_cost

    return cost
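To make the penalty term concrete, here is a small standalone check (the toy weights and hyperparameter values are made up, not from the assignment) computing the L2 term the same way compute_cost_with_regularization does:

import numpy as np

# Toy example with made-up values: L2 penalty for two small weight matrices
W1 = np.array([[1., -2.], [0.5, 0.]])
W2 = np.array([[3.]])
lambd, m = 0.7, 4

L2_term = lambd / (2 * m) * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
print(L2_term)   # 0.7/(2*4) * (1 + 4 + 0.25 + 0 + 9) = 0.0875 * 14.25 = 1.246875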
def backward_propagation_with_regularization(X, Y, cache, lambd):
    """
    Arguments:
    X -- input data
    Y -- true labels
    cache -- cache output from forward_propagation()
    lambd -- regularization hyperparameter

    Returns:
    gradients -- gradients with respect to the weights and biases
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    dW3 = 1./m * np.dot(dZ3, A2.T) + W3 * lambd/m
    db3 = 1./m * np.sum(dZ3, axis=1, keepdims = True)

    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1./m * np.dot(dZ2, A1.T) + W2 * lambd/m
    db2 = 1./m * np.sum(dZ2, axis=1, keepdims = True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1./m * np.dot(dZ1, X.T) + W1 * lambd/m
    db1 = 1./m * np.sum(dZ1, axis=1, keepdims = True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3, "dA2": dA2,
                 "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1,
                 "dZ1": dZ1, "dW1": dW1, "db1": db1}

    return gradients
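The post does not show the training call for the L2-regularized model. Assuming the same pattern as in the non-regularized case, and taking lambd = 0.7 as an example value of the hyperparameter (treat it as an assumption to be tuned), the call would look roughly like this:

# Hypothetical training call; lambd = 0.7 is an assumed hyperparameter value
parameters = model(train_X, train_Y, lambd = 0.7)
print ("On the training set:")
predictions_train = predict(train_X, train_Y, parameters)
print ("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)

plt.title("Model with L2-regularization")
axes = plt.gca()
axes.set_xlim([-0.75,0.40])
axes.set_ylim([-0.75,0.65])
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)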
Training this model gives the cost curve below. The accuracy is 0.938388625592 on the training set and 0.93 on the test set; the resulting decision boundary is plotted below.

3. Dropout
def forward_propagation_with_dropout(X, parameters, keep_prob = 0.5):
    """
    Arguments:
    X -- input data
    parameters -- weights and biases
    keep_prob -- probability of keeping a neuron active

    Returns:
    A3 -- output of the network
    cache -- cache used for computing the backward propagation
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # dropout
    D1 = np.random.rand(A1.shape[0], A1.shape[1])   # Step 1: initialize matrix D1 = np.random.rand(..., ...)
    D1 = (D1 < keep_prob)                           # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
    A1 = np.multiply(A1, D1)                        # Step 3: shut down some neurons of A1
    A1 = A1 / keep_prob                             # Step 4: scale the value of neurons that haven't been shut down

    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    # dropout
    D2 = np.random.rand(A2.shape[0], A2.shape[1])   # Step 1: initialize matrix D2 = np.random.rand(..., ...)
    D2 = (D2 < keep_prob)                           # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
    A2 = np.multiply(A2, D2)                        # Step 3: shut down some neurons of A2
    A2 = A2 / keep_prob                             # Step 4: scale the value of neurons that haven't been shut down

    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
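To see why Step 4 divides by keep_prob (the "inverted dropout" trick), here is a small self-contained numpy check, independent of the assignment code: after masking and rescaling, the average activation stays roughly the same, so later layers see inputs of the same scale during training as at test time, when dropout is turned off.

import numpy as np

np.random.seed(0)
A = np.random.rand(5, 10000)                 # fake activations: 5 units, 10000 examples
keep_prob = 0.8

D = np.random.rand(*A.shape) < keep_prob     # Steps 1-2: random 0/1 mask with P(keep) = keep_prob
A_drop = (A * D) / keep_prob                 # Steps 3-4: shut down neurons, then rescale

print(A.mean())       # ~0.5
print(A_drop.mean())  # also ~0.5, so the expected activation is preserved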
def backward_propagation_with_dropout(X, Y, cache, keep_prob):
    """
    Arguments:
    X -- input data
    Y -- true labels
    cache -- cache output from forward_propagation_with_dropout()
    keep_prob -- probability of keeping a neuron active

    Returns:
    gradients -- gradients with respect to the weights and biases
    """
    m = X.shape[1]
    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    dW3 = 1./m * np.dot(dZ3, A2.T)
    db3 = 1./m * np.sum(dZ3, axis=1, keepdims = True)

    dA2 = np.dot(W3.T, dZ3)
    dA2 = np.multiply(dA2, D2)      # Step 1: Apply mask D2 to shut down the same neurons as during the forward propagation
    dA2 = dA2 / keep_prob           # Step 2: Scale the value of neurons that haven't been shut down
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    dW2 = 1./m * np.dot(dZ2, A1.T)
    db2 = 1./m * np.sum(dZ2, axis=1, keepdims = True)

    dA1 = np.dot(W2.T, dZ2)
    dA1 = np.multiply(dA1, D1)      # Step 1: Apply mask D1 to shut down the same neurons as during the forward propagation
    dA1 = dA1 / keep_prob           # Step 2: Scale the value of neurons that haven't been shut down
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    dW1 = 1./m * np.dot(dZ1, X.T)
    db1 = 1./m * np.sum(dZ1, axis=1, keepdims = True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3, "dA2": dA2,
                 "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1,
                 "dZ1": dZ1, "dW1": dW1, "db1": db1}

    return gradients
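The post again omits the training call for the dropout model. Assuming keep_prob = 0.86 and learning_rate = 0.3 as example hyperparameter values (both are assumptions to be tuned), it would be along these lines:

# Hypothetical training call; keep_prob = 0.86 and learning_rate = 0.3 are assumed values
parameters = model(train_X, train_Y, keep_prob = 0.86, learning_rate = 0.3)
print ("On the training set:")
predictions_train = predict(train_X, train_Y, parameters)
print ("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)

plt.title("Model with dropout")
axes = plt.gca()
axes.set_xlim([-0.75,0.40])
axes.set_ylim([-0.75,0.65])
plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)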
Training this model gives the cost curve below. The accuracy is 0.928909952607 on the training set and 0.95 on the test set; the resulting decision boundary is plotted below. As these results show, regularization lowers the training accuracy, because it limits the network's ability to fit the training data, but it improves accuracy on the test set.