Ng Deep Learning: Building an L-Layer Neural Network, with a Worked Example
2018-03-04 16:35
L-Layer Neural Network
import numpy as np
import h5py
import matplotlib.pyplot as plt

def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    cache = Z
    return A, cache

def relu(Z):
    A = np.maximum(0, Z)
    cache = Z
    return A, cache

def sigmoid_backward(dA, cache):
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA*s*(1-s)
    return dZ

def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def initialize_parameters_deep(layer_dims):
    L = len(layer_dims)
    parameters = {}
    for l in range(1, L):
        # without the "/ np.sqrt(layer_dims[l-1])" scaling here, the gradients vanish
        parameters['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])
        parameters['b'+str(l)] = np.zeros((layer_dims[l], 1))
    return parameters

def linear_forward(A_prev, W, b):
    Z = np.dot(W, A_prev) + b
    linear_cache = (A_prev, W, b)
    return Z, linear_cache

def linear_activation_forward(A_prev, W, b, activation):
    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)  # returns two values; activation_cache stores Z
    elif activation == 'relu':
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache

def L_model_forward(X, parameters):
    L = len(parameters)//2  # floor division: two entries (W, b) per layer
    A_prev = X
    caches = []
    for l in range(1, L):
        A, cache = linear_activation_forward(A_prev, parameters['W'+str(l)], parameters['b'+str(l)], activation='relu')
        A_prev = A
        caches.append(cache)
    AL, cache = linear_activation_forward(A_prev, parameters['W'+str(L)], parameters['b'+str(L)], activation='sigmoid')
    caches.append(cache)
    return AL, caches

def compute_cost(AL, Y):
    # cross-entropy cost, vectorized over the m examples
    m = Y.shape[1]
    cost = -(np.dot(Y, np.log(AL).T) + np.dot(1-Y, np.log(1-AL).T))/m
    cost = np.squeeze(cost)
    return cost

def linear_backward(dZ, linear_cache):
    m = dZ.shape[1]
    (A_prev, W, b) = linear_cache
    dW = np.dot(dZ, A_prev.T)/m
    db = np.sum(dZ, axis=1, keepdims=True)/m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db

def linear_activation_backward(dA, linear_activation_cache, activation):
    (linear_cache, activation_cache) = linear_activation_cache
    Z = activation_cache
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, Z)
    elif activation == 'relu':
        dZ = relu_backward(dA, Z)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

def L_model_backward(AL, Y, caches):
    grads = {}
    dAL = -np.divide(Y, AL) + np.divide(1-Y, 1-AL)  # derivative of the cost with respect to AL
    L = len(caches)  # number of layers
    # note: caches is indexed 0..L-1, so caches[L-1] belongs to layer L,
    # while the gradients are still keyed by layer numbers 1..L
    dA_prev, dW, db = linear_activation_backward(dAL, caches[L-1], activation='sigmoid')
    grads['dW'+str(L)] = dW
    grads['db'+str(L)] = db
    grads['dA'+str(L)] = dAL
    for l in range(L-1, 0, -1):  # iterate backwards through the hidden layers
        dA = dA_prev
        dA_prev, dW, db = linear_activation_backward(dA, caches[l-1], activation='relu')
        grads['dW'+str(l)] = dW
        grads['db'+str(l)] = db
        grads['dA'+str(l)] = dA
    return grads

def update_parameters(parameters, grads, learning_rate):
    L = len(parameters)//2
    for l in range(L):
        parameters['W'+str(l+1)] -= learning_rate * grads['dW'+str(l+1)]
        parameters['b'+str(l+1)] -= learning_rate * grads['db'+str(l+1)]
    return parameters
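Before wiring these functions to real data, it helps to smoke-test them on a tiny random problem and confirm that gradient descent actually lowers the cost. A minimal sketch (my addition, assuming the functions above are in scope; the shapes and hyperparameters are arbitrary):

np.random.seed(1)
X = np.random.randn(4, 10)                       # 10 examples with 4 features each
Y = (np.random.rand(1, 10) > 0.5).astype(float)  # random binary labels
params = initialize_parameters_deep([4, 3, 1])   # a small 4-3-1 network
AL, _ = L_model_forward(X, params)
print(compute_cost(AL, Y))                       # near log(2) ≈ 0.69 before training
for i in range(500):
    AL, caches = L_model_forward(X, params)
    grads = L_model_backward(AL, Y, caches)
    params = update_parameters(params, grads, 0.1)
print(compute_cost(AL, Y))                       # should be clearly lower after training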
Example Application

import numpy as np
import h5py
import matplotlib.pyplot as plt
from PIL import Image
from L_layer_neural_network import *
def load_dataset():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels
    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels
    classes = np.array(test_dataset["list_classes"][:])  # the list of classes
    # reshape the labels into row vectors of shape (1, m)
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()
print(train_x_orig.shape)
print(train_y.shape)
print(test_x_orig.shape)
print(test_y.shape)
print(classes.shape)
print(str(classes))
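If the standard Coursera cat/non-cat dataset is in place, these prints should come out roughly as:

(209, 64, 64, 3)
(1, 209)
(50, 64, 64, 3)
(1, 50)
(2,)
[b'non-cat' b'cat']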
num_pix = train_x_orig.shape[1]
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0],num_pix*num_pix*3).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0],num_pix*num_pix*3).T
print(train_x_flatten.shape)
print(test_x_flatten.shape)
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.
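The reshape-then-transpose idiom above flattens each image into a column, so the design matrices have shape (num_pix*num_pix*3, m), one column per example, and dividing by 255 rescales the pixel values into [0, 1]. A tiny sketch of the same idiom on made-up data (my own illustration, not from the original post):

batch = np.arange(24).reshape(2, 2, 2, 3)    # two "images" of shape (2, 2, 3)
flat = batch.reshape(batch.shape[0], -1).T   # one column per example
print(flat.shape)                            # (12, 2)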
def L_layer_model(X, Y, layer_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    np.random.seed(1)
    costs = []  # collected for plotting
    parameters = initialize_parameters_deep(layer_dims)
    for i in range(num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0 and print_cost:
            print("iteration " + str(i) + ' :' + str(cost))
            costs.append(cost)
    if print_cost:
        plt.plot(np.squeeze(costs))
        plt.ylabel('Cost')
        plt.xlabel('Iterations (per 100)')
        plt.title('Learning rate = ' + str(learning_rate))
        plt.show()
    return parameters
def predict(X, Y, parameters):
    AL, caches = L_model_forward(X, parameters)
    AL = np.around(AL)  # threshold the sigmoid outputs at 0.5
    # for 0/1 labels, 1 - mean(|prediction - label|) is the classification accuracy
    prediction = 1 - np.mean(np.abs(AL - Y))
    return prediction

def predict_image(X, Y, parameters):
    AL, caches = L_model_forward(X, parameters)
    AL = np.around(AL)
    return AL
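To see why that expression in predict is the accuracy, here is a hypothetical four-example check (my own illustration):

AL = np.array([[0.9, 0.2, 0.7, 0.4]])
Y = np.array([[1, 0, 0, 1]])
preds = np.around(AL)                  # [[1., 0., 1., 0.]]
print(1 - np.mean(np.abs(preds - Y)))  # 0.5
print(np.mean(preds == Y))             # 0.5 — the explicit accuracy, same value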
layers_dims = [12288, 20, 7, 5, 1]
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)
pred_train = predict(train_x,train_y,parameters)
print(pred_train)
pred_test = predict(test_x,test_y,parameters)
print(pred_test)
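For reference, on the Coursera cat/non-cat data this 4-layer configuration is typically reported to reach roughly 0.98 training accuracy and 0.8 test accuracy after 2500 iterations; exact values depend on the environment.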
for i in range(7):
    my_image = "predict" + str(i+1) + ".jpg"
    my_label_y = [1]
    fname = "images/" + my_image
    # scipy.ndimage.imread and scipy.misc.imresize have been removed from modern SciPy;
    # PIL (imported above) does the same job
    image = np.array(Image.open(fname))
    my_image = np.array(Image.open(fname).resize((num_pix, num_pix))).reshape((num_pix*num_pix*3, 1))
    my_image = my_image/255.  # normalize exactly like the training data
    my_predicted_image = predict_image(my_image, my_label_y, parameters)
    print("y_predict = " + str(np.squeeze(my_predicted_image)) + ", your L-layer model predicts a \"" + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8") + "\" picture.")
    plt.imshow(image)
    plt.show()
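Note that this last loop assumes seven images named predict1.jpg through predict7.jpg exist under images/; supply your own pictures there to try the trained model.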