您的位置：首页 > 理论基础 > 计算机网络

深度学习入门实践_十行搭建手写数字识别神经网络

2018-03-01 17:30 1051 查看

本次实践是基于李宏毅老师ML课程中“Hello world” of deep learning章节进行的实验探究。课程中李宏毅老师使用的是keras 2.0.1。可以参考中文文档自行安装。

参考链接：

Keras中文文档

李宏毅老师“Hello world” of deep learning PPT

一张图解释为什么要用Keras

1. Keras可以看做TensorFlow的接口，用Keras就相当于在用TensorFlow。

2. Keras集成了TensorFlow的许多复杂操作，使用起来更简洁。

3. 入门直接上手TensorFlow比较复杂。

数据准备

课程中获取数据的方法是从库中直接load_data

# Load MNIST through the Keras dataset helper; the call yields a
# (train, test) pair, each of which is an (images, labels) tuple.
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

但是由于网络限制（不知道是不是因为wall，试了几次都没成功），就直接去官网下了数据，参见mnist数据集下载地址。

该数据下载后得到的是idx格式数据，具体处理方法参考了博客《使用Python解析MNIST数据集（IDX文件格式）》。测试可用的源码如下（解析规则在注释里写得很详细）。该文件保存为load_data.py，后文会直接调用。

# encoding: utf-8
"""
解析MNIST手写数字数据文件（IDX格式），读取为numpy数组，并可用matplotlib显示图片。
数据集下载地址为http://yann.lecun.com/exdb/mnist。
相关格式转换见官网以及代码注释。

========================
关于IDX文件格式的解析规则：
========================
THE IDX FILE FORMAT

the IDX file format is a simple format for vectors and multidimensional matrices of various numerical types.
The basic format is

magic number
size in dimension 0
size in dimension 1
size in dimension 2
.....
size in dimension N
data

The magic number is an integer (MSB first). The first 2 bytes are always 0.

The third byte codes the type of the data:
0x08: unsigned byte
0x09: signed byte
0x0B: short (2 bytes)
0x0C: int (4 bytes)
0x0D: float (4 bytes)
0x0E: double (8 bytes)

The 4-th byte codes the number of dimensions of the vector/matrix: 1 for vectors, 2 for matrices....

The sizes in each dimension are 4-byte integers (MSB first, high endian, like in most non-Intel processors).

The data is stored like in a C array, i.e. the index in the last dimension changes the fastest.
"""

import numpy as np
import struct
import matplotlib.pyplot as plt

# Training set image file
train_images_idx3_ubyte_file = './data/train-images-idx3-ubyte'
# Training set label file
train_labels_idx1_ubyte_file = './data/train-labels-idx1-ubyte'

# Test set image file
test_images_idx3_ubyte_file = './data/t10k-images-idx3-ubyte'
# Test set label file
test_labels_idx1_ubyte_file = './data/t10k-labels-idx1-ubyte'

def decode_idx3_ubyte(idx3_ubyte_file):
    """Parse an idx3 (image) file into a NumPy array.

    The file starts with four big-endian 32-bit integers (magic number,
    number of images, rows per image, columns per image) followed by one
    unsigned byte per pixel. Bytes after the declared pixel data are ignored.

    :param idx3_ubyte_file: path to the idx3 file
    :return: float64 np.array of shape (num_images, num_rows, num_cols)
    :raises struct.error: if the file is shorter than the 16-byte header
    :raises ValueError: if the file holds fewer pixel bytes than the header declares
    """
    # Read the binary data; the context manager closes the file handle,
    # which a bare open(...).read() leaves to the garbage collector.
    with open(idx3_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # Header: magic number, image count, image height, image width.
    fmt_header = '>iiii'
    _magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)

    # Decode every pixel in a single pass instead of unpacking image by image.
    pixels = np.frombuffer(
        bin_data,
        dtype=np.uint8,
        count=num_images * num_rows * num_cols,
        offset=struct.calcsize(fmt_header),
    )
    # astype copies into a writable float64 array, the dtype np.empty()
    # produced in the loop-based version of this function.
    return pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)

def decode_idx1_ubyte(idx1_ubyte_file):
    """Parse an idx1 (label) file into a NumPy array.

    The file starts with two big-endian 32-bit integers (magic number and
    number of labels) followed by one unsigned byte per label. Bytes after
    the declared labels are ignored.

    :param idx1_ubyte_file: path to the idx1 file
    :return: float64 np.array of shape (num_labels,)
    :raises struct.error: if the file is shorter than the 8-byte header
    :raises ValueError: if the file holds fewer label bytes than the header declares
    """
    # Read the binary data; the context manager closes the file handle,
    # which a bare open(...).read() leaves to the garbage collector.
    with open(idx1_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # Header: magic number and label count.
    fmt_header = '>ii'
    _magic_number, num_labels = struct.unpack_from(fmt_header, bin_data, 0)

    # Decode all labels in a single pass instead of unpacking byte by byte.
    labels = np.frombuffer(
        bin_data,
        dtype=np.uint8,
        count=num_labels,
        offset=struct.calcsize(fmt_header),
    )
    # astype copies into a writable float64 array, the dtype np.empty()
    # produced in the loop-based version of this function.
    return labels.astype(np.float64)

def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """Load the training images by decoding the given idx3 file.

    Layout of the TRAINING SET IMAGE FILE (train-images-idx3-ubyte):

    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  60000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel

    Pixels are organized row-wise. Pixel values are 0 to 255.
    0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path to the idx file
    :return: np.array of shape n*row*col, where n is the number of images
    """
    train_images = decode_idx3_ubyte(idx_ubyte_file)
    return train_images

def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """Load the training labels by decoding the given idx1 file.

    Layout of the TRAINING SET LABEL FILE (train-labels-idx1-ubyte):

    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  60000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label

    The labels values are 0 to 9.

    :param idx_ubyte_file: path to the idx file
    :return: np.array holding n labels, where n is the number of images
    """
    train_labels = decode_idx1_ubyte(idx_ubyte_file)
    return train_labels

def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):
    """Load the test images by decoding the given idx3 file.

    Layout of the TEST SET IMAGE FILE (t10k-images-idx3-ubyte):

    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000803(2051) magic number
    0004     32 bit integer  10000            number of images
    0008     32 bit integer  28               number of rows
    0012     32 bit integer  28               number of columns
    0016     unsigned byte   ??               pixel
    0017     unsigned byte   ??               pixel
    ........
    xxxx     unsigned byte   ??               pixel

    Pixels are organized row-wise. Pixel values are 0 to 255.
    0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: path to the idx file
    :return: np.array of shape n*row*col, where n is the number of images
    """
    test_images = decode_idx3_ubyte(idx_ubyte_file)
    return test_images

def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):
    """Load the test labels by decoding the given idx1 file.

    Layout of the TEST SET LABEL FILE (t10k-labels-idx1-ubyte):

    [offset] [type]          [value]          [description]
    0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    0004     32 bit integer  10000            number of items
    0008     unsigned byte   ??               label
    0009     unsigned byte   ??               label
    ........
    xxxx     unsigned byte   ??               label

    The labels values are 0 to 9.

    :param idx_ubyte_file: path to the idx file
    :return: np.array holding n labels, where n is the number of images
    """
    test_labels = decode_idx1_ubyte(idx_ubyte_file)
    return test_labels

def run():
    """Load the four MNIST files and display the first ten training samples."""
    train_images, train_labels = load_train_images(), load_train_labels()
    # The test split is loaded as well, although nothing below reads it.
    test_images, test_labels = load_test_images(), load_test_labels()

    # Print the label and show the image of the first ten samples to check
    # that the files were parsed correctly.
    # NOTE(review): the pasted source lost its indentation; plt.show() is
    # assumed to belong to the loop body (one window per sample) - confirm.
    for sample_idx in range(10):
        print(train_labels[sample_idx])
        plt.imshow(train_images[sample_idx], cmap='gray')
        plt.show()
    print('done')


if __name__ == '__main__':
    run()

数据预处理

导入相关包依赖及预处理函数

import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from load_data import *

#clean data
def load_dataset():
    """Load MNIST from the local idx files and prepare it for a dense network.

    Each image is flattened to a 28*28 float32 vector and divided by 255;
    labels are one-hot encoded over 10 classes with np_utils.to_categorical.

    :return: ((x_train, y_train), (x_test, y_test))
    """
    x_train, y_train = load_train_images(), load_train_labels()
    x_test, y_test = load_test_images(), load_test_labels()
    # Upper bound on how many training samples are kept.
    number = 60000
    x_train, y_train = x_train[0:number], y_train[0:number]
    # Flatten by the actual sample count (as is done for x_test) so that a
    # training file holding fewer than `number` images does not break reshape.
    x_train = x_train.reshape(x_train.shape[0], 28*28)
    x_test = x_test.reshape(x_test.shape[0], 28*28)
    x_train, x_test = x_train.astype('float32'), x_test.astype('float32')
    y_train, y_test = np_utils.to_categorical(y_train, 10), np_utils.to_categorical(y_test, 10)
    # Scale the 0-255 pixel values down by 255.
    x_train, x_test = x_train / 255, x_test / 255
    return (x_train, y_train), (x_test, y_test)

到此，我们得到了训练和测试网络所需要的数据。

课程中搭建的网络及训练结果

(x_train, y_train), (x_test, y_test) = load_dataset()
model = Sequential()
# Build a three-layer network: two sigmoid layers and a softmax output layer.
model.add(Dense(input_dim=28*28,units=633,activation='sigmoid'))
# The layer-size keyword is `units` (as on the first layer); the misspelt
# `unit=` is not an argument Dense accepts.
model.add(Dense(units=633,activation='sigmoid'))
model.add(Dense(units=10,activation='softmax'))

# Mean-squared-error loss with plain SGD, as in the course demo.
model.compile(loss='mse',optimizer=SGD(lr=0.1),metrics=['accuracy'])
model.fit(x_train,y_train,batch_size=100,epochs=20)
# result[0] is printed as the test loss and result[1] as the accuracy.
result = model.evaluate(x_test,y_test)
print('Test loss:', result[0])
print('Accuracy:', result[1])

李老师这个在课堂上两分钟构建出的三层网络达到的效果如下：

可以看到正确率是0.1135，效果并不太好。

对网络参数调整后的模型

改动地方主要为：

- 激励函数由sigmoid改为relu

- loss function由mse改为categorical_crossentropy

- 增加了Dropout，防止过拟合

改动后构建模型代码

(x_train, y_train), (x_test, y_test) = load_dataset()

# Two 700-unit ReLU layers, each followed by Dropout(0.2), then a 10-unit
# softmax output; the layers are handed to Sequential in order as a list.
model = Sequential([
    Dense(input_dim=28*28, units=700, activation='relu'),
    Dropout(0.2),
    Dense(units=700, activation='relu'),
    Dropout(0.2),
    Dense(units=10, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=0.1),
    metrics=['accuracy'],
)
# validation_split=0.05 is forwarded to fit() unchanged.
model.fit(
    x_train,
    y_train,
    batch_size=100,
    epochs=20,
    validation_split=0.05,
)
result = model.evaluate(x_test, y_test)

print('Finish..')
print('Test loss:', result[0])
print('Accuracy:', result[1])

得到的结果

得到了比较好的测试结果。其中，最主要的还是激励函数的影响（转自知乎Begin Again的回答）：

1. 采用sigmoid等函数，算激活函数时（指数运算），计算量大，反向传播求误差梯度时，求导涉及除法，计算量相对大，而采用Relu激活函数，整个过程的计算量节省很多。

2. 对于深层网络，sigmoid函数反向传播时，很容易就会出现梯度消失的情况（在sigmoid接近饱和区时，变换太缓慢，导数趋于0），这种情况会造成信息丢失，从而无法完成深层网络的训练。

3. Relu会使一部分神经元的输出为0，这样就造成了网络的稀疏性，并且减少了参数的相互依存关系，缓解了过拟合问题的发生。

最后贴上完整代码

#encoding:utf-8

import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from load_data import *

#clean data
def load_dataset():
    """Load MNIST from the local idx files and prepare it for a dense network.

    Each image is flattened to a 28*28 float32 vector and divided by 255;
    labels are one-hot encoded over 10 classes with np_utils.to_categorical.

    :return: ((x_train, y_train), (x_test, y_test))
    """
    x_train, y_train = load_train_images(), load_train_labels()
    x_test, y_test = load_test_images(), load_test_labels()
    # Upper bound on how many training samples are kept.
    number = 60000
    x_train, y_train = x_train[0:number], y_train[0:number]
    # Flatten by the actual sample count (as is done for x_test) so that a
    # training file holding fewer than `number` images does not break reshape.
    x_train = x_train.reshape(x_train.shape[0], 28*28)
    x_test = x_test.reshape(x_test.shape[0], 28*28)
    x_train, x_test = x_train.astype('float32'), x_test.astype('float32')
    y_train, y_test = np_utils.to_categorical(y_train, 10), np_utils.to_categorical(y_test, 10)
    # Scale the 0-255 pixel values down by 255.
    x_train, x_test = x_train / 255, x_test / 255
    return (x_train, y_train), (x_test, y_test)

def main():
    """Train the ReLU/Dropout network on MNIST and print test loss and accuracy."""
    (x_train, y_train), (x_test, y_test) = load_dataset()

    # Network definition - fewer than ten lines, easy to get started with.
    model = Sequential([
        Dense(input_dim=28*28, units=700, activation='relu'),
        Dropout(0.2),
        Dense(units=700, activation='relu'),
        Dropout(0.2),
        Dense(units=10, activation='softmax'),
    ])
    sgd_optimizer = SGD(lr=0.1)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=sgd_optimizer,
        metrics=['accuracy'],
    )
    model.fit(x_train, y_train, batch_size=100, epochs=20, validation_split=0.05)

    # evaluate() yields the loss followed by the accuracy metric.
    test_loss, test_accuracy = model.evaluate(x_test, y_test)

    # Report the results.
    print('Finish..')
    print('Test loss:', test_loss)
    print('Accuracy:', test_accuracy)


if __name__ == '__main__':
    main()

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： 深度学习入门十行手写数字识别

相关文章推荐

新的分享

章节导航