
Machine Learning (4): BP Neural Network Principles and a Python Implementation

2017-07-04 21:11
The BP (backpropagation) neural network is an important foundation of deep learning and one of its key predecessor algorithms, so it is well worth understanding both its principles and the tricks of implementing it. Below we discuss the principles and then an implementation.

1. Principles

  I will fill this section in when I have time. For now, please refer to a good English article: A Step by Step Backpropagation Example

  For activation functions, see: Common Activation Functions in Deep Learning: Sigmoid, ReLU & Softmax

  An easy-to-follow take on initialization: CS231n Course Notes (translated): Neural Networks Notes 2

  Useful tricks: Tricks in Neural Network Training: Efficient BackProp

  A simple walk-through of the BPNN computation: Understanding Backpropagation in Neural Networks in One Article
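
Until that section is written, here is a compact summary (notation mine) of what the implementation in Section 2 computes for a single layer: X is the layer input with samples in columns, W the weight matrix, θ the bias, f the activation function, t the targets, m the batch size, and η the learning rate.

$$
\begin{aligned}
\text{forward:}\quad & a = W^{\top}X + \theta, \qquad z = f(a)\\
\text{loss:}\quad & E = \frac{1}{m}\sum_{i=1}^{m}\tfrac{1}{2}\,\lVert t_i - z_i\rVert^{2}\\
\text{backward:}\quad & \delta = \delta_{\mathrm{in}} \odot f'(a), \qquad \Delta W = \tfrac{1}{m}\,X\,\delta^{\top}, \qquad \delta_{\mathrm{out}} = W\,\delta\\
\text{update:}\quad & W \leftarrow W - \eta\,\Delta W, \qquad \theta \leftarrow \theta - \eta\,\bar{\delta}
\end{aligned}
$$

At the output layer, δ_in is the loss derivative z − t; at every other layer it is the δ_out handed down by the layer above, and δ̄ denotes δ averaged over the batch.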

2. Implementation: Mini-Batch Stochastic Gradient Descent

The code below implements a BP neural network with a configurable number of layers; the network structure is specified through the net_struct parameter. For example, a network with only an output layer (no hidden layer), a "sigmoid" activation, and a learning rate of 0.01 is defined as follows:

net_struct = [[10, "sigmoid", 0.01]]  # network structure


A network with a 100-neuron hidden layer, followed by a 50-neuron hidden layer and a 10-neuron output layer, is defined like this:

net_struct = [[100, "sigmoid", 0.01], [50, "sigmoid", 0.01], [10, "sigmoid", 0.01]]  # network structure
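
As a quick preview of how such a structure is consumed (BpNet and its train method are defined in the full listing below; train_features, tr_labels, test_features and te_labels are the arrays prepared in that listing's main block):

net_struct = [[100, "sigmoid", 0.01], [50, "sigmoid", 0.01], [10, "sigmoid", 0.01]]
net = BpNet(net_struct, stop_crit=100, max_iter=10000, batch_size=100)
[acc_list, loss_list] = net.train(train_features, tr_labels, test_features, te_labels)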


The implementation, the part coders like best, is as follows:


# encoding=utf8
'''
Created on 2017-7-3

@author: Administrator
'''
import random
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split as ttsplit

class LossFun:
    def __init__(self, lf_type="least_square"):
        self.name = "loss function"
        self.type = lf_type

    def cal(self, t, z):
        loss = 0
        if self.type == "least_square":
            loss = self.least_square(t, z)
        return loss

    def cal_deriv(self, t, z):
        delta = 0
        if self.type == "least_square":
            delta = self.least_square_deriv(t, z)
        return delta

    def least_square(self, t, z):
        # 0.5 * (t - z)^2 summed over classes and averaged over the batch;
        # its derivative with respect to z is (z - t), as implemented below
        zsize = z.shape
        sample_num = zsize[1]
        return np.sum(0.5 * (t - z) * (t - z)) / sample_num

    def least_square_deriv(self, t, z):
        return z - t

class ActivationFun:
    '''
    Activation functions
    '''
    def __init__(self, atype="sigmoid"):
        self.name = "activation function library"
        self.type = atype

    def cal(self, a):
        z = 0
        if self.type == "sigmoid":
            z = self.sigmoid(a)
        elif self.type == "relu":
            z = self.relu(a)
        return z

    def cal_deriv(self, a):
        z = 0
        if self.type == "sigmoid":
            z = self.sigmoid_deriv(a)
        elif self.type == "relu":
            z = self.relu_deriv(a)
        return z

    def sigmoid(self, a):
        return 1 / (1 + np.exp(-a))

    def sigmoid_deriv(self, a):
        fa = self.sigmoid(a)
        return fa * (1 - fa)

    def relu(self, a):
        # actually a "leaky" ReLU: negative inputs are scaled by 0.1 instead of clipped to 0
        idx = a <= 0
        a[idx] = 0.1 * a[idx]
        return a  # np.maximum(a, 0.0)

    def relu_deriv(self, a):
        # derivative of the leaky ReLU above (modifies a in place)
        a[a > 0] = 1.0
        a[a <= 0] = 0.1
        return a

class Layer:
    '''
    A fully connected neural network layer
    '''
    def __init__(self, num_neural, af_type="sigmoid", learn_rate=0.5):
        self.af_type = af_type  # activation function type
        self.learn_rate = learn_rate
        self.num_neural = num_neural
        self.dim = None
        self.W = None

        self.a = None
        self.X = None
        self.z = None
        self.delta = None
        self.theta = None
        self.act_fun = ActivationFun(self.af_type)

    def fp(self, X):
        '''
        Forward propagation
        '''
        self.X = X
        xsize = X.shape
        self.dim = xsize[0]
        self.num = xsize[1]

        if self.W is None:
            # self.W = np.random.random((self.dim, self.num_neural)) - 0.5
            # self.W = np.random.uniform(-1, 1, size=(self.dim, self.num_neural))
            if self.af_type == "sigmoid":
                self.W = np.random.normal(0, 1, size=(self.dim, self.num_neural)) / np.sqrt(self.num)
            elif self.af_type == "relu":
                self.W = np.random.normal(0, 1, size=(self.dim, self.num_neural)) * np.sqrt(2.0 / self.num)
        if self.theta is None:
            # self.theta = np.random.random((self.num_neural, 1)) - 0.5
            # self.theta = np.random.uniform(-1, 1, size=(self.num_neural, 1))
            if self.af_type == "sigmoid":
                self.theta = np.random.normal(0, 1, size=(self.num_neural, 1)) / np.sqrt(self.num)
            elif self.af_type == "relu":
                self.theta = np.random.normal(0, 1, size=(self.num_neural, 1)) * np.sqrt(2.0 / self.num)
        # forward pre-activation a = W^T X + theta (bias broadcast over the batch)
        self.a = (self.W.T).dot(self.X) + self.theta
        # forward activation z = f(a)
        self.z = self.act_fun.cal(self.a)
        return self.z

    def bp(self, delta):
        '''
        Back propagation
        '''
        self.delta = delta * self.act_fun.cal_deriv(self.a)
        # bias update, averaged over all samples in the batch
        self.theta = np.array([np.mean(self.theta - self.learn_rate * self.delta, 1)]).T
        dW = self.X.dot(self.delta.T) / self.num
        self.W = self.W - self.learn_rate * dW
        delta_out = self.W.dot(self.delta)
        return delta_out

class BpNet:
    '''
    BP neural network
    '''
    def __init__(self, net_struct, stop_crit, max_iter, batch_size=10):
        self.name = "network"
        self.net_struct = net_struct
        if len(self.net_struct) == 0:
            print("no layer is specified!")
            return

        self.stop_crit = stop_crit
        self.max_iter = max_iter
        self.batch_size = batch_size
        self.layers = []
        self.num_layers = 0
        # build the network
        self.create_net(net_struct)
        self.loss_fun = LossFun("least_square")

    def create_net(self, net_struct):
        '''
        Build the network layer by layer
        '''
        self.num_layers = len(net_struct)
        for i in range(self.num_layers):
            self.layers.append(Layer(net_struct[i][0], net_struct[i][1], net_struct[i][2]))

    def train(self, X, t, Xtest=None, ttest=None):
        '''
        Train the network with mini-batch stochastic gradient descent
        '''
        eva_acc_list = []
        eva_loss_list = []

        xshape = X.shape
        num = xshape[0]
        dim = xshape[1]

        for k in range(self.max_iter):
            # sample a random mini-batch; columns are samples, rows are features
            idxs = random.sample(range(num), self.batch_size)
            xi = X[idxs, :].T
            ti = t[idxs, :].T
            # forward pass
            zi = self.fp(xi)

            # error at the output layer
            delta_i = self.loss_fun.cal_deriv(ti, zi)

            # backward pass
            self.bp(delta_i)

            # evaluate accuracy every 100 iterations
            if Xtest is not None:
                if k % 100 == 0:
                    [eva_acc, eva_loss] = self.test(Xtest, ttest)
                    eva_acc_list.append(eva_acc)
                    eva_loss_list.append(eva_loss)
                    print("%4d, %4f, %4f" % (k, eva_acc, eva_loss))
            else:
                print("%4d" % k)
        return [eva_acc_list, eva_loss_list]

    def test(self, X, t):
        '''
        Evaluate model accuracy
        '''
        num = X.shape[0]
        z = self.fp_eval(X.T)
        t = t.T
        est_pos = np.argmax(z, 0)
        real_pos = np.argmax(t, 0)
        correct_count = np.sum(est_pos == real_pos)
        acc = 1.0 * correct_count / num
        loss = self.loss_fun.cal(t, z)
        return [acc, loss]

    def fp(self, X):
        '''
        Forward pass through all layers
        '''
        z = X
        for i in range(self.num_layers):
            z = self.layers[i].fp(z)
        return z

    def bp(self, delta):
        '''
        Backward pass through all layers
        '''
        z = delta
        for i in range(self.num_layers - 1, -1, -1):
            z = self.layers[i].bp(z)
        return z

    def fp_eval(self, X):
        '''
        Forward pass for evaluation
        '''
        layers = self.layers
        z = X
        for i in range(self.num_layers):
            z = layers[i].fp(z)
        return z

def z_score_normalization(x):
    mu = np.mean(x)
    sigma = np.std(x)
    x = (x - mu) / sigma
    return x

def sigmoid(X, useStatus):
    # standalone sigmoid helper (not used by the classes above)
    if useStatus:
        return 1.0 / (1 + np.exp(-float(X)))
    else:
        return float(X)

def plot_curve(data, title, lege, xlabel, ylabel):
    num = len(data)
    idx = range(num)
    plt.plot(idx, data, color="r", linewidth=1)

    plt.xlabel(xlabel, fontsize="xx-large")
    plt.ylabel(ylabel, fontsize="xx-large")
    plt.title(title, fontsize="xx-large")
    plt.legend([lege], fontsize="xx-large", loc='upper left')
    plt.show()

if __name__ == "__main__":
    print('This is main of module "bp_nn.py"')

    print("Import data")
    raw_data = pd.read_csv('./train.csv', header=0)
    data = raw_data.values
    imgs = data[:, 1:]
    labels = data[:, 0]
    train_features, test_features, train_labels, test_labels = ttsplit(
        imgs, labels, test_size=0.33, random_state=23323)

    train_features = z_score_normalization(train_features)
    test_features = z_score_normalization(test_features)

    # one-hot encode the training labels
    sample_num = train_labels.shape[0]
    tr_labels = np.zeros([sample_num, 10])
    for i in range(sample_num):
        tr_labels[i][train_labels[i]] = 1

    # one-hot encode the test labels
    sample_num = test_labels.shape[0]
    te_labels = np.zeros([sample_num, 10])
    for i in range(sample_num):
        te_labels[i][test_labels[i]] = 1

    print(train_features.shape)
    print(tr_labels.shape)
    print(test_features.shape)
    print(te_labels.shape)

    stop_crit = 100    # stopping criterion (stored but not used)
    max_iter = 10000   # maximum number of iterations
    batch_size = 100   # number of samples per mini-batch
    # network structure: [[num_neurons, activation, learning_rate], ...]
    net_struct = [[100, "relu", 0.01], [10, "sigmoid", 0.1]]
    # net_struct = [[200, "sigmoid", 0.5], [100, "sigmoid", 0.5], [10, "sigmoid", 0.5]]

    bpNNCls = BpNet(net_struct, stop_crit, max_iter, batch_size)

    # train model
    [acc, loss] = bpNNCls.train(train_features, tr_labels, test_features, te_labels)
    # [acc, loss] = bpNNCls.train(train_features, tr_labels)
    print("training model finished")
    plot_curve(acc, "BpNetwork Accuracy", "accuracy", "iter", "Accuracy")
    plot_curve(loss, "BpNetwork Loss", "loss", "iter", "Loss")

    # test model
    [acc, loss] = bpNNCls.test(test_features, te_labels)
    print("test accuracy: %f" % acc)



The experiments use the MNIST dataset, which can be downloaded from: https://github.com/WenDesi/lihang_book_algorithm/blob/master/data/train.csv

a. With the sigmoid activation and the network structure net_struct=[10,"sigmoid"] (no hidden layer, so it can be viewed as softmax regression), the validation accuracy and loss evolve as shown in the figures below:

[Figure: validation accuracy and loss curves for configuration (a)]

The test accuracy reaches 0.916017, which is quite good. However, stochastic gradient descent depends on the parameter initialization; with a poor initialization it converges slowly and may even give unsatisfactory results.
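
The initialization matters here because the training loop is plain stochastic gradient descent. For reference, the Layer class above already draws its weights from a scaled normal distribution rather than the plain uniform draws left commented out in fp (following the CS231n notes linked in Section 1). A standalone sketch of that scheme; the function name is mine, and n is whatever count you scale by (the code above uses the batch size, while the CS231n notes scale by the fan-in):

import numpy as np

def init_layer_weights(input_dim, num_neural, n, af_type="sigmoid"):
    # returns a weight matrix with the same shape as Layer.W: (input_dim, num_neural)
    if af_type == "sigmoid":
        # N(0, 1) scaled by 1/sqrt(n), so pre-activations do not saturate the sigmoid early on
        return np.random.normal(0, 1, size=(input_dim, num_neural)) / np.sqrt(n)
    elif af_type == "relu":
        # N(0, 1) scaled by sqrt(2/n) ("He"-style), keeping activation variance roughly constant for ReLU
        return np.random.normal(0, 1, size=(input_dim, num_neural)) * np.sqrt(2.0 / n)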

b. With sigmoid activations and the network structure net_struct=[200,"sigmoid",100,"sigmoid",10,"sigmoid"] (a 200-neuron hidden layer, a 100-neuron hidden layer, and a 10-neuron output layer), the validation accuracy and loss evolve as shown in the figures below:

[Figure: validation accuracy and loss curves for configuration (b)]

The validation accuracy reaches 0.963636, noticeably better than the softmax-style network above. The loss curves also show that, once hidden layers are added, convergence is more stable than without them.