
[Deep Learning Study Notes] Annotating yusugomori's SdA code -- SdA.cpp -- Model Training and Prediction

2013-08-04 21:04
This is the core part of the model. There is not much code; the basic idea is layer-by-layer training, with the output of the previous layer used as the input to the next. The hidden layer and the dA share the same network structure. The points that deserve attention are annotated directly in the code. In addition, I found a bug in the original implementation, which has been corrected below.
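
To put the three member functions in context, here is a minimal usage sketch. The constructor signature SdA(N, n_ins, hidden_layer_sizes, n_outs, n_layers), the header name "SdA.h", and the flat row-major layout of train_X/train_Y are assumptions inferred from how pretrain/finetune index their input arguments; they are not shown in this post.

// minimal, hedged sketch of how SdA.cpp might be driven (assumed constructor signature)
#include "SdA.h"

int main() {
    int train_N = 10, n_ins = 20, n_outs = 2;
    int hidden_layer_sizes[] = {15, 15};
    int n_layers = 2;

    // flat, row-major data: sample n occupies input[n*n_ins .. n*n_ins + n_ins - 1]
    int *train_X = new int[train_N * n_ins];   // binary inputs
    int *train_Y = new int[train_N * n_outs];  // one-hot labels
    // ... fill train_X / train_Y here ...

    SdA sda(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);
    sda.pretrain(train_X, /*lr=*/0.1, /*corruption_level=*/0.3, /*epochs=*/1000); // layer-wise dA training
    sda.finetune(train_X, train_Y, /*lr=*/0.1, /*epochs=*/500);                   // supervised fine-tuning

    int test_x[20] = {0};          // one test sample
    double test_y[2];              // predicted class probabilities
    sda.predict(test_x, test_y);   // forward pass + softmax

    delete[] train_X;
    delete[] train_Y;
    return 0;
}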

The code is as follows:

void SdA::pretrain(
    int *input,
    double lr,
    double corruption_level,
    int epochs
)
{
    int *layer_input;
    int prev_layer_input_size;
    int *prev_layer_input;

    int *train_X = new int[n_ins];

    for(int i=0; i<n_layers; i++)    // layer-wise, i
    {
        for(int epoch=0; epoch<epochs; epoch++)    // training epochs, epoch
        {
            for(int n=0; n<N; n++)    // input x1...xN, iterate over each sample, n
            {
                // initial input
                for(int m=0; m<n_ins; m++)    // get the nth input sample
                    train_X[m] = input[n * n_ins + m];

                // set the layer input and train it
                // the code here is a little complicated:
                // it calculates the node values layer by layer, from the bottom
                // input layer up to the layer currently being trained -- i. Then it
                // trains the network between the (i-1)-th and the i-th layer as a
                // denoising auto-encoder
                for(int l=0; l<=i; l++)    // l
                {
                    if(l == 0)    // the first layer
                    {
                        layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++)
                            layer_input[j] = train_X[j];
                    }
                    else          // the remaining layers
                    {
                        // take the value of the previous layer from the last 'layer_input'
                        // as the input for the current layer
                        if(l == 1)
                            prev_layer_input_size = n_ins;
                        else
                            prev_layer_input_size = hidden_layer_sizes[l-2];

                        prev_layer_input = new int[prev_layer_input_size];
                        for(int j=0; j<prev_layer_input_size; j++)
                            prev_layer_input[j] = layer_input[j];
                        delete[] layer_input;

                        // calculate the value of the current layer from prev_layer_input
                        // and put the value into the current layer_input
                        layer_input = new int[hidden_layer_sizes[l-1]];
                        sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);

                        delete[] prev_layer_input;
                    }
                } // for l

                // train the current layer as a denoising auto-encoder
                dA_layers[i]->train(layer_input, lr, corruption_level);

            }    // for N
        }    // for epochs
    } // for n_layers

    delete[] train_X;
    delete[] layer_input;
}
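
Note that in pretrain the value of layer l is produced by sigmoid_layers[l-1]->sample_h_given_v: the already-trained lower layers compute sigmoid activations and then binarize them by sampling, so the next dA again receives binary input. Below is a rough sketch of what that call does; the member names W, b, n_in, n_out and the binomial(1, p) helper come from my reading of yusugomori's HiddenLayer class and should be treated as assumptions, since they are not shown in this post.

#include <cmath>

// rough sketch of HiddenLayer::sample_h_given_v as it is used in pretrain above
// (assumed members: double **W, double *b, int n_in, int n_out, plus a Bernoulli sampler)
void HiddenLayer::sample_h_given_v(int *input, int *sample) {
    for(int i=0; i<n_out; i++) {
        // mean activation of hidden unit i: sigmoid(W[i] . input + b[i])
        double a = b[i];
        for(int j=0; j<n_in; j++) a += W[i][j] * input[j];
        double p = 1.0 / (1.0 + exp(-a));

        // binarize by drawing a Bernoulli sample with success probability p
        sample[i] = binomial(1, p);   // binomial(n, p): sampling helper assumed from the same code base
    }
}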

void SdA::finetune(
    int *input,
    int *label,
    double lr,
    int epochs)
{
    int *layer_input;
    int prev_layer_input_size;
    int *prev_layer_input;

    int *train_X = new int[n_ins];
    int *train_Y = new int[n_outs];

    for(int epoch=0; epoch<epochs; epoch++)
    {
        for(int n=0; n<N; n++)    // input x1...xN
        {
            // initial input
            for(int m=0; m<n_ins; m++)
                train_X[m] = input[n * n_ins + m];
            for(int m=0; m<n_outs; m++)
                train_Y[m] = label[n * n_outs + m];

            // calculate the value of the last dA layer.
            // the biggest difference from the corresponding block in pretrain is:
            // here the weights of the hidden layers have already been trained,
            // so they can be used directly to compute the output layer by layer.
            // in pretrain, however, the weights are not trained yet; each layer
            // must first be trained before the value of the next layer can be
            // calculated.
            for(int i=0; i<n_layers; i++)
            {
                if(i == 0)
                {
                    prev_layer_input = new int[n_ins];
                    for(int j=0; j<n_ins; j++)
                        prev_layer_input[j] = train_X[j];
                }
                else
                {
                    prev_layer_input = new int[hidden_layer_sizes[i-1]];
                    for(int j=0; j<hidden_layer_sizes[i-1]; j++)
                        prev_layer_input[j] = layer_input[j];
                    delete[] layer_input;
                }

                layer_input = new int[hidden_layer_sizes[i]];
                sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
                delete[] prev_layer_input;

            } // for n_layers

            // train the output (last) layer by logistic regression
            log_layer->train(layer_input, train_Y, lr);

        } // for N
        // lr *= 0.95;
    } // for epoch

    delete[] layer_input;
    delete[] train_X;
    delete[] train_Y;
}
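
In finetune the hidden layers are only used for the forward pass; the only parameters updated here are those of log_layer. The call log_layer->train(layer_input, train_Y, lr) is one stochastic-gradient step of softmax regression on the top hidden activations. A minimal sketch of such a step is shown below; the member names (W, b, n_in, n_out, N) and the 1/N scaling mirror my reading of yusugomori's LogisticRegression class and should be treated as assumptions.

// sketch of one SGD step of softmax regression, as log_layer->train is used above
// (assumed members: double **W, double *b, int n_in, int n_out, int N)
void LogisticRegression::train(int *x, int *y, double lr) {
    double *p = new double[n_out];       // p(y = i | x)

    // linear scores, then softmax to get class probabilities
    for(int i=0; i<n_out; i++) {
        p[i] = b[i];
        for(int j=0; j<n_in; j++) p[i] += W[i][j] * x[j];
    }
    softmax(p);

    // gradient of the log-likelihood: (target - probability)
    for(int i=0; i<n_out; i++) {
        double dy = y[i] - p[i];
        for(int j=0; j<n_in; j++) W[i][j] += lr * dy * x[j] / N;
        b[i] += lr * dy / N;
    }

    delete[] p;
}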

void SdA::predict(
    int *x,
    double *y
)
{
    double *layer_input;
    int prev_layer_input_size;
    double *prev_layer_input;

    double linear_output;

    prev_layer_input = new double[n_ins];
    for(int j=0; j<n_ins; j++)
        prev_layer_input[j] = x[j];

    // layer activation: calculate the output values layer by layer
    for(int i=0; i<n_layers; i++)
    {
        layer_input = new double[sigmoid_layers[i]->n_out];

        // linear_output = 0.0;    // bug in the original code: it must be reset inside the 'for k' loop below!
        for(int k=0; k<sigmoid_layers[i]->n_out; k++)
        {
            linear_output = 0.0;    // here is the right place
            for(int j=0; j<sigmoid_layers[i]->n_in; j++)
            {
                linear_output +=
                    sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
            }
            linear_output += sigmoid_layers[i]->b[k];
            layer_input[k] = sigmoid(linear_output);
        }
        delete[] prev_layer_input;

        if(i < n_layers-1)
        {
            prev_layer_input = new double[sigmoid_layers[i]->n_out];
            for(int j=0; j<sigmoid_layers[i]->n_out; j++)
                prev_layer_input[j] = layer_input[j];
            delete[] layer_input;
        }
    } // for n_layers

    // the logistic regression layer
    for(int i=0; i<log_layer->n_out; i++)
    {
        y[i] = 0;
        for(int j=0; j<log_layer->n_in; j++)
        {
            y[i] += log_layer->W[i][j] * layer_input[j];
        }
        y[i] += log_layer->b[i];
    }

    log_layer->softmax(y);

    delete[] layer_input;
}
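
After the linear output layer, log_layer->softmax(y) normalizes the scores into class probabilities. For completeness, here is a standalone sketch of such a softmax; the max-subtraction for numerical stability matches my reading of yusugomori's LogisticRegression::softmax, but treat the exact details as an assumption.

#include <cmath>

// standalone sketch of the softmax applied to the n_out output scores above
void softmax(double *x, int n_out) {
    // subtract the largest score before exponentiating, for numerical stability
    double max = x[0];
    for(int i=1; i<n_out; i++) if(x[i] > max) max = x[i];

    double sum = 0.0;
    for(int i=0; i<n_out; i++) {
        x[i] = std::exp(x[i] - max);
        sum += x[i];
    }
    for(int i=0; i<n_out; i++) x[i] /= sum;   // probabilities now sum to 1
}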