您的位置:首页 > 其它

tiny-cnn执行过程分析(MNIST)

2016-01-31 17:42 337 查看
/article/3632752.html中以MNIST为例对tiny-cnn的使用进行了介绍,下面对其执行过程进行分析:
支持两种损失函数:(1)、mean squared error(均方差);(2)、cross entropy(交叉熵)。在MNIST中使用的是mean squared error,代码段:

// Mean-squared-error loss for regression, evaluated one output element at a time.
class mse {
public:
    // Loss contribution of a single output: half of the squared difference
    // between the prediction y and the target t.
    static float_t f(float_t y, float_t t) {
        const float_t residual = y - t;
        return residual * residual / 2;
    }

    // Derivative of f with respect to the prediction y.
    static float_t df(float_t y, float_t t) {
        const float_t residual = y - t;
        return residual;
    }
};
支持六种激活函数:(1)、tanh;(2)、sigmoid;(3)、softmax;(4)、rectified linear(relu);(5)、leaky relu;(6)、identity。MNIST中使用的是tanh,代码段:

class tan_h : public function {
public:
    // Hyperbolic tangent of the i-th element of v.
    //
    // Delegates to std::tanh instead of evaluating (e^x - e^-x) / (e^x + e^-x)
    // by hand: for large |v[i]| one of the two exponentials overflows to
    // infinity, which turns the hand-written quotient into inf/inf = NaN,
    // whereas std::tanh saturates at +/-1.
    float_t f(const vec_t& v, size_t i) const override {
        return std::tanh(v[i]);
    }

    // fast approximation of tanh (improve 2-3% speed in LeNet-5)
    /*float_t f(float_t x) const {
        const float_t x2 = x * x;
        x *= 1.0 + x2 * (0.1653 + x2 * 0.0097);
        return x / std::sqrt(1.0 + x * x);// invsqrt(static_cast<float>(1.0 + x * x));
    }*/

    // Derivative of the activation written in terms of its output y:
    // returns 1 - sqr(y) (sqr is presumably the project's squaring helper).
    float_t df(float_t y) const override {
        const auto y_squared = sqr(y);
        return 1.0 - y_squared;
    }
    // Returns the (lower, upper) pair (-0.8, 0.8) reported by this activation.
    std::pair<float_t, float_t> scale() const override {
        const double lower = -0.8;
        const double upper = 0.8;
        return std::make_pair(lower, upper);
    }
设计CNN结构,用于MNIST,与LeNet-5结构相似,去除了F6层:
输入层Input:图像大小32*32,神经元数量32*32=1024,代码段:

// Excerpt: read one image from the stream `ifs` and store it in `dst`
// with a padding border, rescaled to [scale_min, scale_max].
// Padded size: x_padding extra columns on the left and right,
// y_padding extra rows on the top and bottom.
const int width = header.num_cols + 2 * x_padding;
    const int height = header.num_rows + 2 * y_padding;

    // Raw pixel buffer: one byte per pixel, num_rows * num_cols bytes.
    std::vector<uint8_t> image_vec(header.num_rows * header.num_cols);

    ifs.read((char*) &image_vec[0], header.num_rows * header.num_cols);

    // resize() fills newly created elements with scale_min; the loop below
    // never writes the border, so the padding keeps that value
    // (assumes dst is empty on entry -- TODO confirm against the caller).
    dst.resize(width * height, scale_min);

    // Map each byte linearly from [0, 255] to [scale_min, scale_max] and
    // write it at its position shifted by (x_padding, y_padding).
    for (size_t y = 0; y < header.num_rows; y++)
      for (size_t x = 0; x < header.num_cols; x++)
        dst[width * (y + y_padding) + x + x_padding]
        = (image_vec[y * header.num_cols + x] / 255.0) * (scale_max - scale_min) + scale_min;
C1层:卷积窗大小5*5,输出特征图数量6,卷积窗种类6,输出特征图大小28*28,可训练参数5*5*6+6=156,神经元数量28*28*6=4704;
S2层:卷积窗大小2*2,输出下采样图数量6,卷积窗种类6,输出下采样图大小14*14,可训练参数1*6+6=12,神经元数量14*14*6=1176;
C3层:卷积窗大小5*5,输出特征图数量16,卷积窗种类16,输出特征图大小10*10,可训练参数6*16*5*5+16=2416,神经元数量10*10*16=1600;
S4层:卷积窗大小2*2,输出下采样图数量16,卷积窗种类16,输出下采样图大小5*5,可训练参数1*16+16=32,神经元数量5*5*16=400;
C5层:卷积窗大小5*5,输出特征图数量120,卷积窗种类120,输出特征图大小1*1,可训练参数5*5*16*120+120=48120,神经元数量1*120=120;
输出层Output:输出特征图数量10,卷积窗种类10,输出特征图大小1*1,可训练参数120*10+10=1210,神经元数量1*10=10。
原有MNIST图像大小为28*28,此处为32*32,上下左右各填补2个像素,填补的像素取值为-1,其它像素取值范围为[-1,1]。
权值和阈值(偏置)初始化:权值采用均匀随机数产生,阈值均赋0。
C1层权值,初始化范围[-sqrt(6.0/(25+150)), sqrt(6.0/(25+150))];
S2层权值,初始化范围[-sqrt(6.0/(4+1)), sqrt(6.0/(4+1))];
C3层权值,初始化范围[-sqrt(6.0/(150+400)), sqrt(6.0/(150+400))];
S4层权值,初始化范围[-sqrt(6.0/(4+1)), sqrt(6.0/(4+1))];
C5层权值,初始化范围[-sqrt(6.0/(400+3000)), sqrt(6.0/(400+3000))];
输出层权值,初始化范围[-sqrt(6.0/(120+10)), sqrt(6.0/(120+10))]。
前向传播:
C1层代码段:

// Excerpt: forward pass of a convolutional layer for one worker.
// Accumulates the weighted sums into `a`, adds the bias, then applies the
// activation h_ to produce `out`.
vec_t &a = a_[worker_index]; // w*x
        vec_t &out = output_[worker_index]; // output
        const vec_t &in = *(prev_out_padded_[worker_index]); // input
        
        // Sums are accumulated with += below, so start from zero.
        std::fill(a.begin(), a.end(), (float_t)0.0);

        // One iteration per output channel o.
        for_i(parallelize_, out_.depth_, [&](int o) {
            for (layer_size_t inc = 0; inc < in_.depth_; inc++) {
                // Skip input channels that the connection table does not link to o.
                if (!tbl_.is_connected(o, inc)) continue;

                // pw: kernel for the (o, inc) pair; pi: start of input channel inc;
                // pa: start of output channel o in the accumulator.
                const float_t *pw = &this->W_[weight_.get_index(0, 0, in_.depth_ * o + inc)];
                const float_t *pi = &in[in_padded_.get_index(0, 0, inc)];
                float_t *pa = &a[out_.get_index(0, 0, o)];

                for (layer_size_t y = 0; y < out_.height_; y++) {
                    for (layer_size_t x = 0; x < out_.width_; x++) {
                        const float_t * ppw = pw;
                        // Top-left corner of the input window for output position (x, y).
                        const float_t * ppi = pi + (y * h_stride_) * in_padded_.width_ + x * w_stride_;
                        float_t sum = (float_t)0.0;

                        // should be optimized for small kernel(3x3,5x5)
                        // Kernel weights are read sequentially (row by row) while the
                        // input window is walked with the padded row pitch.
                        for (layer_size_t wy = 0; wy < weight_.height_; wy++) {
                            for (layer_size_t wx = 0; wx < weight_.width_; wx++) {
                                sum += *ppw++ * ppi[wy * in_padded_.width_ + wx];
                            }
                        }
                        // Contributions from every connected input channel add up here.
                        pa[y * out_.width_ + x] += sum;
                    }
                }
            }

            // Add the per-channel bias b_[o] to every element of output channel o.
            if (!this->b_.empty()) {
                float_t *pa = &a[out_.get_index(0, 0, o)];
                float_t b = this->b_[o];
                std::for_each(pa, pa + out_.width_ * out_.height_, [&](float_t& f) { f += b; });
            }
        });

        // Apply the activation function element by element.
        for_i(parallelize_, out_size_, [&](int i) {
            out[i] = h_.f(a, i);
        });
S2层代码段:

// Excerpt: forward pass of a layer whose connectivity is given by lookup
// tables (used for the sub-sampling layers in the article).
// `in` is declared outside this excerpt.
vec_t& a = a_[index];
     
        for_i(parallelize_, out_size_, [&](int i) {
            // out2wi_[i] lists (weight index, input index) pairs feeding output i.
            const wi_connections& connections = out2wi_[i];

            a[i] = 0.0;

            for (auto connection : connections)// 13.1%
                a[i] += W_[connection.first] * in[connection.second]; // 3.2%

            // Scale the weighted sum, then add the bias selected by out2bias_[i].
            a[i] *= scale_factor_;
            a[i] += b_[out2bias_[i]];
        });

        // Apply the activation function element by element.
        for_i(parallelize_, out_size_, [&](int i) {
            output_[index][i] = h_.f(a, i);
        });

C3层、C5层代码段与C1层相同。
S4层代码段与S2层相同。
输出层代码段:

// Excerpt: forward pass of a fully connected layer.
// `in` is declared outside this excerpt.
vec_t &a = a_[index];
        vec_t &out = output_[index];

        for_i(parallelize_, out_size_, [&](int i) {
            a[i] = 0.0;
            // The weight linking input c to output i is stored at W_[c * out_size_ + i].
            for (layer_size_t c = 0; c < in_size_; c++) {
                a[i] += W_[c*out_size_ + i] * in[c];
            }

            if (has_bias_)
                a[i] += b_[i];
        });

        // Apply the activation function element by element.
        for_i(parallelize_, out_size_, [&](int i) {
            out[i] = h_.f(a, i);
        });
反向传播:
输出层代码段:

// Excerpt: compute the delta of the last layer from the network output
// `out` and the target `t` (both declared outside this excerpt).
vec_t delta(out_dim());
        const activation::function& h = layers_.tail()->activation_function();

        if (is_canonical_link(h)) {
            // Shortcut taken when is_canonical_link(h) holds: delta is simply out - t.
            for_i(out_dim(), [&](int i){ delta[i] = out[i] - t[i]; });
        } else {
            // General case: loss gradient with respect to the outputs.
            vec_t dE_dy = gradient<E>(out, t);

            // delta = dE/da = (dE/dy) * (dy/da)
            // h.df(out, i) returns a vector here; delta[i] is its dot product with dE_dy.
            for (size_t i = 0; i < out_dim(); i++) {
                vec_t dy_da = h.df(out, i);
                delta[i] = vectorize::dot(&dE_dy[0], &dy_da[0], out_dim());
            }
        }
C5层代码段:

// Excerpt: backward pass of a fully connected layer.
// `curr_delta` (this layer's delta) is declared outside this excerpt.
const vec_t& prev_out = prev_->output(index);
        const activation::function& prev_h = prev_->activation_function();
        vec_t& prev_delta = prev_delta_[index];
        vec_t& dW = dW_[index];
        vec_t& db = db_[index];

        for (layer_size_t c = 0; c < this->in_size_; c++) {
            // propagate delta to previous layer
            // prev_delta[c] += current_delta[r] * W_[c * out_size_ + r]
            // Note: the value is assigned (not accumulated), then multiplied by
            // the previous layer's activation derivative at prev_out[c].
            prev_delta[c] = vectorize::dot(&curr_delta[0], &W_[c*out_size_], out_size_);
            prev_delta[c] *= prev_h.df(prev_out[c]);
        }

        // Each task handles the output range [r.begin(), r.end()).
        for_(parallelize_, 0, (size_t)out_size_, [&](const blocked_range& r) {
            // accumulate weight-step using delta
            // dW[c * out_size + i] += current_delta[i] * prev_out[c]
            for (layer_size_t c = 0; c < in_size_; c++)
                vectorize::muladd(&curr_delta[r.begin()], prev_out[c], r.end() - r.begin(), &dW[c*out_size_ + r.begin()]);

            // The bias gradient accumulates the delta itself.
            if (has_bias_) {
                for (int i = r.begin(); i < r.end(); i++)
                    db[i] += curr_delta[i];
            }
        });
S4层代码段:

// Excerpt: backward pass of a convolutional layer. Propagates the delta to
// the previous layer, then accumulates the weight and bias gradients.
// `curr_delta` (this layer's delta) is declared outside this excerpt.
const vec_t& prev_out = *(prev_out_padded_[index]);
        const activation::function& prev_h = prev_->activation_function();
        // With padding::same the deltas go into the padded buffer, otherwise
        // directly into prev_delta_.
        vec_t* prev_delta = (pad_type_ == padding::same) ? &prev_delta_padded_[index] : &prev_delta_[index];
        vec_t& dW = dW_[index];
        vec_t& db = db_[index];

        // Deltas are accumulated with += below, so start from zero.
        std::fill(prev_delta->begin(), prev_delta->end(), (float_t)0.0);

        // propagate delta to previous layer
        for_i(in_.depth_, [&](int inc) {
            for (layer_size_t outc = 0; outc < out_.depth_; outc++) {
                // Skip channel pairs that the connection table does not link.
                if (!tbl_.is_connected(outc, inc)) continue;

                const float_t *pw = &this->W_[weight_.get_index(0, 0, in_.depth_ * outc + inc)];
                const float_t *pdelta_src = &curr_delta[out_.get_index(0, 0, outc)];
                float_t *pdelta_dst = &(*prev_delta)[in_padded_.get_index(0, 0, inc)];

                for (layer_size_t y = 0; y < out_.height_; y++) {
                    for (layer_size_t x = 0; x < out_.width_; x++) {
                        const float_t * ppw = pw;
                        // Delta of output position (x, y); it is spread over the
                        // input window that produced that output.
                        const float_t ppdelta_src = pdelta_src[y * out_.width_ + x];
                        float_t * ppdelta_dst = pdelta_dst + y * h_stride_ * in_padded_.width_ + x * w_stride_;

                        for (layer_size_t wy = 0; wy < weight_.height_; wy++) {
                            for (layer_size_t wx = 0; wx < weight_.width_; wx++) {
                                ppdelta_dst[wy * in_padded_.width_ + wx] += *ppw++ * ppdelta_src;
                            }
                        }
                    }
                }
            }
        });

        // Multiply every propagated delta by the previous layer's activation
        // derivative evaluated at the corresponding (padded) output.
        for_i(parallelize_, in_padded_.size(), [&](int i) {
            (*prev_delta)[i] *= prev_h.df(prev_out[i]);
        });

        // accumulate dw
        for_i(in_.depth_, [&](int inc) {
            for (layer_size_t outc = 0; outc < out_.depth_; outc++) {

                if (!tbl_.is_connected(outc, inc)) continue;

                // One gradient value per kernel position (wx, wy).
                for (layer_size_t wy = 0; wy < weight_.height_; wy++) {
                    for (layer_size_t wx = 0; wx < weight_.width_; wx++) {
                        float_t dst = 0.0;
                        const float_t * prevo = &prev_out[in_padded_.get_index(wx, wy, inc)];
                        const float_t * delta = &curr_delta[out_.get_index(0, 0, outc)];

                        // Row-wise dot product of the shifted input with the delta map.
                        // NOTE(review): unlike the delta propagation above, this
                        // indexing does not use h_stride_/w_stride_ -- presumably
                        // only valid for stride 1; confirm.
                        for (layer_size_t y = 0; y < out_.height_; y++) {
                            dst += vectorize::dot(prevo + y * in_padded_.width_, delta + y * out_.width_, out_.width_);
                        }
                        dW[weight_.get_index(wx, wy, in_.depth_ * outc + inc)] += dst;
                    }
                }
            }
        });

        // accumulate db
        // Each output channel's bias gradient is the sum of that channel's deltas.
        if (!db.empty()) {
            for (layer_size_t outc = 0; outc < out_.depth_; outc++) {
                const float_t *delta = &curr_delta[out_.get_index(0, 0, outc)];
                db[outc] += std::accumulate(delta, delta + out_.width_ * out_.height_, (float_t)0.0);
            }
        }
C3层代码段:

// Excerpt: backward pass of a layer whose connectivity is given by lookup
// tables (used for the sub-sampling layers in the article).
// `current_delta` (this layer's delta) is declared outside this excerpt.
const vec_t& prev_out = prev_->output(index);
        const activation::function& prev_h = prev_->activation_function();
        vec_t& prev_delta = prev_delta_[index];

        // Propagate the delta to the previous layer, one input element at a time.
        for_(parallelize_, 0, (size_t)in_size_, [&](const blocked_range& r) {
            for (int i = r.begin(); i != r.end(); i++) {
                // in2wo_[i] lists (weight index, output index) pairs fed by input i.
                const wo_connections& connections = in2wo_[i];
                float_t delta = 0.0;

                for (auto connection : connections) 
                    delta += W_[connection.first] * current_delta[connection.second]; // 40.6%

                prev_delta[i] = delta * scale_factor_ * prev_h.df(prev_out[i]); // 2.1%
            }
        });

        // Accumulate the weight gradients.
        for_(parallelize_, 0, weight2io_.size(), [&](const blocked_range& r) {
            for (int i = r.begin(); i < r.end(); i++) {
                // weight2io_[i] lists (input index, output index) pairs that use weight i.
                const io_connections& connections = weight2io_[i];
                float_t diff = 0.0;

                for (auto connection : connections) // 11.9%
                    diff += prev_out[connection.first] * current_delta[connection.second];

                dW_[index][i] += diff * scale_factor_;
            }
        });

        // Accumulate the bias gradients: bias i receives the sum of the deltas
        // of all outputs listed in bias2out_[i].
        for (size_t i = 0; i < bias2out_.size(); i++) {
            const std::vector<layer_size_t>& outs = bias2out_[i];
            float_t diff = 0.0;

            for (auto o : outs)
                diff += current_delta[o];    

            db_[index][i] += diff;
        }
S2层、输入层代码段与S4层相同。
C1层代码段与C3层相同。
权值和偏置更新代码段:

void update(const vec_t& dW, const vec_t& /*Hessian*/, vec_t &W) {
        vec_t& g = get<0>(W);

        for_i(W.size(), [&](int i) {
            g[i] += dW[i] * dW[i];
            W[i] -= alpha * dW[i] / (std::sqrt(g[i]) + eps);
        });
    }
对MNIST中的60000个训练样本,依次执行上面的操作,并更新权值和偏置。每次循环执行完60000个训练样本,会对10000个测试样本进行测试,获得识别率。
共迭代30次,然后将最终的权值、偏置等相关参数保存到指定的文件中。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: