吴恩达老师深度学习视频课笔记:逻辑回归公式推导及C++实现
2018-02-22 13:28
836 查看
逻辑回归(Logistic Regression)是一个二分分类算法。逻辑回归的目标是最小化其预测与训练数据之间的误差。为了训练逻辑回归模型中的参数w和b,需要定义一个成本函数(cost function)。
成本函数(cost function):它是针对整个训练集的。衡量参数w和b在整个训练集上的效果。
损失函数或误差函数(loss function or error function):它是针对单个训练样本进行定义的。可以用来衡量算法的效果,衡量预测输出值与实际值有多接近。
梯度下降法的核心是最小化成本函数。使用梯度下降法可以找到一个函数的局部极小值。
关于逻辑回归的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/78283675
关于梯度下降法的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/75351323
关于激活函数sigmoid函数的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/73848734
关于MNIST数据集的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/49611549
以下截图来自吴恩达老师深度学习视频课:
以下code是完全按照上面的推导公式进行实现的,训练数据集为从MNIST中train中随机选取的0、1各10个图像;测试数据集为从MNIST中test中随机选取的0、1各10个图像,如下图,其中第一排前10个0用于训练,后10个0用于测试;第二排前10个1用于训练,后10个1用于测试:
logistic_regression2.hpp:
#ifndef FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
#define FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
#include <vector>
#include <string>
namespace ANN {
/// Two-class logistic regression.
///
/// Workflow visible in this file: init() copies the training set and the
/// hyper-parameters, train() fits the weights and bias and writes them to a
/// model file; load_model() followed by predict() performs inference.
template<typename T>
class LogisticRegression2 {
public:
    LogisticRegression2() = default;

    /// Stores the training samples, labels and hyper-parameters.
    /// @param data            train_num rows of feature_length values, stored back to back
    /// @param labels          train_num ground-truth labels
    /// @param train_num       number of training samples
    /// @param feature_length  number of features per sample
    /// @param learning_rate   gradient-descent step size
    /// @param iterations      number of gradient-descent passes
    /// @return 0 on success, -1 when an argument is rejected
    int init(const T* data, const T* labels, int train_num, int feature_length,
             T learning_rate = 0.00001, int iterations = 10000);

    /// Fits the model and writes it to the file named by `model`.
    /// @return 0 on success
    int train(const std::string& model);

    /// Restores weights and bias from the file named by `model`.
    /// @return 0 on success, -1 when the file cannot be opened
    int load_model(const std::string& model);

    /// Evaluates y = 1/(1+exp(-(wx+b))) for one sample of feature_length values.
    T predict(const T* data, int feature_length) const;

private:
    /// Writes weights and bias to the file named by `model`; 0 on success.
    int store_model(const std::string& model) const;

    /// y = 1/(1+exp(-value))
    T calculate_sigmoid(T value) const;

    /// Linear response w*feature + b for one sample.
    T calculate_z(const std::vector<T>& feature) const;

    std::vector<std::vector<T>> x;          // training set, one inner vector per sample
    std::vector<T> y;                       // ground truth labels
    int iterations{ 1000 };                 // gradient-descent passes
    int m{ 0 };                             // train samples num
    int feature_length{ 0 };                // features per sample
    T alpha{ static_cast<T>(0.00001) };     // learning rate
    std::vector<T> w;                       // weights
    T b{ static_cast<T>(0.) };              // bias term added to w*x
}; // class LogisticRegression2
} // namespace ANN
#endif // FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
logistic_regression2.cpp:
#include "logistic_regression2.hpp"
#include <fstream>
#include <algorithm>
#include <random>
#include <cmath>
#include "common.hpp"
namespace ANN {
/// Validates the arguments and copies the training set into the object.
///
/// @param data            train_num * feature_length values; sample i starts at
///                        data + i * feature_length
/// @param labels          train_num ground-truth labels
/// @param train_num       number of samples, must be at least 2
/// @param feature_length  number of features per sample, must be positive
/// @param learning_rate   gradient-descent step size, must be positive
/// @param iterations      number of gradient-descent passes, must be positive
/// @return 0 on success, -1 (with a message on stderr) when an argument is rejected
template<typename T>
int LogisticRegression2<T>::init(const T* data, const T* labels, int train_num, int feature_length, T learning_rate, int iterations)
{
    if (data == nullptr || labels == nullptr) {
        fprintf(stderr, "logistic regression train data and labels cannot be null\n");
        return -1;
    }
    if (train_num < 2) {
        fprintf(stderr, "logistic regression train samples num is too little: %d\n", train_num);
        return -1;
    }
    if (feature_length <= 0) {
        fprintf(stderr, "feature length must be greater 0: %d\n", feature_length);
        return -1;
    }
    // Written as !(rate > 0) so that NaN is rejected as well.
    if (!(learning_rate > 0)) {
        fprintf(stderr, "learning rate must be greater 0: %f\n", static_cast<double>(learning_rate));
        return -1;
    }
    if (iterations <= 0) {
        fprintf(stderr, "number of iterations cannot be zero or a negative number: %d\n", iterations);
        return -1;
    }

    this->alpha = learning_rate;
    this->iterations = iterations;
    this->m = train_num;
    this->feature_length = feature_length;

    // Row offsets are computed in size_t so large data sets cannot overflow int.
    const std::size_t row_length = static_cast<std::size_t>(feature_length);
    this->x.assign(static_cast<std::size_t>(train_num), std::vector<T>());
    for (int i = 0; i < train_num; ++i) {
        const T* row = data + static_cast<std::size_t>(i) * row_length;
        this->x[i].assign(row, row + row_length);
    }
    this->y.assign(labels, labels + train_num);

    return 0;
}
/// Linear response of the model for one sample: the products
/// w[k] * feature[k] are accumulated for k in [0, feature_length) and the
/// bias b is added last.
template<typename T>
T LogisticRegression2<T>::calculate_z(const std::vector<T>& feature) const
{
    T response = static_cast<T>(0.);

    int k = 0;
    while (k < this->feature_length) {
        response += this->w[k] * feature[k];
        ++k;
    }

    response += this->b;
    return response;
}
/// Fits w and b with batch gradient descent and stores the result in `model`.
///
/// Each of the `iterations` passes computes, over all m samples,
///   z(i) = w^T x(i) + b,  a(i) = sigmoid(z(i)),  dz(i) = a(i) - y(i),
/// averages the gradients dw and db, and moves w and b against them by alpha.
/// w and b start from uniform random values in [-0.1, 0.1).
///
/// @param model  path of the model file to write
/// @return 0 on success, -1 when init() has not supplied a training set
template<typename T>
int LogisticRegression2<T>::train(const std::string& model)
{
    CHECK(x.size() == y.size());
    // Without a training set every average below would divide by zero.
    if (this->m <= 0 || this->feature_length <= 0) {
        fprintf(stderr, "logistic regression is not initialized: call init before train\n");
        return -1;
    }

    w.resize(this->feature_length, (T)0.);

    std::random_device rd;
    std::mt19937 generator(rd());
    std::uniform_real_distribution<T> distribution(static_cast<T>(-0.1), static_cast<T>(0.1));
    for (int j = 0; j < this->feature_length; ++j) {
        w[j] = distribution(generator);
    }
    b = distribution(generator);

    // Gradient buffer is allocated once and cleared at the start of every pass.
    std::vector<T> dw(this->feature_length, (T)0.);

    for (int iter = 0; iter < this->iterations; ++iter) {
        std::fill(dw.begin(), dw.end(), (T)0.);
        T J = (T)0., db = (T)0.;

        for (int i = 0; i < this->m; ++i) {
            const T z = calculate_z(x[i]);      // z(i)=w^T*x(i)+b
            const T a = calculate_sigmoid(z);   // a(i)=1/(1+e^(-z(i)))
            // J+=-[y(i)*loga(i)+(1-y(i))*log(1-a(i))]
            J += -(y[i] * std::log(a) + (1 - y[i]) * std::log(1 - a));
            const T dz = a - y[i];              // dz(i)=a(i)-y(i)

            for (int j = 0; j < this->feature_length; ++j) {
                dw[j] += x[i][j] * dz;          // dw(j)+=x(i)(j)*dz(i)
            }
            db += dz;                           // db+=dz(i)
        }

        // J is the mean cross-entropy cost of this pass. It does not feed the
        // update below; it is kept as a diagnostic value (it becomes NaN/inf
        // once the sigmoid saturates to exactly 0 or 1).
        J /= this->m;
        (void)J;

        for (int j = 0; j < this->feature_length; ++j) {
            dw[j] /= m;
            w[j] -= this->alpha * dw[j];
        }
        db /= m;
        b -= this->alpha * db;
    }

    // The call is made outside CHECK so the model is written regardless of how
    // the macro is defined.
    const int ret = store_model(model);
    CHECK(ret == 0);

    return 0;
}
/// Restores the model written by store_model().
///
/// File layout: an int weight count, that many values of type T (the
/// weights), then one value of type T (the bias). The object is only modified
/// after the whole file has been read successfully.
///
/// @param model  path of the model file
/// @return 0 on success, -1 when the file cannot be opened, reports a
///         non-positive weight count, or ends early
template<typename T>
int LogisticRegression2<T>::load_model(const std::string& model)
{
    std::ifstream file(model.c_str(), std::ios::binary);
    if (!file.is_open()) {
        fprintf(stderr, "open file fail: %s\n", model.c_str());
        return -1;
    }

    int length{ 0 };
    file.read(reinterpret_cast<char*>(&length), sizeof(length));
    // A negative count would otherwise turn into a huge resize request.
    if (!file || length <= 0) {
        fprintf(stderr, "invalid model file: %s\n", model.c_str());
        return -1;
    }

    std::vector<T> weights(static_cast<std::size_t>(length));
    T bias{};
    file.read(reinterpret_cast<char*>(weights.data()), sizeof(T) * weights.size());
    file.read(reinterpret_cast<char*>(&bias), sizeof(T));
    if (!file) {
        fprintf(stderr, "model file is truncated: %s\n", model.c_str());
        return -1;
    }
    file.close();

    this->w.swap(weights);
    this->b = bias;
    this->feature_length = length;

    return 0;
}
/// Scores one sample: returns sigmoid(w*data + b).
///
/// @param data            the sample; the first this->feature_length values are read
/// @param feature_length  must equal the feature length of the model (enforced by CHECK)
template<typename T>
T LogisticRegression2<T>::predict(const T* data, int feature_length) const
{
    CHECK(feature_length == this->feature_length);

    const int count = this->feature_length;
    T logit = static_cast<T>(0.);
    for (int k = 0; k < count; ++k) {
        logit += data[k] * this->w[k];
    }
    logit += this->b;

    const T probability = calculate_sigmoid(logit);
    return probability;
}
/// Writes the model to `model` as raw binary: an int weight count, the
/// weights as values of type T, then the bias as one value of type T.
///
/// @param model  path of the file to create or overwrite
/// @return 0 on success, -1 when the file cannot be opened or a write fails
template<typename T>
int LogisticRegression2<T>::store_model(const std::string& model) const
{
    std::ofstream file(model.c_str(), std::ios::binary);
    if (!file.is_open()) {
        fprintf(stderr, "open file fail: %s\n", model.c_str());
        return -1;
    }

    const int length = static_cast<int>(w.size());
    file.write(reinterpret_cast<const char*>(&length), sizeof(length));
    file.write(reinterpret_cast<const char*>(w.data()), sizeof(T) * w.size());
    file.write(reinterpret_cast<const char*>(&b), sizeof(T));

    // close() flushes the buffer; a failed write or flush leaves the stream in
    // a failed state, which is reported instead of being silently ignored.
    file.close();
    if (!file) {
        fprintf(stderr, "write file fail: %s\n", model.c_str());
        return -1;
    }

    return 0;
}
/// Logistic function: y = 1/(1+exp(-value)).
///
/// std::exp is used so that the overload matching T is always selected; the
/// unqualified name is only guaranteed to exist in namespace std after
/// including <cmath>.
template<typename T>
T LogisticRegression2<T>::calculate_sigmoid(T value) const
{
    return static_cast<T>(1) / (static_cast<T>(1) + std::exp(-value));
}
// Explicit instantiations of the class template for the two element types
// this file provides: float and double.
template class LogisticRegression2<float>;
template class LogisticRegression2<double>;
} // namespace ANN
main.cpp:
#include "funset.hpp"
#include <iostream>
#include "perceptron.hpp"
#include "BP.hpp"
#include "CNN.hpp"
#include "linear_regression.hpp"
#include "naive_bayes_classifier.hpp"
#include "logistic_regression.hpp"
#include "common.hpp"
#include "knn.hpp"
#include "decision_tree.hpp"
#include "pca.hpp"
#include <opencv2/opencv.hpp>
#include "logistic_regression2.hpp"
// ================================ logistic regression =====================
// Trains the 0-vs-1 classifier on images 1..10 of each digit and writes the
// model file. Images are read with imread flag 0, flattened to one row each
// and converted to float; rows alternate between digit 0 and digit 1, so the
// label of row i is i % 2.
// Returns 0 on success, -1 when an image cannot be read or init/train fails.
int test_logistic_regression2_train()
{
    const std::string image_path{ "E:/GitCode/NN_Test/data/images/digit/handwriting_0_and_1/" };
    const std::vector<std::string> label{ "0_", "1_" };

    cv::Mat data;
    for (int i = 1; i < 11; ++i) {
        for (const auto& value : label) {
            const std::string name = image_path + value + std::to_string(i) + ".jpg";

            cv::Mat image = cv::imread(name, 0);
            if (image.empty()) {
                fprintf(stderr, "read image fail: %s\n", name.c_str());
                return -1;
            }

            data.push_back(image.reshape(0, 1));
        }
    }
    data.convertTo(data, CV_32F);

    // One label per loaded row; the count follows data.rows instead of a literal.
    std::vector<float> labels(data.rows);
    for (int i = 0; i < data.rows; ++i) {
        labels[i] = (i % 2 == 0) ? 0.f : 1.f;
    }

    ANN::LogisticRegression2<float> lr;
    const float learning_rate{ 0.0001f };
    const int iterations{ 10000 };
    // The hyper-parameters are passed explicitly; previously they were declared
    // but init silently fell back to its default arguments.
    int ret = lr.init(reinterpret_cast<const float*>(data.data), labels.data(), data.rows, data.cols, learning_rate, iterations);
    if (ret != 0) {
        fprintf(stderr, "logistic regression init fail: %d\n", ret);
        return -1;
    }

    const std::string model{ "E:/GitCode/NN_Test/data/logistic_regression2.model" };
    ret = lr.train(model);
    if (ret != 0) {
        fprintf(stderr, "logistic regression train fail: %d\n", ret);
        return -1;
    }

    return 0;
}
// Loads the stored model and classifies images 11..20 of each digit, printing
// the predicted probability, the predicted class (1 when the probability
// exceeds 0.5) and the actual class. Rows alternate between digit 0 and
// digit 1, so the actual class of row i is i % 2.
// Returns 0 on success, -1 when an image or the model cannot be loaded.
int test_logistic_regression2_predict()
{
    const std::string image_path{ "E:/GitCode/NN_Test/data/images/digit/handwriting_0_and_1/" };
    const std::vector<std::string> label{ "0_", "1_" };

    cv::Mat data;
    for (int i = 11; i < 21; ++i) {
        for (const auto& value : label) {
            const std::string name = image_path + value + std::to_string(i) + ".jpg";

            cv::Mat image = cv::imread(name, 0);
            if (image.empty()) {
                fprintf(stderr, "read image fail: %s\n", name.c_str());
                return -1;
            }

            data.push_back(image.reshape(0, 1));
        }
    }
    data.convertTo(data, CV_32F);

    // One expected label per loaded row; the count follows data.rows.
    std::vector<int> labels(data.rows);
    for (int i = 0; i < data.rows; ++i) {
        labels[i] = (i % 2 == 0) ? 0 : 1;
    }

    const std::string model{ "E:/GitCode/NN_Test/data/logistic_regression2.model" };
    ANN::LogisticRegression2<float> lr;
    int ret = lr.load_model(model);
    if (ret != 0) {
        fprintf(stderr, "load logistic regression model fail: %d\n", ret);
        return -1;
    }

    for (int i = 0; i < data.rows; ++i) {
        const float probability = lr.predict(data.ptr<float>(i), data.cols);

        fprintf(stdout, "probability: %.6f, ", probability);
        if (probability > 0.5) fprintf(stdout, "predict result: 1, ");
        else fprintf(stdout, "predict result: 0, ");
        fprintf(stdout, "actual result: %d\n", labels[i]);
    }

    return 0;
}
测试结果如下:由执行结果可知,测试图像全部分类正确。由于w和b初始值是随机产生的,因此每次执行的结果多少有些差异。
GitHub: https://github.com/fengbingchun/NN_Test
成本函数(cost function):它是针对整个训练集的。衡量参数w和b在整个训练集上的效果。
损失函数或误差函数(loss function or error function):它是针对单个训练样本进行定义的。可以用来衡量算法的效果,衡量预测输出值与实际值有多接近。
梯度下降法的核心是最小化成本函数。使用梯度下降法可以找到一个函数的局部极小值。
关于逻辑回归的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/78283675
关于梯度下降法的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/75351323
关于激活函数sigmoid函数的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/73848734
关于MNIST数据集的介绍可以参考: http://blog.csdn.net/fengbingchun/article/details/49611549
以下截图来自吴恩达老师深度学习视频课:
以下code是完全按照上面的推导公式进行实现的,训练数据集为从MNIST中train中随机选取的0、1各10个图像;测试数据集为从MNIST中test中随机选取的0、1各10个图像,如下图,其中第一排前10个0用于训练,后10个0用于测试;第二排前10个1用于训练,后10个1用于测试:
logistic_regression2.hpp:
#ifndef FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
#define FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
#include <vector>
#include <string>
namespace ANN {
/// Two-class logistic regression.
///
/// Workflow visible in this file: init() copies the training set and the
/// hyper-parameters, train() fits the weights and bias and writes them to a
/// model file; load_model() followed by predict() performs inference.
template<typename T>
class LogisticRegression2 {
public:
    LogisticRegression2() = default;

    /// Stores the training samples, labels and hyper-parameters.
    /// @param data            train_num rows of feature_length values, stored back to back
    /// @param labels          train_num ground-truth labels
    /// @param train_num       number of training samples
    /// @param feature_length  number of features per sample
    /// @param learning_rate   gradient-descent step size
    /// @param iterations      number of gradient-descent passes
    /// @return 0 on success, -1 when an argument is rejected
    int init(const T* data, const T* labels, int train_num, int feature_length,
             T learning_rate = 0.00001, int iterations = 10000);

    /// Fits the model and writes it to the file named by `model`.
    /// @return 0 on success
    int train(const std::string& model);

    /// Restores weights and bias from the file named by `model`.
    /// @return 0 on success, -1 when the file cannot be opened
    int load_model(const std::string& model);

    /// Evaluates y = 1/(1+exp(-(wx+b))) for one sample of feature_length values.
    T predict(const T* data, int feature_length) const;

private:
    /// Writes weights and bias to the file named by `model`; 0 on success.
    int store_model(const std::string& model) const;

    /// y = 1/(1+exp(-value))
    T calculate_sigmoid(T value) const;

    /// Linear response w*feature + b for one sample.
    T calculate_z(const std::vector<T>& feature) const;

    std::vector<std::vector<T>> x;          // training set, one inner vector per sample
    std::vector<T> y;                       // ground truth labels
    int iterations{ 1000 };                 // gradient-descent passes
    int m{ 0 };                             // train samples num
    int feature_length{ 0 };                // features per sample
    T alpha{ static_cast<T>(0.00001) };     // learning rate
    std::vector<T> w;                       // weights
    T b{ static_cast<T>(0.) };              // bias term added to w*x
}; // class LogisticRegression2
} // namespace ANN
#endif // FBC_SRC_NN_LOGISTIC_REGRESSION2_HPP_
logistic_regression2.cpp:
#include "logistic_regression2.hpp"
#include <fstream>
#include <algorithm>
#include <random>
#include <cmath>
#include "common.hpp"
namespace ANN {
/// Validates the arguments and copies the training set into the object.
///
/// @param data            train_num * feature_length values; sample i starts at
///                        data + i * feature_length
/// @param labels          train_num ground-truth labels
/// @param train_num       number of samples, must be at least 2
/// @param feature_length  number of features per sample, must be positive
/// @param learning_rate   gradient-descent step size, must be positive
/// @param iterations      number of gradient-descent passes, must be positive
/// @return 0 on success, -1 (with a message on stderr) when an argument is rejected
template<typename T>
int LogisticRegression2<T>::init(const T* data, const T* labels, int train_num, int feature_length, T learning_rate, int iterations)
{
    if (data == nullptr || labels == nullptr) {
        fprintf(stderr, "logistic regression train data and labels cannot be null\n");
        return -1;
    }
    if (train_num < 2) {
        fprintf(stderr, "logistic regression train samples num is too little: %d\n", train_num);
        return -1;
    }
    if (feature_length <= 0) {
        fprintf(stderr, "feature length must be greater 0: %d\n", feature_length);
        return -1;
    }
    // Written as !(rate > 0) so that NaN is rejected as well.
    if (!(learning_rate > 0)) {
        fprintf(stderr, "learning rate must be greater 0: %f\n", static_cast<double>(learning_rate));
        return -1;
    }
    if (iterations <= 0) {
        fprintf(stderr, "number of iterations cannot be zero or a negative number: %d\n", iterations);
        return -1;
    }

    this->alpha = learning_rate;
    this->iterations = iterations;
    this->m = train_num;
    this->feature_length = feature_length;

    // Row offsets are computed in size_t so large data sets cannot overflow int.
    const std::size_t row_length = static_cast<std::size_t>(feature_length);
    this->x.assign(static_cast<std::size_t>(train_num), std::vector<T>());
    for (int i = 0; i < train_num; ++i) {
        const T* row = data + static_cast<std::size_t>(i) * row_length;
        this->x[i].assign(row, row + row_length);
    }
    this->y.assign(labels, labels + train_num);

    return 0;
}
/// Linear response of the model for one sample: the products
/// w[k] * feature[k] are accumulated for k in [0, feature_length) and the
/// bias b is added last.
template<typename T>
T LogisticRegression2<T>::calculate_z(const std::vector<T>& feature) const
{
    T response = static_cast<T>(0.);

    int k = 0;
    while (k < this->feature_length) {
        response += this->w[k] * feature[k];
        ++k;
    }

    response += this->b;
    return response;
}
/// Fits w and b with batch gradient descent and stores the result in `model`.
///
/// Each of the `iterations` passes computes, over all m samples,
///   z(i) = w^T x(i) + b,  a(i) = sigmoid(z(i)),  dz(i) = a(i) - y(i),
/// averages the gradients dw and db, and moves w and b against them by alpha.
/// w and b start from uniform random values in [-0.1, 0.1).
///
/// @param model  path of the model file to write
/// @return 0 on success, -1 when init() has not supplied a training set
template<typename T>
int LogisticRegression2<T>::train(const std::string& model)
{
    CHECK(x.size() == y.size());
    // Without a training set every average below would divide by zero.
    if (this->m <= 0 || this->feature_length <= 0) {
        fprintf(stderr, "logistic regression is not initialized: call init before train\n");
        return -1;
    }

    w.resize(this->feature_length, (T)0.);

    std::random_device rd;
    std::mt19937 generator(rd());
    std::uniform_real_distribution<T> distribution(static_cast<T>(-0.1), static_cast<T>(0.1));
    for (int j = 0; j < this->feature_length; ++j) {
        w[j] = distribution(generator);
    }
    b = distribution(generator);

    // Gradient buffer is allocated once and cleared at the start of every pass.
    std::vector<T> dw(this->feature_length, (T)0.);

    for (int iter = 0; iter < this->iterations; ++iter) {
        std::fill(dw.begin(), dw.end(), (T)0.);
        T J = (T)0., db = (T)0.;

        for (int i = 0; i < this->m; ++i) {
            const T z = calculate_z(x[i]);      // z(i)=w^T*x(i)+b
            const T a = calculate_sigmoid(z);   // a(i)=1/(1+e^(-z(i)))
            // J+=-[y(i)*loga(i)+(1-y(i))*log(1-a(i))]
            J += -(y[i] * std::log(a) + (1 - y[i]) * std::log(1 - a));
            const T dz = a - y[i];              // dz(i)=a(i)-y(i)

            for (int j = 0; j < this->feature_length; ++j) {
                dw[j] += x[i][j] * dz;          // dw(j)+=x(i)(j)*dz(i)
            }
            db += dz;                           // db+=dz(i)
        }

        // J is the mean cross-entropy cost of this pass. It does not feed the
        // update below; it is kept as a diagnostic value (it becomes NaN/inf
        // once the sigmoid saturates to exactly 0 or 1).
        J /= this->m;
        (void)J;

        for (int j = 0; j < this->feature_length; ++j) {
            dw[j] /= m;
            w[j] -= this->alpha * dw[j];
        }
        db /= m;
        b -= this->alpha * db;
    }

    // The call is made outside CHECK so the model is written regardless of how
    // the macro is defined.
    const int ret = store_model(model);
    CHECK(ret == 0);

    return 0;
}
/// Restores the model written by store_model().
///
/// File layout: an int weight count, that many values of type T (the
/// weights), then one value of type T (the bias). The object is only modified
/// after the whole file has been read successfully.
///
/// @param model  path of the model file
/// @return 0 on success, -1 when the file cannot be opened, reports a
///         non-positive weight count, or ends early
template<typename T>
int LogisticRegression2<T>::load_model(const std::string& model)
{
    std::ifstream file(model.c_str(), std::ios::binary);
    if (!file.is_open()) {
        fprintf(stderr, "open file fail: %s\n", model.c_str());
        return -1;
    }

    int length{ 0 };
    file.read(reinterpret_cast<char*>(&length), sizeof(length));
    // A negative count would otherwise turn into a huge resize request.
    if (!file || length <= 0) {
        fprintf(stderr, "invalid model file: %s\n", model.c_str());
        return -1;
    }

    std::vector<T> weights(static_cast<std::size_t>(length));
    T bias{};
    file.read(reinterpret_cast<char*>(weights.data()), sizeof(T) * weights.size());
    file.read(reinterpret_cast<char*>(&bias), sizeof(T));
    if (!file) {
        fprintf(stderr, "model file is truncated: %s\n", model.c_str());
        return -1;
    }
    file.close();

    this->w.swap(weights);
    this->b = bias;
    this->feature_length = length;

    return 0;
}
/// Scores one sample: returns sigmoid(w*data + b).
///
/// @param data            the sample; the first this->feature_length values are read
/// @param feature_length  must equal the feature length of the model (enforced by CHECK)
template<typename T>
T LogisticRegression2<T>::predict(const T* data, int feature_length) const
{
    CHECK(feature_length == this->feature_length);

    const int count = this->feature_length;
    T logit = static_cast<T>(0.);
    for (int k = 0; k < count; ++k) {
        logit += data[k] * this->w[k];
    }
    logit += this->b;

    const T probability = calculate_sigmoid(logit);
    return probability;
}
/// Writes the model to `model` as raw binary: an int weight count, the
/// weights as values of type T, then the bias as one value of type T.
///
/// @param model  path of the file to create or overwrite
/// @return 0 on success, -1 when the file cannot be opened or a write fails
template<typename T>
int LogisticRegression2<T>::store_model(const std::string& model) const
{
    std::ofstream file(model.c_str(), std::ios::binary);
    if (!file.is_open()) {
        fprintf(stderr, "open file fail: %s\n", model.c_str());
        return -1;
    }

    const int length = static_cast<int>(w.size());
    file.write(reinterpret_cast<const char*>(&length), sizeof(length));
    file.write(reinterpret_cast<const char*>(w.data()), sizeof(T) * w.size());
    file.write(reinterpret_cast<const char*>(&b), sizeof(T));

    // close() flushes the buffer; a failed write or flush leaves the stream in
    // a failed state, which is reported instead of being silently ignored.
    file.close();
    if (!file) {
        fprintf(stderr, "write file fail: %s\n", model.c_str());
        return -1;
    }

    return 0;
}
/// Logistic function: y = 1/(1+exp(-value)).
///
/// std::exp is used so that the overload matching T is always selected; the
/// unqualified name is only guaranteed to exist in namespace std after
/// including <cmath>.
template<typename T>
T LogisticRegression2<T>::calculate_sigmoid(T value) const
{
    return static_cast<T>(1) / (static_cast<T>(1) + std::exp(-value));
}
// Explicit instantiations of the class template for the two element types
// this file provides: float and double.
template class LogisticRegression2<float>;
template class LogisticRegression2<double>;
} // namespace ANN
main.cpp:
#include "funset.hpp"
#include <iostream>
#include "perceptron.hpp"
#include "BP.hpp"
#include "CNN.hpp"
#include "linear_regression.hpp"
#include "naive_bayes_classifier.hpp"
#include "logistic_regression.hpp"
#include "common.hpp"
#include "knn.hpp"
#include "decision_tree.hpp"
#include "pca.hpp"
#include <opencv2/opencv.hpp>
#include "logistic_regression2.hpp"
// ================================ logistic regression =====================
// Trains the 0-vs-1 classifier on images 1..10 of each digit and writes the
// model file. Images are read with imread flag 0, flattened to one row each
// and converted to float; rows alternate between digit 0 and digit 1, so the
// label of row i is i % 2.
// Returns 0 on success, -1 when an image cannot be read or init/train fails.
int test_logistic_regression2_train()
{
    const std::string image_path{ "E:/GitCode/NN_Test/data/images/digit/handwriting_0_and_1/" };
    const std::vector<std::string> label{ "0_", "1_" };

    cv::Mat data;
    for (int i = 1; i < 11; ++i) {
        for (const auto& value : label) {
            const std::string name = image_path + value + std::to_string(i) + ".jpg";

            cv::Mat image = cv::imread(name, 0);
            if (image.empty()) {
                fprintf(stderr, "read image fail: %s\n", name.c_str());
                return -1;
            }

            data.push_back(image.reshape(0, 1));
        }
    }
    data.convertTo(data, CV_32F);

    // One label per loaded row; the count follows data.rows instead of a literal.
    std::vector<float> labels(data.rows);
    for (int i = 0; i < data.rows; ++i) {
        labels[i] = (i % 2 == 0) ? 0.f : 1.f;
    }

    ANN::LogisticRegression2<float> lr;
    const float learning_rate{ 0.0001f };
    const int iterations{ 10000 };
    // The hyper-parameters are passed explicitly; previously they were declared
    // but init silently fell back to its default arguments.
    int ret = lr.init(reinterpret_cast<const float*>(data.data), labels.data(), data.rows, data.cols, learning_rate, iterations);
    if (ret != 0) {
        fprintf(stderr, "logistic regression init fail: %d\n", ret);
        return -1;
    }

    const std::string model{ "E:/GitCode/NN_Test/data/logistic_regression2.model" };
    ret = lr.train(model);
    if (ret != 0) {
        fprintf(stderr, "logistic regression train fail: %d\n", ret);
        return -1;
    }

    return 0;
}
// Loads the stored model and classifies images 11..20 of each digit, printing
// the predicted probability, the predicted class (1 when the probability
// exceeds 0.5) and the actual class. Rows alternate between digit 0 and
// digit 1, so the actual class of row i is i % 2.
// Returns 0 on success, -1 when an image or the model cannot be loaded.
int test_logistic_regression2_predict()
{
    const std::string image_path{ "E:/GitCode/NN_Test/data/images/digit/handwriting_0_and_1/" };
    const std::vector<std::string> label{ "0_", "1_" };

    cv::Mat data;
    for (int i = 11; i < 21; ++i) {
        for (const auto& value : label) {
            const std::string name = image_path + value + std::to_string(i) + ".jpg";

            cv::Mat image = cv::imread(name, 0);
            if (image.empty()) {
                fprintf(stderr, "read image fail: %s\n", name.c_str());
                return -1;
            }

            data.push_back(image.reshape(0, 1));
        }
    }
    data.convertTo(data, CV_32F);

    // One expected label per loaded row; the count follows data.rows.
    std::vector<int> labels(data.rows);
    for (int i = 0; i < data.rows; ++i) {
        labels[i] = (i % 2 == 0) ? 0 : 1;
    }

    const std::string model{ "E:/GitCode/NN_Test/data/logistic_regression2.model" };
    ANN::LogisticRegression2<float> lr;
    int ret = lr.load_model(model);
    if (ret != 0) {
        fprintf(stderr, "load logistic regression model fail: %d\n", ret);
        return -1;
    }

    for (int i = 0; i < data.rows; ++i) {
        const float probability = lr.predict(data.ptr<float>(i), data.cols);

        fprintf(stdout, "probability: %.6f, ", probability);
        if (probability > 0.5) fprintf(stdout, "predict result: 1, ");
        else fprintf(stdout, "predict result: 0, ");
        fprintf(stdout, "actual result: %d\n", labels[i]);
    }

    return 0;
}
测试结果如下:由执行结果可知,测试图像全部分类正确。由于w和b初始值是随机产生的,因此每次执行的结果多少有些差异。
GitHub: https://github.com/fengbingchun/NN_Test
相关文章推荐
- 吴恩达老师深度学习视频课笔记:单隐含层神经网络公式推导及C++实现(二分类)
- 吴恩达老师深度学习视频课笔记:多隐含层神经网络公式推导(二分类)
- Coursera deeplearning.ai 深度学习笔记1-2-Neural Network Basics-逻辑回归原理推导与代码实现
- 吴恩达深度学习课程笔记 2.2Logistic Regression逻辑回归
- 吴恩达深度学习课程笔记 2.3逻辑回归cost function
- 课程笔记-深度学习之逻辑回归(Regression)pokemon案例分析
- [置顶] 吴恩达机器学习笔记——指数分布族&广义线性模型&逻辑回归概率模型推导
- TensorFlow深度学习笔记 逻辑回归 实践篇
- 孙鑫老师教学视频学习笔记――单文档中实现画笔及画刷
- Coursera deeplearning.ai 深度学习笔记1-4-Deep Neural Networks-深度神经网络原理推导与代码实现
- VC++中属性页(向导)的实现 (学习孙鑫老师视频笔记)
- 吴恩达深度学习视频笔记1-2:《神经网络和深度学习》之《神经网络基础》
- TensorFlow 深度学习笔记 逻辑回归 实践篇
- 吴恩达深度学习视频笔记1-3:《神经网络和深度学习》之《浅层神经网络》
- 吴恩达深度学习视频笔记1-1:《神经网络和深度学习》之《深度学习概论》
- 吴恩达-DeepLearning.ai-Course1-Week2-实现逻辑回归算法-编程作业笔记
- 斯坦福大学深度学习笔记:逻辑回归
- 斯坦福CS20SI:基于Tensorflow的深度学习研究课程笔记,Lecture note3:TensorFlow上的线性回归和逻辑回归
- [深度学习]Python/Theano实现逻辑回归网络的代码分析
- Coursera deeplearning.ai 深度学习笔记1-3-Shallow Neural Networks-浅层神经网络原理推导与代码实现