您的位置:首页 > 理论基础 > 计算机网络

基于 Steam 的游戏销量预测 — PART 3 — 基于BP神经网络的机器学习与预测

2018-02-03 20:02 676 查看
语言:c++

环境windows

训练内容:根据从steam中爬取的数据经过文本分析制作的向量以及标签

使用相关:无

解释:

  就是一个BP神经网络,借鉴参考了一些博客的解释和代码,具体哪些忘了,给出其中一个:

  http://blog.csdn.net/zhongkejingwang/article/details/44514073

代码:

 

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>
#include <stdlib.h>
#include <time.h>

#define TAG_NUM 200    // number of tags (not used in this file's visible code — TODO confirm against other parts)
#define VEC_NUM 216    // full length of one input feature vector (matches IN_NODE_NUM)
#define BASE_SCORE 5    // padding value appended when a sample has fewer than VEC_NUM features
#define MX_STR 2400    // capacity of raw token/string buffers used while parsing input files
#define DIV 1000000.0    // scaling divisor (not used in this file's visible code — TODO confirm)
#define INF (1e9 + 44)    // large sentinel used when scanning for a maximum

using namespace std;

namespace BPnamespace
{
#define IN_NODE_NUM 216    // number of input-layer nodes
#define HIDDEN_NODE_NUM 24    // number of nodes per hidden layer
#define HIDDEN_LAYER_NUM 1    // number of hidden layers
#define OUT_NODE_NUM 9    // number of output-layer nodes
#define LEARNING_RATE 0.03    // learning rate for weight/bias updates
#define MAX_RAND_SEG (int)144e4    // capacity of the sample-shuffling index array

// Uniform-ish random weight initializer in the closed range [-0.1, 0.1].
inline double xrand()
{
    double unit = (double)rand() / RAND_MAX;    // [0, 1]
    return (2.0 * unit - 1.0) * 0.1;            // map to [-0.1, 0.1]
}

// Logistic sigmoid activation: maps any real x into (0, 1).
inline double sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}

// One node of the input layer.
struct InputNode
{
    double value;                       // current input value fed into the network
    std::vector<double> weight;         // weights towards each node of the first hidden layer
    std::vector<double> wdelta_sum;     // accumulated weight deltas towards the first hidden layer

    // FIX: zero-initialize `value` (it was indeterminate). The old
    // clear() calls were removed: default-constructed vectors are empty.
    InputNode() : value(0) { }
};

// One node of the output layer.
struct OutputNode
{
    double value;       // activation produced by the forward pass
    double delta;       // error term derived from (rightout - value)
    double rightout;    // expected (target) output for the current sample
    double bias;        // additive bias of this node
    double bdelta_sum;  // accumulated bias delta

    // FIX: zero-initialize every member (all were indeterminate before).
    OutputNode() : value(0), delta(0), rightout(0), bias(0), bdelta_sum(0) { }
};

// One node of a hidden layer.
struct HiddenNode
{
    double value;                       // activation produced by the forward pass
    double delta;                       // back-propagated error term
    double bias;                        // additive bias of this node
    double bdelta_sum;                  // accumulated bias delta
    std::vector<double> weight;         // weights towards each node of the next layer
    std::vector<double> wdelta_sum;     // accumulated weight deltas towards the next layer

    // FIX: zero-initialize the scalar members (they were indeterminate).
    // The old clear() calls were removed: default-constructed vectors are empty.
    HiddenNode() : value(0), delta(0), bias(0), bdelta_sum(0) { }
};

// Index/key pair used to visit training samples in random order:
// each sample id is paired with a random val, then the array is
// sorted by val to produce a shuffled visiting sequence.
struct RandSegNode
{
    int id, val;
};
RandSegNode rand_seg[MAX_RAND_SEG];    // shuffle buffer (~11 MB of static storage)

// One data point: `in` holds the feature vector; `out` holds the one-hot
// label during training, or the network's raw outputs after prediction.
struct Sample
{
    std::vector<double> in, out;
};

bool cmpRandSeg(RandSegNode a,RandSegNode b)
{
return a.val < b.val;
}

class BP
{
public:
double error;
InputNode* input_layer[IN_NODE_NUM];
OutputNode* output_layer[OUT_NODE_NUM];
HiddenNode* hidden_layer[HIDDEN_LAYER_NUM][HIDDEN_NODE_NUM];

void load()
{
string file_name = "data\\data.txt";
ifstream infile(file_name.c_str(), ios::in);
for (int i = 0; i < IN_NODE_NUM; i++)
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
infile >> input_layer[i]->weight[j];
for (int k = 0; k < HIDDEN_LAYER_NUM - 1; k++)
for (int i = 0; i < HIDDEN_NODE_NUM; i++)
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
infile >> hidden_layer[k][i]->weight[j];
for (int i = 0; i < HIDDEN_NODE_NUM; i++)
for (int j = 0; j < OUT_NODE_NUM; j++)
infile >> hidden_layer[HIDDEN_LAYER_NUM - 1][i]->weight[j];
for (int k = 0; k < HIDDEN_LAYER_NUM; k++)
for (int i = 0; i < HIDDEN_NODE_NUM; i++)
infile >> hidden_layer[k][i]->bias;
for (int i = 0; i < OUT_NODE_NUM; i++)
infile >> output_layer[i]->bias;
}

void write()
{
string file_name = "data\\data.txt";
ofstream outfile(file_name.c_str(), ios::out);
for (int i = 0; i < IN_NODE_NUM; i++)
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
outfile << input_layer[i]->weight[j] << ' ';
for (int k = 0; k < HIDDEN_LAYER_NUM - 1; k++)
for (int i = 0; i < HIDDEN_NODE_NUM; i++)
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
outfile << hidden_layer[k][i]->weight[j] << ' ';
for (int i = 0; i < HIDDEN_NODE_NUM; i++)
for (int j = 0; j < OUT_NODE_NUM; j++)
outfile << hidden_layer[HIDDEN_LAYER_NUM - 1][i]->weight[j] << ' ';
for (int k = 0; k < HIDDEN_LAYER_NUM; k++)
for (int i = 0; i < HIDDEN_NODE_NUM; i++)
outfile << hidden_layer[k][i]->bias << ' ';
for (int i = 0; i < OUT_NODE_NUM; i++)
outfile << output_layer[i]->bias << ' ';
}

BP()
{
srand((unsigned)time(NULL));
error = 100;
//初始化输入层
for (int i = 0; i < IN_NODE_NUM; i++)
{
input_layer[i] = new InputNode();
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
input_layer[i]->weight.push_back(xrand());
input_layer[i]->wdelta_sum.push_back(0);
}
}
//初始化隐藏层
for (int i = 0; i < HIDDEN_LAYER_NUM; i++)
{
if (i == HIDDEN_LAYER_NUM - 1)
{
for (int j = 0; j < HIDDEN_NODE_NUM;j++)
{
hidden_layer[i][j] = new HiddenNode();
hidden_layer[i][j]->bias = 0;
for (int k = 0; k < OUT_NODE_NUM; k++)
{
hidden_layer[i][j]->weight.push_back(xrand());
hidden_layer[i][j]->wdelta_sum.push_back(0);
}
}
}
else
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
hidden_layer[i][j] = new HiddenNode();
hidden_layer[i][j]->bias = 0;
for (int k = 0; k < HIDDEN_NODE_NUM; k++)
hidden_layer[i][j]->weight.push_back(xrand());
}
}
}
//初始化输出层
for (int i = 0; i < OUT_NODE_NUM; i++)
{
output_layer[i] = new OutputNode();
output_layer[i]->bias = 0;
}
}

void forwardPropagationEpoc() //单个样本 向前传播
{
//输入层->隐含层  隐含层->隐含层
for (int i = 0; i < HIDDEN_LAYER_NUM; i++)
{
if (i == 0)
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
double sum = 0;
for (int k = 0; k < IN_NODE_NUM; k++)
sum += input_layer[k]->value * input_layer[k]->weight[j];
sum += hidden_layer[i][j]->bias;
hidden_layer[i][j]->value = sigmoid(sum);
}
}
else
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
double sum = 0;
for (int k = 0; k < HIDDEN_NODE_NUM; k++)
sum += hidden_layer[i - 1][k]->value*hidden_layer[i - 1][k]->weight[j];
sum += hidden_layer[i][j]->bias;
hidden_layer[i][j]->value = sigmoid(sum);
}
}
}
//隐含层->输出层
for (int i = 0; i < OUT_NODE_NUM; i++)
{
double sum = 0;
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
sum += hidden_layer[HIDDEN_LAYER_NUM - 1][j]->value * hidden_layer[HIDDEN_LAYER_NUM - 1][j]->weight[i];
sum += output_layer[i]->bias;
output_layer[i]->value = sigmoid(sum);
}
//cout << "!!!" << ' ' << output_layer[0]->value << endl;
}

void backPropagationEpoc() //单个样本 向后传播
{
//输出层 计算delta
for (int i = 0; i < OUT_NODE_NUM; i++)
{
double tmp = output_layer[i]->rightout - output_layer[i]->value;
error += tmp*tmp / 2;
output_layer[i]->delta = tmp*(1 - output_layer[i]->value)*output_layer[i]->value;
}
//隐含层 计算delta
for (int i = HIDDEN_LAYER_NUM - 1; i >= 0; i--)
{
if (i == HIDDEN_LAYER_NUM - 1)
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
double sum = 0;
for (int k = 0; k < OUT_NODE_NUM; k++)
sum += output_layer[k]->delta*hidden_layer[i][j]->weight[k];
hidden_layer[i][j]->delta = sum*(1 - hidden_layer[i][j]->value)*hidden_layer[i][j]->value;
}
}
else
{
for (int j = 0; j < HIDDEN_LAYER_NUM; j++)
{
double sum = 0;
for (int k = 0; k < HIDDEN_NODE_NUM; k++)
sum += hidden_layer[i + 1][k]->delta*hidden_layer[i][j]->weight[k];
hidden_layer[i][j]->delta = sum*(1 - hidden_layer[i][j]->value)*hidden_layer[i][j]->value;
}
}
}
//输入层 更新 wdelta_sum
for (int i = 0; i < IN_NODE_NUM; i++)
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
input_layer[i]->wdelta_sum[j] += input_layer[i]->value*hidden_layer[0][j]->delta;
//隐含层 更新 wdelta_sum 和 bdelta_sum
for (int i = 0; i < HIDDEN_LAYER_NUM; i++)
{
if (i == HIDDEN_LAYER_NUM - 1)
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
hidden_layer[i][j]->bdelta_sum += hidden_layer[i][j]->delta;
for (int k = 0; k < OUT_NODE_NUM; k++)
hidden_layer[i][j]->wdelta_sum[k] += hidden_layer[i][j]->value*output_layer[k]->delta;
}
}
else
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
hidden_layer[i][j]->bdelta_sum += hidden_layer[i][j]->delta;
for (int k = 0; k < HIDDEN_NODE_NUM; k++)
hidden_layer[i][j]->wdelta_sum[k] += hidden_layer[i][j]->value*hidden_layer[i + 1][k]->delta;
}
}
}
//输出层 更新 bdelta_sum
for (int i = 0; i < OUT_NODE_NUM; i++)
output_layer[i]->bdelta_sum += output_layer[i]->delta;
}

void training(vector<Sample> sample_group,  int cnt_bound) //更新weight,bias
{
int sample_num = sample_group.size();
for (int i = 0; i < sample_num; i++)
rand_seg[i].id = i, rand_seg[i].val = rand();
sort(rand_seg, rand_seg + sample_num, cmpRandSeg);
//        double error_bound;
//        double last_error = -1;
int cnt = 0;
int now_id;
while (cnt < cnt_bound)
{
//            last_error = error;
error = 0;
for (int i = 0; i < IN_NODE_NUM; i++)
input_layer[i]->wdelta_sum.assign(input_layer[i]->wdelta_sum.size(), 0);
for (int i = 0; i < HIDDEN_LAYER_NUM; i++)
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
hidden_layer[i][j]->wdelta_sum.assign(hidden_layer[i][j]->wdelta_sum.size(), 0);
hidden_layer[i][j]->bdelta_sum = 0;
}
for (int i = 0; i < OUT_NODE_NUM; i++)
output_layer[i]->bdelta_sum = 0;
now_id = rand_seg[cnt%sample_group.size()].id;
setInput(sample_group[now_id].in);
setOutput(sample_group[now_id].out);
forwardPropagationEpoc();
backPropagationEpoc();
//输出层反向传递 更新 weight
for (int i = 0; i < IN_NODE_NUM; i++)
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
input_layer[i]->weight[j] += LEARNING_RATE*input_layer[i]->wdelta_sum[j];
//隐含层反向传递 更新 weight 和 bias
for (int i = 0; i < HIDDEN_LAYER_NUM; i++)
{
if (i == HIDDEN_LAYER_NUM - 1)
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
hidden_layer[i][j]->bias += LEARNING_RATE*hidden_layer[i][j]->bdelta_sum;
for (int k = 0; k < OUT_NODE_NUM; k++)
hidden_layer[i][j]->weight[k] += LEARNING_RATE*hidden_layer[i][j]->wdelta_sum[k];
}
}
else
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
hidden_layer[i][j]->bias += LEARNING_RATE*hidden_layer[i][j]->bdelta_sum;
for (int k = 0; k < HIDDEN_NODE_NUM; k++)
hidden_layer[i][j]->weight[k] += LEARNING_RATE*hidden_layer[i][j]->wdelta_sum[k];
}
}
}
//输出层反向传递 更新bias
for (int i = 0; i < OUT_NODE_NUM; i++)
output_layer[i]->bias += LEARNING_RATE*output_layer[i]->bdelta_sum;
if (++cnt % 4000 == 0)
{
printf("turn %d/%d finished \n", cnt, cnt_bound);
printf("training error: %lf\n", error);
}
}
}

void predict(vector<Sample>& test_group) //神经网络预测
{
int test_num = test_group.size();
for (int id = 0; id < test_num; id++)
{
test_group[id].out.clear();
setInput(test_group[id].in);
//输入层->隐含层 隐含层->隐含层 正向传播
for (int i = 0; i < HIDDEN_LAYER_NUM; i++)
{
if (i == 0)
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
double sum = 0;
for (int k = 0; k < IN_NODE_NUM; k++)
sum += input_layer[k]->value*input_layer[k]->weight[j];
sum += hidden_layer[i][j]->bias;
hidden_layer[i][j]->value = sigmoid(sum);
}
}
else
{
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
{
double sum = 0;
for (int k = 0; k < HIDDEN_NODE_NUM; k++)
sum += hidden_layer[i - 1][k]->value*hidden_layer[i - 1][k]->weight[j];
sum += hidden_layer[i][j]->bias;
hidden_layer[i][j]->value = sigmoid(sum);
}
}
}
for (int i = 0; i < OUT_NODE_NUM; i++)
{
double sum = 0;
for (int j = 0; j < HIDDEN_NODE_NUM; j++)
sum += hidden_layer[HIDDEN_LAYER_NUM - 1][j]->value*hidden_layer[HIDDEN_LAYER_NUM - 1][j]->weight[i];
sum += output_layer[i]->bias;
output_layer[i]->value = sigmoid(sum);
test_group[id].out.push_back(output_layer[i]->value);
}
}
}

void setInput(vector<double> sample_in) //设置学习样本输入
{
for (int i = 0; i < IN_NODE_NUM; i++)
input_layer[i]->value = sample_in[i];
}

void setOutput(vector<double> sample_out) //设置学习样本输出
{
for (int i = 0; i < OUT_NODE_NUM; i++)
output_layer[i]->rightout = sample_out[i];
}
};
}
using namespace BPnamespace;

BP bp; // the single global network instance
vector<Sample> sample_group; // samples loaded for training or prediction
char ex_str[MX_STR]; // scratch buffer for tokens skipped while parsing

// Count the decimal digits in num_str — used as the sales figure's
// order-of-magnitude "rank" label (e.g. "12345" -> 5).
int getRank(char num_str[])
{
    int digit_cnt = 0;
    for (const char* p = num_str; *p != '\0'; ++p)
    {
        if (*p >= '0' && *p <= '9')
            ++digit_cnt;
    }
    return digit_cnt;
}

// Read labelled samples from training_data.txt, train the network on
// them, and persist the resulting weights via bp.write().
// Expected file layout: sample count, then per sample an id token,
// a feature count, the features, and the sales figure (or '-' if unknown).
void part_train()
{
    sample_group.clear();
    char num_str[MX_STR];
    int tol, num;
    double tmp;
    Sample now;
    string infile_name = "training_data.txt";
    ifstream infile(infile_name.c_str(), ios::in);
    if (!infile)    // FIX: a missing file used to silently train on garbage
    {
        puts("error: cannot open training_data.txt");
        return;
    }
    infile >> tol;    // number of samples in the file
    for (int i = 0; i < tol; i++)
    {
        infile >> ex_str;    // sample id/name token (unused)
        infile >> num;       // number of real feature values that follow
        now.in.clear(), now.out.clear();
        for (int j = 0; j < num; j++)
        {
            infile >> tmp;
            now.in.push_back(tmp);
        }
        // pad short vectors up to the full input width
        for (int j = num; j < VEC_NUM; j++)
            now.in.push_back(BASE_SCORE);
        infile >> num_str;               // sales figure, read as a string
        if (num_str[0] == '-') continue; // '-' marks an unlabelled sample: skip it
        num = getRank(num_str);          // label = digit count (order of magnitude)
        // one-hot encode the label (ranks >= OUT_NODE_NUM yield all zeros)
        for (int j = 0; j < OUT_NODE_NUM; j++)
            now.out.push_back(j == num ? 1 : 0);
        sample_group.push_back(now);
    }
    bp.training(sample_group, 30000);
    bp.write();
}

void part_predict()
{
sample_group.clear();
char num_str[MX_STR];
int tol, num;
double tmp;
Sample now;
string infile_name = "sample.txt";
ifstream infile(infile_name.c_str(), ios::in);
infile >> tol;
for(int i = 0; i < tol; i++)
{
infile >> ex_str;
infile >> num;
now.in.clear(),now.out.clear();
for(int j = 0; j < num; j++)
{
infile >> tmp;
now.in.push_back(tmp);
}
for(int j = num; j < VEC_NUM; j++)
now.in.push_back(BASE_SCORE);
infile >> num_str;
sample_group.push_back(now);
}
bp.predict(sample_group);
string outfile_name = "result.txt";
ofstream outfile(outfile_name.c_str(), ios::out);
for(int i = 0; i < sample_group.size(); i++)
{
tmp = -INF;
now = sample_group[i];
for(int j = 0; j < now.out.size(); j++)
if(now.out[j] > tmp)
tmp = now.out[j], num = j;
outfile << "id: " << i << "  rank: " << num << endl;
cout << "id: " << i << "  rank: " << num << endl;
}
}

// Entry point: choose between loading saved weights, training from
// scratch, or continuing training, then always run prediction.
int main()
{
    int tp;
    // FIX: corrected the "trainning"/"tranning" typos in the prompt
    puts("input\n  0: load last training data\n  1: restart training\n  2: load last training data and continue training\n");
    // FIX: bail out on malformed input instead of branching on an
    // uninitialized tp when scanf fails
    if (scanf("%d", &tp) != 1) return puts("error"), 0;
    if (tp == 0) bp.load();
    else if (tp == 1) part_train();
    else if (tp == 2) bp.load(), part_train();
    else return puts("error"), 0;
    part_predict();
    system("pause");    // Windows-only; a harmless failed command elsewhere
    return 0;
}


View Code
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: