基于steam的游戏销量预测 — PART 3 — 基于BP神经网络的机器学习与预测
2018-02-03 20:02
676 查看
语言:c++
环境:windows
训练内容:根据从steam中爬取的数据经过文本分析制作的向量以及标签
使用相关:无
解释:
就是一个BP神经网络,借鉴参考了一些博客的解释和代码,具体哪些忘了,给出其中一个:
http://blog.csdn.net/zhongkejingwang/article/details/44514073
代码:
View Code
环境:windows
训练内容:根据从steam中爬取的数据经过文本分析制作的向量以及标签
使用相关:无
解释:
就是一个BP神经网络,借鉴参考了一些博客的解释和代码,具体哪些忘了,给出其中一个:
http://blog.csdn.net/zhongkejingwang/article/details/44514073
代码:
// BP (back-propagation) neural network — PART 3 of the Steam sales prediction
// project.  Trains on feature vectors produced by the text-analysis stage and
// predicts a sales-rank class (0..OUT_NODE_NUM-1) for each test sample.
#include <iostream>
#include <cstring>
#include <cmath>
#include <vector>
#include <algorithm>
#include <stdlib.h>
#include <time.h>
#include <fstream>

#define TAG_NUM 200
#define VEC_NUM 216       // full length of an input feature vector
#define BASE_SCORE 5      // padding value for missing vector components
#define MX_STR 2400
#define DIV 1000000.0
#define INF (1e9 + 44)

using namespace std;

namespace BPnamespace
{
#define IN_NODE_NUM 216     // number of input nodes
#define HIDDEN_NODE_NUM 24  // nodes per hidden layer
#define HIDDEN_LAYER_NUM 1  // number of hidden layers
#define OUT_NODE_NUM 9      // number of output nodes (one per rank class)
#define LEARNING_RATE 0.03  // SGD step size
#define MAX_RAND_SEG (int)144e4

// Random initial weight, uniform in (-0.1, 0.1).
inline double xrand()
{
    return ((2.0 * (double)rand() / RAND_MAX) - 1) / 10.0;
}

// Logistic activation function.
inline double sigmoid(double x)
{
    double ret = 1 / (1 + exp(-x));
    return ret;
}

struct InputNode
{
    double value;              // fixed input value of this node
    vector<double> weight;     // weights to every node of the first hidden layer
    vector<double> wdelta_sum; // accumulated weight deltas for those weights
    InputNode()
    {
        weight.clear();
        wdelta_sum.clear();
    }
};

struct OutputNode
{
    double value;      // activation produced by forward propagation
    double delta;      // error term: (rightout - value) * f'(net)
    double rightout;   // expected (training) output
    double bias;       // bias term
    double bdelta_sum; // accumulated bias delta
    OutputNode() {}
};

struct HiddenNode
{
    double value;
    double delta;              // error term from back propagation
    double bias;               // bias term
    double bdelta_sum;         // accumulated bias delta
    vector<double> weight;     // weights to every node of the next layer
    vector<double> wdelta_sum; // accumulated deltas for those weights
    HiddenNode()
    {
        weight.clear();
        wdelta_sum.clear();
    }
};

// Records used to visit training samples in a random order: each sample id is
// paired with a random value, then the array is sorted by that value.
struct RandSegNode
{
    int id, val;
} rand_seg[MAX_RAND_SEG];

// One training/test sample: input vector and (expected or predicted) output.
struct Sample
{
    vector<double> in, out;
};

bool cmpRandSeg(RandSegNode a, RandSegNode b)
{
    return a.val < b.val;
}

class BP
{
public:
    double error; // squared error accumulated over the most recent sample
    InputNode* input_layer[IN_NODE_NUM];
    OutputNode* output_layer[OUT_NODE_NUM];
    HiddenNode* hidden_layer[HIDDEN_LAYER_NUM][HIDDEN_NODE_NUM];

    // Load previously trained weights and biases from data\data.txt,
    // in exactly the order write() stores them.
    void load()
    {
        string file_name = "data\\data.txt";
        ifstream infile(file_name.c_str(), ios::in);
        for (int i = 0; i < IN_NODE_NUM; i++)
            for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                infile >> input_layer[i]->weight[j];
        for (int k = 0; k < HIDDEN_LAYER_NUM - 1; k++)
            for (int i = 0; i < HIDDEN_NODE_NUM; i++)
                for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                    infile >> hidden_layer[k][i]->weight[j];
        for (int i = 0; i < HIDDEN_NODE_NUM; i++)
            for (int j = 0; j < OUT_NODE_NUM; j++)
                infile >> hidden_layer[HIDDEN_LAYER_NUM - 1][i]->weight[j];
        for (int k = 0; k < HIDDEN_LAYER_NUM; k++)
            for (int i = 0; i < HIDDEN_NODE_NUM; i++)
                infile >> hidden_layer[k][i]->bias;
        for (int i = 0; i < OUT_NODE_NUM; i++)
            infile >> output_layer[i]->bias;
    }

    // Persist all weights and biases to data\data.txt (mirror of load()).
    void write()
    {
        string file_name = "data\\data.txt";
        ofstream outfile(file_name.c_str(), ios::out);
        for (int i = 0; i < IN_NODE_NUM; i++)
            for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                outfile << input_layer[i]->weight[j] << ' ';
        for (int k = 0; k < HIDDEN_LAYER_NUM - 1; k++)
            for (int i = 0; i < HIDDEN_NODE_NUM; i++)
                for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                    outfile << hidden_layer[k][i]->weight[j] << ' ';
        for (int i = 0; i < HIDDEN_NODE_NUM; i++)
            for (int j = 0; j < OUT_NODE_NUM; j++)
                outfile << hidden_layer[HIDDEN_LAYER_NUM - 1][i]->weight[j] << ' ';
        for (int k = 0; k < HIDDEN_LAYER_NUM; k++)
            for (int i = 0; i < HIDDEN_NODE_NUM; i++)
                outfile << hidden_layer[k][i]->bias << ' ';
        for (int i = 0; i < OUT_NODE_NUM; i++)
            outfile << output_layer[i]->bias << ' ';
    }

    // Build the network with random weights in (-0.1, 0.1) and zero biases.
    BP()
    {
        srand((unsigned)time(NULL));
        error = 100; // sentinel "large" error before any training
        // Input layer.
        for (int i = 0; i < IN_NODE_NUM; i++) {
            input_layer[i] = new InputNode();
            for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                input_layer[i]->weight.push_back(xrand());
                input_layer[i]->wdelta_sum.push_back(0);
            }
        }
        // Hidden layers.
        for (int i = 0; i < HIDDEN_LAYER_NUM; i++) {
            if (i == HIDDEN_LAYER_NUM - 1) {
                // Last hidden layer connects to the output layer.
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    hidden_layer[i][j] = new HiddenNode();
                    hidden_layer[i][j]->bias = 0;
                    for (int k = 0; k < OUT_NODE_NUM; k++) {
                        hidden_layer[i][j]->weight.push_back(xrand());
                        hidden_layer[i][j]->wdelta_sum.push_back(0);
                    }
                }
            } else {
                // Inner hidden layer connects to the next hidden layer.
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    hidden_layer[i][j] = new HiddenNode();
                    hidden_layer[i][j]->bias = 0;
                    for (int k = 0; k < HIDDEN_NODE_NUM; k++) {
                        hidden_layer[i][j]->weight.push_back(xrand());
                        // BUGFIX: wdelta_sum was never populated for inner
                        // hidden layers, so backPropagationEpoc() would index
                        // out of bounds whenever HIDDEN_LAYER_NUM > 1.
                        hidden_layer[i][j]->wdelta_sum.push_back(0);
                    }
                }
            }
        }
        // Output layer.
        for (int i = 0; i < OUT_NODE_NUM; i++) {
            output_layer[i] = new OutputNode();
            output_layer[i]->bias = 0;
        }
    }

    // Forward-propagate the currently set input through the network.
    void forwardPropagationEpoc()
    {
        // Input -> first hidden layer, then hidden -> hidden.
        for (int i = 0; i < HIDDEN_LAYER_NUM; i++) {
            if (i == 0) {
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    double sum = 0;
                    for (int k = 0; k < IN_NODE_NUM; k++)
                        sum += input_layer[k]->value * input_layer[k]->weight[j];
                    sum += hidden_layer[i][j]->bias;
                    hidden_layer[i][j]->value = sigmoid(sum);
                }
            } else {
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    double sum = 0;
                    for (int k = 0; k < HIDDEN_NODE_NUM; k++)
                        sum += hidden_layer[i - 1][k]->value * hidden_layer[i - 1][k]->weight[j];
                    sum += hidden_layer[i][j]->bias;
                    hidden_layer[i][j]->value = sigmoid(sum);
                }
            }
        }
        // Last hidden layer -> output layer.
        for (int i = 0; i < OUT_NODE_NUM; i++) {
            double sum = 0;
            for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                sum += hidden_layer[HIDDEN_LAYER_NUM - 1][j]->value
                     * hidden_layer[HIDDEN_LAYER_NUM - 1][j]->weight[i];
            sum += output_layer[i]->bias;
            output_layer[i]->value = sigmoid(sum);
        }
    }

    // Back-propagate the error for the current sample and accumulate the
    // weight/bias deltas (applied later in training()).
    void backPropagationEpoc()
    {
        // Output layer: delta = (target - out) * out * (1 - out).
        for (int i = 0; i < OUT_NODE_NUM; i++) {
            double tmp = output_layer[i]->rightout - output_layer[i]->value;
            error += tmp * tmp / 2;
            output_layer[i]->delta = tmp * (1 - output_layer[i]->value) * output_layer[i]->value;
        }
        // Hidden layers, from last to first.
        for (int i = HIDDEN_LAYER_NUM - 1; i >= 0; i--) {
            if (i == HIDDEN_LAYER_NUM - 1) {
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    double sum = 0;
                    for (int k = 0; k < OUT_NODE_NUM; k++)
                        sum += output_layer[k]->delta * hidden_layer[i][j]->weight[k];
                    hidden_layer[i][j]->delta = sum * (1 - hidden_layer[i][j]->value) * hidden_layer[i][j]->value;
                }
            } else {
                // BUGFIX: the original loop bound was HIDDEN_LAYER_NUM, which
                // computes the delta for only the first HIDDEN_LAYER_NUM nodes
                // of an inner hidden layer; every node needs its delta.
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    double sum = 0;
                    for (int k = 0; k < HIDDEN_NODE_NUM; k++)
                        sum += hidden_layer[i + 1][k]->delta * hidden_layer[i][j]->weight[k];
                    hidden_layer[i][j]->delta = sum * (1 - hidden_layer[i][j]->value) * hidden_layer[i][j]->value;
                }
            }
        }
        // Input layer: accumulate weight deltas.
        for (int i = 0; i < IN_NODE_NUM; i++)
            for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                input_layer[i]->wdelta_sum[j] += input_layer[i]->value * hidden_layer[0][j]->delta;
        // Hidden layers: accumulate weight and bias deltas.
        for (int i = 0; i < HIDDEN_LAYER_NUM; i++) {
            if (i == HIDDEN_LAYER_NUM - 1) {
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    hidden_layer[i][j]->bdelta_sum += hidden_layer[i][j]->delta;
                    for (int k = 0; k < OUT_NODE_NUM; k++)
                        hidden_layer[i][j]->wdelta_sum[k] += hidden_layer[i][j]->value * output_layer[k]->delta;
                }
            } else {
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    hidden_layer[i][j]->bdelta_sum += hidden_layer[i][j]->delta;
                    for (int k = 0; k < HIDDEN_NODE_NUM; k++)
                        hidden_layer[i][j]->wdelta_sum[k] += hidden_layer[i][j]->value * hidden_layer[i + 1][k]->delta;
                }
            }
        }
        // Output layer: accumulate bias deltas.
        for (int i = 0; i < OUT_NODE_NUM; i++)
            output_layer[i]->bdelta_sum += output_layer[i]->delta;
    }

    // Stochastic gradient descent: visit samples in a fixed random order and
    // apply one forward/backward pass plus a weight update per iteration,
    // cnt_bound iterations in total.  (Passing by const& avoids copying the
    // whole sample set; callers are unaffected.)
    void training(const vector<Sample>& sample_group, int cnt_bound)
    {
        int sample_num = sample_group.size();
        for (int i = 0; i < sample_num; i++)
            rand_seg[i].id = i, rand_seg[i].val = rand();
        sort(rand_seg, rand_seg + sample_num, cmpRandSeg);
        int cnt = 0;
        int now_id;
        while (cnt < cnt_bound) {
            error = 0;
            // Reset all accumulated deltas for this single-sample step.
            for (int i = 0; i < IN_NODE_NUM; i++)
                input_layer[i]->wdelta_sum.assign(input_layer[i]->wdelta_sum.size(), 0);
            for (int i = 0; i < HIDDEN_LAYER_NUM; i++)
                for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                    hidden_layer[i][j]->wdelta_sum.assign(hidden_layer[i][j]->wdelta_sum.size(), 0);
                    hidden_layer[i][j]->bdelta_sum = 0;
                }
            for (int i = 0; i < OUT_NODE_NUM; i++)
                output_layer[i]->bdelta_sum = 0;
            now_id = rand_seg[cnt % sample_group.size()].id;
            setInput(sample_group[now_id].in);
            setOutput(sample_group[now_id].out);
            forwardPropagationEpoc();
            backPropagationEpoc();
            // Apply the accumulated deltas (gradient step).
            for (int i = 0; i < IN_NODE_NUM; i++)
                for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                    input_layer[i]->weight[j] += LEARNING_RATE * input_layer[i]->wdelta_sum[j];
            for (int i = 0; i < HIDDEN_LAYER_NUM; i++) {
                if (i == HIDDEN_LAYER_NUM - 1) {
                    for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                        hidden_layer[i][j]->bias += LEARNING_RATE * hidden_layer[i][j]->bdelta_sum;
                        for (int k = 0; k < OUT_NODE_NUM; k++)
                            hidden_layer[i][j]->weight[k] += LEARNING_RATE * hidden_layer[i][j]->wdelta_sum[k];
                    }
                } else {
                    for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                        hidden_layer[i][j]->bias += LEARNING_RATE * hidden_layer[i][j]->bdelta_sum;
                        for (int k = 0; k < HIDDEN_NODE_NUM; k++)
                            hidden_layer[i][j]->weight[k] += LEARNING_RATE * hidden_layer[i][j]->wdelta_sum[k];
                    }
                }
            }
            for (int i = 0; i < OUT_NODE_NUM; i++)
                output_layer[i]->bias += LEARNING_RATE * output_layer[i]->bdelta_sum;
            if (++cnt % 4000 == 0) {
                printf("turn %d/%d finished \n", cnt, cnt_bound);
                printf("training error: %lf\n", error);
            }
        }
    }

    // Run forward propagation for every test sample; each sample's `out`
    // vector is replaced with the network's OUT_NODE_NUM activations.
    void predict(vector<Sample>& test_group)
    {
        int test_num = test_group.size();
        for (int id = 0; id < test_num; id++) {
            test_group[id].out.clear();
            setInput(test_group[id].in);
            for (int i = 0; i < HIDDEN_LAYER_NUM; i++) {
                if (i == 0) {
                    for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                        double sum = 0;
                        for (int k = 0; k < IN_NODE_NUM; k++)
                            sum += input_layer[k]->value * input_layer[k]->weight[j];
                        sum += hidden_layer[i][j]->bias;
                        hidden_layer[i][j]->value = sigmoid(sum);
                    }
                } else {
                    for (int j = 0; j < HIDDEN_NODE_NUM; j++) {
                        double sum = 0;
                        for (int k = 0; k < HIDDEN_NODE_NUM; k++)
                            sum += hidden_layer[i - 1][k]->value * hidden_layer[i - 1][k]->weight[j];
                        sum += hidden_layer[i][j]->bias;
                        hidden_layer[i][j]->value = sigmoid(sum);
                    }
                }
            }
            for (int i = 0; i < OUT_NODE_NUM; i++) {
                double sum = 0;
                for (int j = 0; j < HIDDEN_NODE_NUM; j++)
                    sum += hidden_layer[HIDDEN_LAYER_NUM - 1][j]->value
                         * hidden_layer[HIDDEN_LAYER_NUM - 1][j]->weight[i];
                sum += output_layer[i]->bias;
                output_layer[i]->value = sigmoid(sum);
                test_group[id].out.push_back(output_layer[i]->value);
            }
        }
    }

    // Copy a sample's feature vector into the input layer.
    void setInput(const vector<double>& sample_in)
    {
        for (int i = 0; i < IN_NODE_NUM; i++)
            input_layer[i]->value = sample_in[i];
    }

    // Copy a sample's expected outputs into the output layer.
    void setOutput(const vector<double>& sample_out)
    {
        for (int i = 0; i < OUT_NODE_NUM; i++)
            output_layer[i]->rightout = sample_out[i];
    }
};
} // namespace BPnamespace

using namespace BPnamespace;

BP bp;
vector<Sample> sample_group;
char ex_str[MX_STR];

// Rank class of a sales figure: the number of digit characters, i.e. the
// order of magnitude of the number (the commented line is the original
// full numeric parse, deliberately replaced by digit counting).
int getRank(char num_str[])
{
    int len = strlen(num_str), ret = 0;
    for (int i = 0; i < len; i++)
        if (num_str[i] >= '0' && num_str[i] <= '9')
            ret++;
    // ret = ret * 10 + num_str[i] - '0';
    return ret;
}

// Read training_data.txt, build one-hot rank targets, train the network for
// 30000 iterations and persist the resulting weights.
void part_train()
{
    sample_group.clear();
    char num_str[MX_STR];
    int tol, num;
    double tmp;
    Sample now;
    string infile_name = "training_data.txt";
    ifstream infile(infile_name.c_str(), ios::in);
    infile >> tol;
    for (int i = 0; i < tol; i++) {
        infile >> ex_str; // sample name (unused)
        infile >> num;    // number of provided feature components
        now.in.clear(), now.out.clear();
        for (int j = 0; j < num; j++) {
            infile >> tmp;
            now.in.push_back(tmp);
        }
        // Pad short vectors up to the fixed network input size.
        for (int j = num; j < VEC_NUM; j++)
            now.in.push_back(BASE_SCORE);
        infile >> num_str;
        if (num_str[0] == '-') // unknown sales figure: skip this sample
            continue;
        num = getRank(num_str);
        for (int j = 0; j < OUT_NODE_NUM; j++)
            if (j == num)
                now.out.push_back(1);
            else
                now.out.push_back(0);
        sample_group.push_back(now);
    }
    bp.training(sample_group, 30000);
    bp.write();
}

// Read sample.txt, run the network, and write the argmax rank per sample to
// result.txt (mirrored to stdout).
void part_predict()
{
    sample_group.clear();
    char num_str[MX_STR];
    int tol, num;
    double tmp;
    Sample now;
    string infile_name = "sample.txt";
    ifstream infile(infile_name.c_str(), ios::in);
    infile >> tol;
    for (int i = 0; i < tol; i++) {
        infile >> ex_str;
        infile >> num;
        now.in.clear(), now.out.clear();
        for (int j = 0; j < num; j++) {
            infile >> tmp;
            now.in.push_back(tmp);
        }
        for (int j = num; j < VEC_NUM; j++)
            now.in.push_back(BASE_SCORE);
        infile >> num_str; // trailing field, ignored for prediction
        sample_group.push_back(now);
    }
    bp.predict(sample_group);
    string outfile_name = "result.txt";
    ofstream outfile(outfile_name.c_str(), ios::out);
    for (int i = 0; i < (int)sample_group.size(); i++) {
        tmp = -INF;
        now = sample_group[i];
        // Pick the output node with the highest activation.
        for (int j = 0; j < (int)now.out.size(); j++)
            if (now.out[j] > tmp)
                tmp = now.out[j], num = j;
        outfile << "id: " << i << " rank: " << num << endl;
        cout << "id: " << i << " rank: " << num << endl;
    }
}

int main()
{
    int tp;
    puts("input\n 0: load last trainning data\n 1: restart tranning\n 2: load last tranning data and continue trainning\n");
    scanf("%d", &tp);
    if (tp == 0)
        bp.load();
    else if (tp == 1)
        part_train();
    else if (tp == 2)
        bp.load(), part_train();
    else
        return puts("error"), 0;
    part_predict();
    system("pause");
    return 0;
}
View Code
相关文章推荐
- 基于steam的游戏销量预测 — PART 1 — 爬取steam游戏相关数据的爬虫
- 基于steam的游戏销量预测 — PART 2 — 文本分析
- 基于机器学习多种方法的kaggle竞赛入门之手写数字的图像识别预测
- 基于机器学习预测Lending Club网站贷款申请结果
- 机器学习实践1:基于logistic regression的性别预测
- 基于机器学习的办理银行贷款预测
- [机器学习入门] 李宏毅机器学习笔记-29 (Sequence Labeling Problem part 1;结构化预测-序列标记 part 1)
- 2016年Steam游戏销量排行榜
- BP神经网络原理与应用-基于电影评分预测案例
- NN:实现BP神经网络的回归拟合,基于近红外光谱的汽油辛烷值含量预测结果对比—Jason niu
- 2016年Steam游戏销量排行榜
- [机器学习入门] 李宏毅机器学习笔记-31 (Sequence Labeling Problem part 3 ;结构化预测-序列标记 part 3)
- 基于BP神经网络的2014世界杯比分预测
- 机器学习第一个练手程序 基于决策树的iris数据预测
- 《基于深度学习的线上农产品销量预测模型研究》阅读笔记
- [Steam]成就游戏销量乐观
- 机器学习笔记1:基于Logistic回归进行数据预测
- 基于BP人工神经网络模型预测课程销量的高低
- 2015年Steam游戏销量排行榜
- [机器学习入门] 李宏毅机器学习笔记-30 (Sequence Labeling Problem part 2 ;结构化预测-序列标记 part 2)