[AI]人工神经网络对葡萄酒打分
2016-05-26 17:58
387 查看
人工神经网络(artificial neural network,缩写ANN),简称神经网络(neural network,缩写NN)或类神经网络,是一种模仿生物神经网络(动物的中枢神经系统,特别是大脑)的结构和功能的数学模型或计算模型。神经网络由大量的人工神经元联结进行计算。大多数情况下人工神经网络能在外界信息的基础上改变内部结构,是一种自适应系统。现代神经网络是一种非线性统计性数据建模工具,常用来对输入和输出间复杂的关系进行建模,或用来探索数据的模式。(摘自https://zh.wikipedia.org/wiki/%E4%BA%BA%E5%B7%A5%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C)
给定葡萄酒的各项属性及评分,训练一个人工神经网络,对任意给定新的葡萄酒进行打分。
实验数据见文末数据表(共27个葡萄样品,每个样品含6项属性及总分)。
由于样本数量不足,训练结果存在过拟合等问题。
源码:
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <cmath>
#include <ctime>
// ---- Network / training hyper-parameters ----
#define HIDENODE 5      // number of hidden-layer nodes
#define RATE 0.9        // back-propagation learning rate
#define TOLATHRE 0.001  // tolerance threshold: training stops below this epoch error
#define PRECISION 10000 // granularity of the random weight initialization
#define TOTALNUM 27     // total number of wine samples in ANN.txt
#define SAMPLENUM 17    // number of samples used for training
#define ATTRNUM 7       // values per sample (6 input attributes + 1 score)
using namespace std;
// Training samples, min-max normalized column-wise to [0,1]
double SampleData[SAMPLENUM][ATTRNUM];
// All samples / raw scores / raw score range (used by the test phase)
double TotalData[TOTALNUM][ATTRNUM], EvalData[TOTALNUM], MaxEval, MinEval;
// Hidden-layer net input/output, and output-node net input/output per sample
double hi[HIDENODE], ho[HIDENODE], yi[SAMPLENUM], yo[SAMPLENUM];
// Weights: input->hidden (wih) and hidden->output (who)
double wih[HIDENODE][ATTRNUM - 1], who[HIDENODE];
// Thresholds (biases) of the hidden layer and of the output node
double bh[HIDENODE], bo;
// Sum of squared errors of the last training epoch; starts large on purpose
double Distin = 0x7FFFFFFF;
// Logistic sigmoid activation: maps any real x into the open interval (0, 1).
inline double f(double x)
{
    return 1.0 / (1.0 + exp(-x));
}
// Reads the SAMPLENUM training samples from "ANN.txt" (via stdin redirect),
// min-max normalizes each attribute column to [0,1], and initializes all
// weights and thresholds with uniform random values in [-1, 1].
// NOTE(review): assumes "ANN.txt" exists in the working directory.
void Init()
{
    freopen("ANN.txt", "r", stdin);
    double max_attr[ATTRNUM], min_attr[ATTRNUM];
    memset(max_attr, 0, sizeof(max_attr));
    for (int i = 0; i < ATTRNUM; i++)
        min_attr[i] = 0x7FFFFFFF;
    // Input data, tracking per-column min/max for normalization
    for (int i = 0; i < SAMPLENUM; i++)
    {
        for (int j = 0; j < ATTRNUM; j++)
        {
            cin >> SampleData[i][j];
            max_attr[j] = (max_attr[j] > SampleData[i][j]) ? max_attr[j] : SampleData[i][j];
            min_attr[j] = (min_attr[j] < SampleData[i][j]) ? min_attr[j] : SampleData[i][j];
        }
    }
    // Normalize each column to [0,1]
    for (int i = 0; i < ATTRNUM; i++)
    {
        double diff = max_attr[i] - min_attr[i];
        if (diff == 0)
            continue; // BUG FIX: a constant column would divide by zero
        for (int j = 0; j < SAMPLENUM; j++)
            SampleData[j][i] = (SampleData[j][i] - min_attr[i]) / diff;
    }
    // Random weights in [-1, 1]
    for (int i = 0; i < HIDENODE; i++)
    {
        who[i] = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
        for (int j = 0; j < ATTRNUM - 1; j++)
            wih[i][j] = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
    }
    // BUG FIX: the original loop ran to SAMPLENUM (17), writing past the end
    // of bh[HIDENODE] (5 entries) and corrupting adjacent globals.
    for (int i = 0; i < HIDENODE; i++)
        bh[i] = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
    bo = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
}
// One epoch of online (per-sample) back-propagation over the training set.
// Updates the global weights/thresholds in place and accumulates the epoch's
// sum of squared output errors into the global Distin (stopping criterion).
void Train()
{
    double theta1[HIDENODE], theta2 = 0;
    Distin = 0;
    for (int i = 0; i < SAMPLENUM; i++)
    {
        memset(hi, 0, sizeof(hi));
        memset(theta1, 0, sizeof(theta1));
        yi[i] = 0;
        // Forward pass: hidden layer net input
        for (int j = 0; j < HIDENODE; j++)
            for (int k = 0; k < ATTRNUM - 1; k++)   // FIX: was a magic 6
                hi[j] += SampleData[i][k] * wih[j][k];
        for (int j = 0; j < HIDENODE; j++)
            ho[j] = f(hi[j] + bh[j]);
        // Forward pass: output node
        for (int j = 0; j < HIDENODE; j++)
            yi[i] += who[j] * ho[j];
        yo[i] = f(yi[i] + bo);
        // Output-layer error term (target - output) * sigmoid'(output)
        theta2 = (SampleData[i][ATTRNUM - 1] - yo[i]) * yo[i] * (1 - yo[i]);
        // BUG FIX: the hidden-layer error term must use the weights as they
        // were during the forward pass, so compute theta1 BEFORE updating
        // who (the original updated who first and then read the new values).
        for (int j = 0; j < HIDENODE; j++)
            theta1[j] = theta2 * who[j] * ho[j] * (1 - ho[j]);
        for (int j = 0; j < HIDENODE; j++)
            who[j] += RATE * theta2 * ho[j];
        for (int j = 0; j < HIDENODE; j++)
            for (int k = 0; k < ATTRNUM - 1; k++)   // FIX: was a magic 6
                wih[j][k] += RATE * theta1[j] * SampleData[i][k];
        bo += RATE * theta2;
        for (int j = 0; j < HIDENODE; j++)
            bh[j] += RATE * theta1[j];
        // Accumulate squared error for the stopping criterion
        Distin += pow(yo[i] - SampleData[i][ATTRNUM - 1], 2);
    }
}
// Prints every learned network parameter (weights and thresholds) to stdout.
void OutputParaments()
{
    cout << "weights between input layer and hidden layer:" << endl;
    for (int row = 0; row < HIDENODE; ++row)
    {
        for (int col = 0; col < ATTRNUM - 1; ++col)
            cout << wih[row][col] << ' ';
        cout << endl;
    }
    cout << endl << "weights between hidden layer and output layer:" << endl;
    for (int node = 0; node < HIDENODE; ++node)
        cout << who[node] << ' ';
    cout << endl;
    cout << endl << "Thresholds in hidden layer:" << endl;
    for (int node = 0; node < HIDENODE; ++node)
        cout << bh[node] << ' ';
    cout << endl << endl;
    cout << "Threshold in output layer:" << endl << bo << endl << endl;
}
// Reports the max / min / mean absolute error between the network outputs
// yo[] and the normalized target scores over the training set.
void CalculateDifferences()
{
    double max_diff = 0, min_diff = 0x7FFFFFFF, aver_diff = 0;
    for (int i = 0; i < SAMPLENUM; i++)
    {
        const double d = fabs(SampleData[i][ATTRNUM - 1] - yo[i]);
        aver_diff += d;
        if (d > max_diff)
            max_diff = d;
        if (d < min_diff)
            min_diff = d;
    }
    aver_diff /= SAMPLENUM;
    cout << "Max difference is: " << max_diff << endl;
    cout << "Min difference is: " << min_diff << endl;
    cout << "Average difference is: " << aver_diff << endl;
}
void GetAllGrap()
{
ifstream fin("ANN.txt");
double max_attr[ATTRNUM], min_attr[ATTRNUM];
memset(max_attr, 0, sizeof(max_attr));
MaxEval = 0;
MinEval = 0x7FFFFFFF;
for (int i = 0; i< ATTRNUM; i++)
min_attr[i] = 0x7FFFFFFF;
for (int i = 0; i < TOTALNUM; i++)
{
for (int j = 0; j < ATTRNUM; j++)
{
fin >> TotalData[i][j];
max_attr[j] = (max_attr[j]>TotalData[i][j]) ? max_attr[j] : TotalData[i][j];
min_attr[j] = (min_attr[j]<TotalData[i][j]) ? min_attr[j] : TotalData[i][j];
}
EvalData[i] = TotalData[i][ATTRNUM - 1];
MaxEval = (MaxEval>TotalData[i][ATTRNUM - 1]) ? MaxEval : TotalData[i][ATTRNUM - 1];
MinEval = (MinEval<TotalData[i][ATTRNUM - 1]) ? MinEval : TotalData[i][ATTRNUM - 1];
}
for (int i = 0; i < ATTRNUM; i++)
{
double diff = max_attr[i] - min_attr[i];
for (int j = 0; j < TOTALNUM; j++)
TotalData[j][i] = (TotalData[j][i] - min_attr[i]) / diff;
}
fin.close();
}
void CalculateEval(int grapnum)
{
double tmp_hidden[HIDENODE];
memset(tmp_hidden, 0, sizeof(tmp_hidden));
for (int i = 0; i < HIDENODE; i++)
{
for (int j = 0; j < ATTRNUM - 1; j++)
{
tmp_hidden[i] += TotalData[grapnum][j] * wih[i][j];
}
tmp_hidden[i] -= bh[i];
}
double tmp_output = 0;
for (int i = 0; i < HIDENODE; i++)
{
for (int j = 0; j < ATTRNUM - 1; j++)
tmp_output += tmp_hidden[i] * who[i];
}
tmp_output -= bo;
//tmp_output*=(MaxEval - MinEval) + MinEval;
cout << "the test result is: " << tmp_output << endl;
cout << "the distinguish is: " << fabs(tmp_output-EvalData[grapnum])<<endl;
}
int main()
{
srand((unsigned)time(0));
memset(hi, 0, sizeof(hi));
memset(ho, 0, sizeof(ho));
memset(yi, 0, sizeof(yi));
memset(yo, 0, sizeof(yo));
Init();
while (Distin > TOLATHRE)
Train();
OutputParaments();
CalculateDifferences();
int TestGrap = 0;
GetAllGrap();
freopen("con", "r", stdin);
cout<<"input the test grap num:"<<endl;
while (cin >> TestGrap)
{
if(TestGrap>=0&&TestGrap<=26)
{
CalculateEval(TestGrap);
cout<<"input the test grap num:"<<endl;
}
else
cout<<"Not legal!"<<endl;
}
return 0;
}
给定葡萄酒的各项属性及评分,训练一个人工神经网络,对任意给定新的葡萄酒进行打分。
实验数据如下:
样品编号 | 蛋白质mg/100g(x1) | DPPH自由基1/IC50(g/L)(x2) | 总酚(mmol/kg)(x3) | 葡萄总黄酮(mmol/kg)(x4) | PH值(x5) | 果皮质量(g)(x6) | 总分(y) |
葡萄样品1 | 555.455 | 0.4314 | 23.576 | 9.509 | 3.54 | 0.120 | 77.1 |
葡萄样品2 | 624.094 | 0.4659 | 26.026 | 13.720 | 3.88 | 0.193 | 78.2 |
葡萄样品3 | 580.273 | 0.4102 | 21.479 | 10.853 | 3.80 | 0.160 | 74.6 |
葡萄样品4 | 527.438 | 0.2660 | 10.783 | 4.394 | 3.36 | 0.173 | 75.8 |
葡萄样品5 | 590.651 | 0.3972 | 18.547 | 10.333 | 3.58 | 0.260 | 74 |
葡萄样品6 | 532.026 | 0.2755 | 10.469 | 6.867 | 3.31 | 0.213 | 74.5 |
葡萄样品7 | 489.320 | 0.1758 | 9.181 | 3.497 | 3.13 | 0.136 | 72.6 |
葡萄样品8 | 556.091 | 0.4160 | 15.343 | 8.454 | 2.90 | 0.240 | 71.5 |
葡萄样品9 | 703.300 | 0.6689 | 31.767 | 20.433 | 3.68 | 0.150 | 72.2 |
葡萄样品10 | 547.695 | 0.3263 | 9.191 | 4.603 | 3.66 | 0.210 | 71.6 |
葡萄样品11 | 545.034 | 0.2796 | 6.197 | 2.545 | 3.46 | 0.125 | 72 |
葡萄样品12 | 491.265 | 0.1975 | 11.924 | 3.926 | 3.37 | 0.253 | 72.6 |
葡萄样品13 | 603.686 | 0.4420 | 14.572 | 7.360 | 3.91 | 0.170 | 72.1 |
葡萄样品14 | 597.274 | 0.3606 | 15.661 | 7.780 | 3.46 | 0.256 | 69.9 |
葡萄样品15 | 531.431 | 0.2193 | 12.001 | 5.598 | 3.16 | 0.208 | 71.5 |
葡萄样品16 | 585.783 | 0.2371 | 10.992 | 9.185 | 3.25 | 0.138 | 68.8 |
葡萄样品17 | 546.516 | 0.3594 | 15.394 | 8.613 | 3.38 | 0.336 | 68.8 |
葡萄样品18 | 511.565 | 0.2260 | 7.979 | 5.273 | 3.33 | 0.150 | 71.2 |
葡萄样品19 | 542.201 | 0.3806 | 16.732 | 9.370 | 3.69 | 0.089 | 66.3 |
葡萄样品20 | 556.895 | 0.2825 | 11.914 | 8.069 | 3.66 | 0.247 | 66 |
葡萄样品21 | 566.188 | 0.3803 | 15.639 | 7.542 | 3.66 | 0.107 | 68.2 |
葡萄样品22 | 490.759 | 0.2842 | 16.066 | 7.759 | 3.72 | 0.126 | 65.3 |
葡萄样品23 | 547.813 | 0.5748 | 32.522 | 24.436 | 3.42 | 0.263 | 61.6 |
葡萄样品24 | 504.429 | 0.2836 | 8.192 | 8.265 | 3.65 | 0.227 | 68.1 |
葡萄样品25 | 539.502 | 0.3518 | 11.813 | 5.429 | 3.41 | 0.201 | 65.4 |
葡萄样品26 | 589.906 | 0.3179 | 7.129 | 3.413 | 3.63 | 0.210 | 65.7 |
葡萄样品27 | 523.837 | 0.2654 | 9.145 | 4.711 | 3.31 | 0.189 | 68.3 |
源码:
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <cmath>
#include <ctime>
// ---- Network / training hyper-parameters ----
#define HIDENODE 5      // number of hidden-layer nodes
#define RATE 0.9        // back-propagation learning rate
#define TOLATHRE 0.001  // tolerance threshold: training stops below this epoch error
#define PRECISION 10000 // granularity of the random weight initialization
#define TOTALNUM 27     // total number of wine samples in ANN.txt
#define SAMPLENUM 17    // number of samples used for training
#define ATTRNUM 7       // values per sample (6 input attributes + 1 score)
using namespace std;
// Training samples, min-max normalized column-wise to [0,1]
double SampleData[SAMPLENUM][ATTRNUM];
// All samples / raw scores / raw score range (used by the test phase)
double TotalData[TOTALNUM][ATTRNUM], EvalData[TOTALNUM], MaxEval, MinEval;
// Hidden-layer net input/output, and output-node net input/output per sample
double hi[HIDENODE], ho[HIDENODE], yi[SAMPLENUM], yo[SAMPLENUM];
// Weights: input->hidden (wih) and hidden->output (who)
double wih[HIDENODE][ATTRNUM - 1], who[HIDENODE];
// Thresholds (biases) of the hidden layer and of the output node
double bh[HIDENODE], bo;
// Sum of squared errors of the last training epoch; starts large on purpose
double Distin = 0x7FFFFFFF;
// Logistic sigmoid activation: maps any real x into the open interval (0, 1).
inline double f(double x)
{
    return 1.0 / (1.0 + exp(-x));
}
// Reads the SAMPLENUM training samples from "ANN.txt" (via stdin redirect),
// min-max normalizes each attribute column to [0,1], and initializes all
// weights and thresholds with uniform random values in [-1, 1].
// NOTE(review): assumes "ANN.txt" exists in the working directory.
void Init()
{
    freopen("ANN.txt", "r", stdin);
    double max_attr[ATTRNUM], min_attr[ATTRNUM];
    memset(max_attr, 0, sizeof(max_attr));
    for (int i = 0; i < ATTRNUM; i++)
        min_attr[i] = 0x7FFFFFFF;
    // Input data, tracking per-column min/max for normalization
    for (int i = 0; i < SAMPLENUM; i++)
    {
        for (int j = 0; j < ATTRNUM; j++)
        {
            cin >> SampleData[i][j];
            max_attr[j] = (max_attr[j] > SampleData[i][j]) ? max_attr[j] : SampleData[i][j];
            min_attr[j] = (min_attr[j] < SampleData[i][j]) ? min_attr[j] : SampleData[i][j];
        }
    }
    // Normalize each column to [0,1]
    for (int i = 0; i < ATTRNUM; i++)
    {
        double diff = max_attr[i] - min_attr[i];
        if (diff == 0)
            continue; // BUG FIX: a constant column would divide by zero
        for (int j = 0; j < SAMPLENUM; j++)
            SampleData[j][i] = (SampleData[j][i] - min_attr[i]) / diff;
    }
    // Random weights in [-1, 1]
    for (int i = 0; i < HIDENODE; i++)
    {
        who[i] = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
        for (int j = 0; j < ATTRNUM - 1; j++)
            wih[i][j] = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
    }
    // BUG FIX: the original loop ran to SAMPLENUM (17), writing past the end
    // of bh[HIDENODE] (5 entries) and corrupting adjacent globals.
    for (int i = 0; i < HIDENODE; i++)
        bh[i] = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
    bo = rand() % (2 * PRECISION + 1) / (double)PRECISION - 1;
}
// One epoch of online (per-sample) back-propagation over the training set.
// Updates the global weights/thresholds in place and accumulates the epoch's
// sum of squared output errors into the global Distin (stopping criterion).
void Train()
{
    double theta1[HIDENODE], theta2 = 0;
    Distin = 0;
    for (int i = 0; i < SAMPLENUM; i++)
    {
        memset(hi, 0, sizeof(hi));
        memset(theta1, 0, sizeof(theta1));
        yi[i] = 0;
        // Forward pass: hidden layer net input
        for (int j = 0; j < HIDENODE; j++)
            for (int k = 0; k < ATTRNUM - 1; k++)   // FIX: was a magic 6
                hi[j] += SampleData[i][k] * wih[j][k];
        for (int j = 0; j < HIDENODE; j++)
            ho[j] = f(hi[j] + bh[j]);
        // Forward pass: output node
        for (int j = 0; j < HIDENODE; j++)
            yi[i] += who[j] * ho[j];
        yo[i] = f(yi[i] + bo);
        // Output-layer error term (target - output) * sigmoid'(output)
        theta2 = (SampleData[i][ATTRNUM - 1] - yo[i]) * yo[i] * (1 - yo[i]);
        // BUG FIX: the hidden-layer error term must use the weights as they
        // were during the forward pass, so compute theta1 BEFORE updating
        // who (the original updated who first and then read the new values).
        for (int j = 0; j < HIDENODE; j++)
            theta1[j] = theta2 * who[j] * ho[j] * (1 - ho[j]);
        for (int j = 0; j < HIDENODE; j++)
            who[j] += RATE * theta2 * ho[j];
        for (int j = 0; j < HIDENODE; j++)
            for (int k = 0; k < ATTRNUM - 1; k++)   // FIX: was a magic 6
                wih[j][k] += RATE * theta1[j] * SampleData[i][k];
        bo += RATE * theta2;
        for (int j = 0; j < HIDENODE; j++)
            bh[j] += RATE * theta1[j];
        // Accumulate squared error for the stopping criterion
        Distin += pow(yo[i] - SampleData[i][ATTRNUM - 1], 2);
    }
}
// Prints every learned network parameter (weights and thresholds) to stdout.
void OutputParaments()
{
    cout << "weights between input layer and hidden layer:" << endl;
    for (int row = 0; row < HIDENODE; ++row)
    {
        for (int col = 0; col < ATTRNUM - 1; ++col)
            cout << wih[row][col] << ' ';
        cout << endl;
    }
    cout << endl << "weights between hidden layer and output layer:" << endl;
    for (int node = 0; node < HIDENODE; ++node)
        cout << who[node] << ' ';
    cout << endl;
    cout << endl << "Thresholds in hidden layer:" << endl;
    for (int node = 0; node < HIDENODE; ++node)
        cout << bh[node] << ' ';
    cout << endl << endl;
    cout << "Threshold in output layer:" << endl << bo << endl << endl;
}
// Reports the max / min / mean absolute error between the network outputs
// yo[] and the normalized target scores over the training set.
void CalculateDifferences()
{
    double max_diff = 0, min_diff = 0x7FFFFFFF, aver_diff = 0;
    for (int i = 0; i < SAMPLENUM; i++)
    {
        const double d = fabs(SampleData[i][ATTRNUM - 1] - yo[i]);
        aver_diff += d;
        if (d > max_diff)
            max_diff = d;
        if (d < min_diff)
            min_diff = d;
    }
    aver_diff /= SAMPLENUM;
    cout << "Max difference is: " << max_diff << endl;
    cout << "Min difference is: " << min_diff << endl;
    cout << "Average difference is: " << aver_diff << endl;
}
void GetAllGrap()
{
ifstream fin("ANN.txt");
double max_attr[ATTRNUM], min_attr[ATTRNUM];
memset(max_attr, 0, sizeof(max_attr));
MaxEval = 0;
MinEval = 0x7FFFFFFF;
for (int i = 0; i< ATTRNUM; i++)
min_attr[i] = 0x7FFFFFFF;
for (int i = 0; i < TOTALNUM; i++)
{
for (int j = 0; j < ATTRNUM; j++)
{
fin >> TotalData[i][j];
max_attr[j] = (max_attr[j]>TotalData[i][j]) ? max_attr[j] : TotalData[i][j];
min_attr[j] = (min_attr[j]<TotalData[i][j]) ? min_attr[j] : TotalData[i][j];
}
EvalData[i] = TotalData[i][ATTRNUM - 1];
MaxEval = (MaxEval>TotalData[i][ATTRNUM - 1]) ? MaxEval : TotalData[i][ATTRNUM - 1];
MinEval = (MinEval<TotalData[i][ATTRNUM - 1]) ? MinEval : TotalData[i][ATTRNUM - 1];
}
for (int i = 0; i < ATTRNUM; i++)
{
double diff = max_attr[i] - min_attr[i];
for (int j = 0; j < TOTALNUM; j++)
TotalData[j][i] = (TotalData[j][i] - min_attr[i]) / diff;
}
fin.close();
}
void CalculateEval(int grapnum)
{
double tmp_hidden[HIDENODE];
memset(tmp_hidden, 0, sizeof(tmp_hidden));
for (int i = 0; i < HIDENODE; i++)
{
for (int j = 0; j < ATTRNUM - 1; j++)
{
tmp_hidden[i] += TotalData[grapnum][j] * wih[i][j];
}
tmp_hidden[i] -= bh[i];
}
double tmp_output = 0;
for (int i = 0; i < HIDENODE; i++)
{
for (int j = 0; j < ATTRNUM - 1; j++)
tmp_output += tmp_hidden[i] * who[i];
}
tmp_output -= bo;
//tmp_output*=(MaxEval - MinEval) + MinEval;
cout << "the test result is: " << tmp_output << endl;
cout << "the distinguish is: " << fabs(tmp_output-EvalData[grapnum])<<endl;
}
int main()
{
srand((unsigned)time(0));
memset(hi, 0, sizeof(hi));
memset(ho, 0, sizeof(ho));
memset(yi, 0, sizeof(yi));
memset(yo, 0, sizeof(yo));
Init();
while (Distin > TOLATHRE)
Train();
OutputParaments();
CalculateDifferences();
int TestGrap = 0;
GetAllGrap();
freopen("con", "r", stdin);
cout<<"input the test grap num:"<<endl;
while (cin >> TestGrap)
{
if(TestGrap>=0&&TestGrap<=26)
{
CalculateEval(TestGrap);
cout<<"input the test grap num:"<<endl;
}
else
cout<<"Not legal!"<<endl;
}
return 0;
}
相关文章推荐
- 反向传播(Backpropagation)算法的数学原理
- 了解一下
- ANN学习-算法篇--菜鸟之路(一)
- 传统bp算法类C/C++实现
- OpenCV之CvANN_MLP和CvSVM测试
- 读《边城》与《时生》
- 【转载】神经网络编程入门
- 斯坦福吴恩达-cousera课程笔记-Logistic回归
- 数据量多少与训练的过拟合和欠拟合 overfitting and underfitting
- 简单的感知学习原理例子 人工神经网络 Perceptron Learning Rule
- 机器学习中关于正则化的理解
- 数据挖掘: overfitting 过拟合情况 整理
- 机器学习中过拟合问题分析及解决方法
- 用简单的一个例子阐释人工神经网络的机理
- 过拟合以及解决办法
- 神经网络入门(连载之一)
- 机器学习—过拟合overfitting
- NTU-Coursera机器学习:过拟合(Overfitting)与正规化(Regularization)
- overfitting-过拟合
- 详解五大分类方法及其优缺点,数据挖掘师必会!