您的位置:首页 > 编程语言 > Java开发

用java和weka进行分类

2016-07-08 17:03 531 查看
用weka进行分类的小程序。

package test.weka;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Random;
import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.classifiers.Evaluation;
import weka.core.Instance;
import java.util.Date;

/**
 * Small helper around Weka for training a classifier on an ARFF file and
 * reporting evaluation results.
 *
 * <p>Three evaluation modes are offered: testing on the training file itself
 * ({@link #selfTestResult}), testing on a separate file ({@link #useTestset})
 * and 10-fold cross-validation ({@link #GetTenCrossResult}). Each run prints
 * its report to standard output and appends it to {@code result.txt} in the
 * working directory.
 *
 * <p>A new {@link Evaluation} is created for every run rather than once in the
 * constructor: the original author observed that sharing one instance between
 * runs produces different numbers (presumably because {@code Evaluation} keeps
 * running totals across calls -- confirm against the Weka documentation).
 */
public class Myweka {
    /** Training data loaded in the constructor; stays {@code null} if loading failed. */
    private Instances trainInstances = null;
    /** Evaluation of the most recent run; replaced at the start of every run. */
    private Evaluation evaluation = null;
    /** ARFF file the training data was read from (re-read by {@link #selfTestResult}). */
    private final File trainfile;
    /** Index of the class attribute, applied to both training and test data. */
    private final int classindex;

    /**
     * Loads the training set from an ARFF file.
     *
     * <p>Loading errors are printed to standard error and not rethrown; in that
     * case the training data stays {@code null} and later evaluation calls will
     * fail (and print their own stack trace).
     *
     * @param trainfile  ARFF file containing the training instances
     * @param classindex index of the class attribute
     */
    public Myweka(File trainfile, int classindex) {
        this.trainfile = trainfile;
        this.classindex = classindex;
        try {
            trainInstances = loadInstances(trainfile, classindex);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Trains the classifier on the training set, then tests it on the very same
     * file, printing the index of every misclassified instance.
     *
     * @param classifier classifier to train and evaluate
     */
    public void selfTestResult(Classifier classifier) {
        trainAndTest(classifier, trainfile);
    }

    /**
     * Trains the classifier on the training set, then tests it on a separate
     * file. The test file is assumed to use the same class index as the
     * training file.
     *
     * @param classifier classifier to train and evaluate
     * @param testFile   ARFF file containing the test instances
     */
    public void useTestset(Classifier classifier, File testFile) {
        trainAndTest(classifier, testFile);
    }

    /**
     * Runs 10-fold cross-validation of the classifier on the training set
     * (random seed 1) and reports the result.
     *
     * @param classifier classifier to evaluate
     */
    public void GetTenCrossResult(Classifier classifier) {
        try {
            // Kept from the original so the caller's classifier is left trained on
            // the full training set after this call.
            classifier.buildClassifier(trainInstances);
            evaluation = new Evaluation(trainInstances);
            evaluation.crossValidateModel(classifier, trainInstances, 10, new Random(1));
            writeResult();
            consoleResult();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Shared implementation of {@link #selfTestResult} and {@link #useTestset}:
     * build the classifier, classify every test instance, list the failures and
     * report the evaluation. Any failure is printed, never rethrown.
     */
    private void trainAndTest(Classifier classifier, File testFile) {
        try {
            classifier.buildClassifier(trainInstances);
            evaluation = new Evaluation(trainInstances);
            Instances testInstances = loadInstances(testFile, classindex);

            int count = 0;
            for (int i = 0; i < testInstances.numInstances(); i++) {
                Instance current = testInstances.instance(i);
                if (classifier.classifyInstance(current) != current.classValue()) {
                    count++;
                    // i is the zero-based position of the instance in the test set.
                    System.out.println("第"+i+"个样本实例分类错误!");
                }
            }
            System.out.println("有"+count+"个样本分类错误!");

            evaluation.evaluateModel(classifier, testInstances);
            writeResult();
            consoleResult();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads an ARFF file into an {@link Instances} object and sets its class
     * index. The reader is always closed, including when opening or parsing fails.
     */
    private static Instances loadInstances(File arffFile, int classindex) throws Exception {
        FileReader reader = null;
        try {
            reader = new FileReader(arffFile);
            Instances data = new Instances(reader);
            data.setClassIndex(classindex);
            return data;
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Closes a resource if it was opened. The null check matters: when opening
     * failed the variable is still {@code null}, and calling {@code close()} on it
     * would throw a NullPointerException that hides the original error.
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends a timestamped report of the current evaluation to
     * {@code result.txt}. Failures are reported on the console and not rethrown.
     */
    private void writeResult() {
        BufferedWriter bfr = null;
        try {
            bfr = new BufferedWriter(new FileWriter(new File("result.txt"), true));
            bfr.write(getCurrentTime());
            bfr.newLine();
            bfr.write("the number of Attributes:  "+trainInstances.numAttributes());
            bfr.newLine();
            bfr.write("the number if instances:   "+trainInstances.numInstances());
            bfr.newLine();
            bfr.write(evaluation.toSummaryString());
            bfr.newLine();
            bfr.write(evaluation.toClassDetailsString());
            bfr.newLine();
            bfr.write(evaluation.toMatrixString());
            bfr.newLine();
            bfr.flush();
        } catch (IOException e) {
            System.out.println("文件非法或arff格式错误");
            // Also print the cause; the fixed message alone does not say what failed.
            e.printStackTrace();
        } catch (Exception e) {
            System.out.println("分类器创建失败");
            e.printStackTrace();
        } finally {
            closeQuietly(bfr);
        }
    }

    /** Prints the summary, per-class details and confusion matrix to standard output. */
    private void consoleResult() throws Exception {
        System.out.println(evaluation.toSummaryString());
        System.out.println(evaluation.toClassDetailsString());
        System.out.println(evaluation.toMatrixString());
    }

    /** Returns the current local time formatted as {@code yyyy-MM-dd HH:mm:ss}. */
    private String getCurrentTime() {
        Date date = new Date();
        DateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return format.format(date);
    }
}


经过测试发现:在main方法中先调用GetTenCrossResult(),再调用selfTestResult()时,如果两次共用同一个Evaluation对象(即只在构造函数中new一次),与在每个方法中各自new一个Evaluation相比,得到的结果不一样。难道是共用同一个Evaluation时,第二次的结果会受到第一次的影响?

看到的大神,解释一下。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: