您的位置:首页 > 产品设计 > UI/UE

分类算法--贝叶斯分类法(MapReduce实现)代码实现<转>

2014-06-27 18:27 447 查看
================================input.txt=======================================

youth high no fair no

youth high no excellent no

middle high no fair yes

senior medium no fair yes

senior low yes fair yes

senior low yes excellent no

middle low yes excellent yes

youth medium no fair no

youth low yes fair yes

senior medium yes fair yes

youth medium yes excellent yes

middle medium no excellent yes

middle high yes fair yes

senior medium no excellent no

====================================================================

package com.mahout.bayes;

import java.io.IOException;

import java.util.HashMap;

import java.util.Iterator;

import java.util.Map;

import java.util.Set;

import java.util.Map.Entry;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.FileInputFormat;

import org.apache.hadoop.mapred.FileOutputFormat;

import org.apache.hadoop.mapred.JobClient;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.mapred.MapReduceBase;

import org.apache.hadoop.mapred.Mapper;

import org.apache.hadoop.mapred.OutputCollector;

import org.apache.hadoop.mapred.Partitioner;

import org.apache.hadoop.mapred.Reducer;

import org.apache.hadoop.mapred.Reporter;

import org.apache.hadoop.mapred.TextInputFormat;

import org.apache.hadoop.mapred.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

import com.mahout.test.FirstGroupingComparator;

import com.mahout.test.StringStringPairAsce;

import com.mahout.test.ItemBasePass1.FirstPartitioner;

/**

* Implementation of the Bayes classification algorithm.

* @author clxin

*

*/

public class Bayes extends Configured implements Tool {



/**

* 把(x1,x2,..,xn,C)转换为

* C A1 x1

* C A1 x2

* @author clxin

*/

public static class BayesMapper extends MapReduceBase implements

Mapper<LongWritable, Text, StringStringPairAsce, Text> {

private StringStringPairAsce tKey = new StringStringPairAsce();

private Text tValue = new Text();



public void map(LongWritable key, Text value,

OutputCollector<StringStringPairAsce, Text> output, Reporter arg3)

throws IOException {

String [] strArr = value.toString().split("\t");

tKey.set("age"+"\t"+strArr[strArr.length-1],strArr[0]);

tValue.set(strArr[0]);

output.collect(tKey, tValue);



tKey.set("income"+"\t"+strArr[strArr.length-1],strArr[1]);

tValue.set(strArr[1]);

output.collect(tKey, tValue);



tKey.set("student"+"\t"+strArr[strArr.length-1],strArr[2]);

tValue.set(strArr[2]);

output.collect(tKey, tValue);



tKey.set("credit_rating"+"\t"+strArr[strArr.length-1],strArr[3]);

tValue.set(strArr[3]);

output.collect(tKey, tValue);

}

}



/**
 * Reducer that, for one (attribute, class) group, counts how often each
 * attribute value occurs and how many records the group holds in total.
 *
 * <p>For every distinct value {@code x} in the group it emits
 * <pre>
 *   key   = attribute + TAB + class + TAB + x
 *   value = groupTotal + TAB + count(x)
 * </pre>
 * so that {@code count(x) / groupTotal} is the observed frequency of
 * {@code attribute = x} among the records of that class.
 *
 * <p>Counts are accumulated per value in a typed map, so the result does not
 * depend on equal values arriving contiguously.
 */
public static class BayesReducer extends MapReduceBase implements
        Reducer<StringStringPairAsce, Text, Text, Text> {

    // Reused across reduce() calls to avoid allocating per output record.
    Text tKey = new Text();
    Text tValue = new Text();

    /**
     * Tallies the values of one group and writes one line per distinct value.
     *
     * @param key      composite key; its first component is read as
     *                 {@code attribute + TAB + class}
     * @param values   attribute values belonging to this group
     * @param output   collector receiving the per-value counts
     * @param reporter progress reporter (unused)
     * @throws IOException if the collector fails to write
     */
    @Override
    public void reduce(StringStringPairAsce key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        int pCcount = 0;
        Map<String, Integer> xMap = new HashMap<String, Integer>();

        while (values.hasNext()) {
            String x = values.next().toString();
            Integer seen = xMap.get(x);
            xMap.put(x, seen == null ? 1 : seen + 1);
            pCcount++;
        }

        // Read after iterating, as the original did; only the first
        // component of the key is used.
        String attributeAndClass = key.getFirst();
        // Loop-invariant: [0] = attribute name, [1] = class label.
        String[] xValue = attributeAndClass.split("\t");

        Set<Entry<String, Integer>> sets = xMap.entrySet();
        for (Entry<String, Integer> entry : sets) {
            String count = entry.getValue().toString();

            tKey.set(attributeAndClass + "\t" + entry.getKey());
            tValue.set(pCcount + "\t" + count);

            System.out.println("p("+xValue[0]+"="+entry.getKey()+"|"+"class="+xValue[1]+
                    ")="+count+"/"+pCcount);
            output.collect(tKey, tValue);
        }
    }
}



/**
 * Partitioner that routes records by the first component of the composite
 * key only, so all records sharing that component reach the same reducer.
 */
public static class FirstPartitioner implements
        Partitioner<StringStringPairAsce, Text> {

    /**
     * Chooses the partition from the hash of the key's first component.
     *
     * @param key           composite map-output key
     * @param value         map-output value (unused)
     * @param numPartitions number of reduce partitions
     * @return a partition index in {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(StringStringPairAsce key, Text value,
            int numPartitions) {
        // Parentheses are required: '%' binds tighter than '&', so without
        // them the hash is masked with (Integer.MAX_VALUE % numPartitions)
        // and some partitions are never selected (e.g. only 0 and 1 when
        // numPartitions == 3).
        int nonNegativeHash = key.getFirst().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % numPartitions;
    }

    /** No configuration is needed. */
    @Override
    public void configure(JobConf job) {
    }
}

/**
 * Configures and runs the Bayes counting job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 when the job was submitted and completed, -1 when the
 *         arguments are missing
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        System.err.println("Usage: Bayes <input path> <output path>");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    JobConf conf = new JobConf(getConf(), Bayes.class);
    conf.setJobName("Bayes");

    // Key and value types of the map output.
    conf.setMapOutputKeyClass(StringStringPairAsce.class);
    conf.setMapOutputValueClass(Text.class);

    // Key and value types of the reduce output.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Mapper and reducer.
    conf.setMapperClass(BayesMapper.class);
    conf.setReducerClass(BayesReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Partition and group on the key's first component only.
    conf.setPartitionerClass(FirstPartitioner.class);
    conf.setOutputValueGroupingComparator(FirstGroupingComparator.class);

    // Input and output directories.
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
}

/**
 * Command-line entry point: runs the job through {@link ToolRunner} and
 * exits the JVM with the status it returns.
 *
 * @param args command-line arguments passed to {@link ToolRunner}
 * @throws Exception if the job fails
 */
public static void main(String[] args) throws Exception {
    final Bayes job = new Bayes();
    System.exit(ToolRunner.run(job, args));
}

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐