分类算法--贝叶斯分类法(MapReduce实现)代码实现<转>
2014-06-27 18:27
447 查看
================================input.txt=======================================
youth high no fair no
youth high no excellent no
middle high no fair yes
senior medium no fair yes
senior low yes fair yes
senior low yes excellent no
middle low yes excellent yes
youth medium no fair no
youth low yes fair yes
senior medium yes fair yes
youth medium yes excellent yes
middle medium no excellent yes
middle high yes fair yes
senior medium no excellent no
====================================================================
package com.mahout.bayes;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.mahout.test.FirstGroupingComparator;
import com.mahout.test.StringStringPairAsce;
import com.mahout.test.ItemBasePass1.FirstPartitioner;
/**
* 贝叶斯算法实现
* @author clxin
*
*/
public class Bayes extends Configured implements Tool {
/**
* 把(x1,x2,..,xn,C)转换为
* C A1 x1
* C A1 x2
* @author clxin
*/
public static class BayesMapper extends MapReduceBase implements
Mapper<LongWritable, Text, StringStringPairAsce, Text> {
private StringStringPairAsce tKey = new StringStringPairAsce();
private Text tValue = new Text();
public void map(LongWritable key, Text value,
OutputCollector<StringStringPairAsce, Text> output, Reporter arg3)
throws IOException {
String [] strArr = value.toString().split("\t");
tKey.set("age"+"\t"+strArr[strArr.length-1],strArr[0]);
tValue.set(strArr[0]);
output.collect(tKey, tValue);
tKey.set("income"+"\t"+strArr[strArr.length-1],strArr[1]);
tValue.set(strArr[1]);
output.collect(tKey, tValue);
tKey.set("student"+"\t"+strArr[strArr.length-1],strArr[2]);
tValue.set(strArr[2]);
output.collect(tKey, tValue);
tKey.set("credit_rating"+"\t"+strArr[strArr.length-1],strArr[3]);
tValue.set(strArr[3]);
output.collect(tKey, tValue);
}
}
public static class BayesReducer extends MapReduceBase implements
Reducer<StringStringPairAsce, Text, Text, Text> {
Text tKey = new Text();
Text tValue= new Text();
@Override
public void reduce(StringStringPairAsce key, Iterator<Text> values,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
int pCcount = 1;
int pXcount = 1;
Map xMap = new HashMap<String,String>();
String tmpValue=values.next().toString();
while(values.hasNext()){
pCcount++;
String newValue=values.next().toString();
if(!tmpValue.equals(newValue)){
xMap.put(tmpValue, pXcount);
tmpValue = newValue;
pXcount=1;
}else{
pXcount++;
}
}
xMap.put(tmpValue, pXcount);
Set<Entry<String, String>> sets = xMap.entrySet();
for (Entry<String, String> entry : sets) {
tKey.set(key.getFirst() + "\t" + entry.getKey());
String [] xValue = key.getFirst().split("\t");
Object ob = entry.getValue();
tValue.set(pCcount+"\t"+ob.toString());
System.out.println("p("+xValue[0]+"="+entry.getKey()+"|"+"class="+xValue[1]+
")="+ob.toString()+"/"+pCcount);
output.collect(tKey, tValue);
}
}
}
public static class FirstPartitioner implements
Partitioner<StringStringPairAsce, Text> {
@Override
public int getPartition(StringStringPairAsce key, Text value,
int numPartitions) {
return key.getFirst().hashCode() & Integer.MAX_VALUE
% numPartitions;
}
@Override
public void configure(JobConf job) {
}
}
@Override
public int run(String[] args) throws Exception {
JobConf conf = new JobConf(getConf(), Bayes.class);
conf.setJobName("Bayes");
//conf.setNumMapTasks(200);
// 设置Map输出的key和value的类型
conf.setMapOutputKeyClass(StringStringPairAsce.class);
conf.setMapOutputValueClass(Text.class);
// 设置Reduce输出的key和value的类型
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
// 设置Mapper和Reducer
conf.setMapperClass(BayesMapper.class);
conf.setReducerClass(BayesReducer.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
conf.setPartitionerClass(FirstPartitioner.class);
conf.setOutputValueGroupingComparator(FirstGroupingComparator.class);
// 设置输入输出目录
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Bayes(), args);
System.exit(exitCode);
}
}
youth high no fair no
youth high no excellent no
middle high no fair yes
senior medium no fair yes
senior low yes fair yes
senior low yes excellent no
middle low yes excellent yes
youth medium no fair no
youth low yes fair yes
senior medium yes fair yes
youth medium yes excellent yes
middle medium no excellent yes
middle high yes fair yes
senior medium no excellent no
====================================================================
package com.mahout.bayes;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.mahout.test.FirstGroupingComparator;
import com.mahout.test.StringStringPairAsce;
import com.mahout.test.ItemBasePass1.FirstPartitioner;
/**
* 贝叶斯算法实现
* @author clxin
*
*/
public class Bayes extends Configured implements Tool {
/**
* 把(x1,x2,..,xn,C)转换为
* C A1 x1
* C A1 x2
* @author clxin
*/
public static class BayesMapper extends MapReduceBase implements
Mapper<LongWritable, Text, StringStringPairAsce, Text> {
private StringStringPairAsce tKey = new StringStringPairAsce();
private Text tValue = new Text();
public void map(LongWritable key, Text value,
OutputCollector<StringStringPairAsce, Text> output, Reporter arg3)
throws IOException {
String [] strArr = value.toString().split("\t");
tKey.set("age"+"\t"+strArr[strArr.length-1],strArr[0]);
tValue.set(strArr[0]);
output.collect(tKey, tValue);
tKey.set("income"+"\t"+strArr[strArr.length-1],strArr[1]);
tValue.set(strArr[1]);
output.collect(tKey, tValue);
tKey.set("student"+"\t"+strArr[strArr.length-1],strArr[2]);
tValue.set(strArr[2]);
output.collect(tKey, tValue);
tKey.set("credit_rating"+"\t"+strArr[strArr.length-1],strArr[3]);
tValue.set(strArr[3]);
output.collect(tKey, tValue);
}
}
public static class BayesReducer extends MapReduceBase implements
Reducer<StringStringPairAsce, Text, Text, Text> {
Text tKey = new Text();
Text tValue= new Text();
@Override
public void reduce(StringStringPairAsce key, Iterator<Text> values,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
int pCcount = 1;
int pXcount = 1;
Map xMap = new HashMap<String,String>();
String tmpValue=values.next().toString();
while(values.hasNext()){
pCcount++;
String newValue=values.next().toString();
if(!tmpValue.equals(newValue)){
xMap.put(tmpValue, pXcount);
tmpValue = newValue;
pXcount=1;
}else{
pXcount++;
}
}
xMap.put(tmpValue, pXcount);
Set<Entry<String, String>> sets = xMap.entrySet();
for (Entry<String, String> entry : sets) {
tKey.set(key.getFirst() + "\t" + entry.getKey());
String [] xValue = key.getFirst().split("\t");
Object ob = entry.getValue();
tValue.set(pCcount+"\t"+ob.toString());
System.out.println("p("+xValue[0]+"="+entry.getKey()+"|"+"class="+xValue[1]+
")="+ob.toString()+"/"+pCcount);
output.collect(tKey, tValue);
}
}
}
public static class FirstPartitioner implements
Partitioner<StringStringPairAsce, Text> {
@Override
public int getPartition(StringStringPairAsce key, Text value,
int numPartitions) {
return key.getFirst().hashCode() & Integer.MAX_VALUE
% numPartitions;
}
@Override
public void configure(JobConf job) {
}
}
@Override
public int run(String[] args) throws Exception {
JobConf conf = new JobConf(getConf(), Bayes.class);
conf.setJobName("Bayes");
//conf.setNumMapTasks(200);
// 设置Map输出的key和value的类型
conf.setMapOutputKeyClass(StringStringPairAsce.class);
conf.setMapOutputValueClass(Text.class);
// 设置Reduce输出的key和value的类型
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
// 设置Mapper和Reducer
conf.setMapperClass(BayesMapper.class);
conf.setReducerClass(BayesReducer.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
conf.setPartitionerClass(FirstPartitioner.class);
conf.setOutputValueGroupingComparator(FirstGroupingComparator.class);
// 设置输入输出目录
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Bayes(), args);
System.exit(exitCode);
}
}
相关文章推荐
- 分类算法--贝叶斯分类法(MapReduce实现)<转>
- TestLink1.9.3测试用例:Excel转换XML工具<二>实现代码
- Java NIO原理图文分析及代码实现<转>
- 最简单的struts2中使用<s:iterator>实现隔行变色代码[珍藏版]
- 算法积累<2>归并排序非递归的实现
- 针对android&ios yuv旋转、镜像、格式转换、裁剪 算法实现<转>
- 使用PHP实现密保卡功能实现代码<打包下载直接运行>
- 已知某公司总人数为W,平均年龄为Y岁(每年3月末计算,同时每年3月初入职新人),假设每年离职率为x,x>0&&x<1,每年保持所有员工总数不变进行招聘,新员工平均年龄21岁。 从今年3月末开始,请实现一个算法,可以计算出第N年后公司员工的平均年龄。(最后结果向上取整)。
- 分类算法--贝叶斯分类法(MapReduce实现)
- java二维码生成与解析代码实现 <转载自http://blog.csdn.net/about58238/article/details/7494704>
- <仅是自己做笔记。。。系列15>实现一个挺高级的字符匹配算法: 给一串很长字符串,要求找到符合要求的字符串,例如目的串:123 1******3***2 ,12*****3这些都要找出来
- Android代码实现APK的下载安装和卸载<1>
- 数据挖掘--kmeans聚类算法mapreduce实现代码<转>
- 逻辑回归(LR)算法java实现<转>
- <iOS>iPhone 应用里实现截屏功能的代码
- 机器学习常见算法分类汇总<转>
- <基础原理进阶>机器学习算法python实现【1】--分类简谈&KNN算法
- 使用PHP实现密保卡功能实现代码<打包下载直接运行>
- <Machine Learning in Action >之二 朴素贝叶斯 C#实现文章分类
- 256 <--> 64算法 互转 ( Java实现)