您的位置:首页 > 编程语言 > C语言/C++

hadoop的c++版wordcount例子(streaming方式)

2012-09-14 12:15 357 查看
0.数据文件

我爱你
首都
北京
我爱你
北京
我爱你
伟大首都
北京
首都
我爱java
come
go

1.map
#include <iostream>
using namespace std;
// Streaming mapper: copies every line read from standard input to standard
// output as a "<line>\t1" record.
//
// The whole input line is used as the key; it is not split into words.
//
// The loop is driven by the result of getline() rather than by cin.eof():
//  - a final line that is not terminated by '\n' is still emitted (testing
//    eof() after the read discarded it, because that read sets eofbit even
//    though it produced data);
//  - a stream failure other than end-of-file ends the loop instead of
//    spinning forever.
// '\n' is written instead of endl so the stream is not flushed per record.
void map(){
    std::string line;
    while (std::getline(std::cin, line)) {
        std::cout << line << '\t' << '1' << '\n';
    }
}
// Entry point of the mapper program: runs map() once over standard input.
// The command-line arguments are accepted but not used.
int main(int argc,char** argv){
    (void)argc;
    (void)argv;
    map();
    return 0;
}

2.reduce
#include <iostream>
#include <vector>
using namespace std;
// Hand-written string tokenizer (the C++ standard library does not ship one).
//
// src       - text to cut into tokens.
// separator - set of delimiter characters; every character in it acts as a
//             delimiter on its own (it is not matched as a whole substring).
//
// Returns the tokens in order of appearance. Notes on the exact behaviour:
//  - a run of consecutive delimiters counts as a single delimiter;
//  - a delimiter at the very start yields an empty first token;
//  - delimiters at the very end do not yield a trailing empty token;
//  - input without any delimiter (including "") yields exactly one token
//    holding the whole input.
vector<string> split(const string& src, const string& separator)
{
    vector<string> tokens;
    string::size_type begin = 0;
    for (;;)
    {
        const string::size_type hit = src.find_first_of(separator, begin);
        if (hit == string::npos)
        {
            // No delimiter left: the remainder is the last token.
            tokens.push_back(src.substr(begin));
            return tokens;
        }
        tokens.push_back(src.substr(begin, hit - begin));
        // Skip the whole run of delimiters following this token.
        begin = src.find_first_not_of(separator, hit);
        if (begin == string::npos)
        {
            // Only delimiters remained after the token.
            return tokens;
        }
    }
}

// Streaming reducer: reads "<key>\t<value>" lines from standard input and, for
// every run of consecutive lines sharing the same key, writes one
// "<key>\t<count>" line to standard output.
//
// The key is the text before the first tab (the whole line when there is no
// tab). The count is the number of lines in the run; the value field itself is
// not read. Equal keys are only merged when they are adjacent, so the input
// has to arrive grouped by key.
//
// Behaviour notes:
//  - The loop is driven by the result of getline(), so a final line without a
//    trailing '\n' is counted and a failed stream cannot cause an endless loop.
//  - "Has a key been seen yet" is tracked with a flag rather than by comparing
//    against "", so an empty key (blank line, or line starting with a tab) is
//    counted like any other key.
//  - Nothing is written for empty input.
//  - No exceptions are caught here: a failure propagates to the caller instead
//    of being logged while the same line is retried.
void reduce(){
    std::string current_word;   // key of the run currently being counted
    bool have_word = false;     // false until the first line has been read
    long long word_num = 0;     // number of lines seen for current_word

    std::string line;
    while (std::getline(std::cin, line)) {
        // substr(0, npos) yields the whole line when no tab is present.
        const std::string word = line.substr(0, line.find('\t'));

        if (have_word && word == current_word) {
            ++word_num;
            continue;
        }

        // Key changed: flush the finished run (if any) and start a new one.
        if (have_word) {
            std::cout << current_word << '\t' << word_num << '\n';
        }
        current_word = word;
        word_num = 1;
        have_word = true;
    }

    // Flush the last run.
    if (have_word) {
        std::cout << current_word << '\t' << word_num << '\n';
    }
}

// Entry point of the reducer program: runs reduce() once over standard input.
// The command-line arguments are accepted but not used.
int main(int argc,char** argv){
    (void)argc;
    (void)argv;
    reduce();
    return 0;
}

3.启动命令(执行前需先用 g++ 将上面的 map、reduce 两个程序分别编译为 stream_map.out 和 stream_reduce.out)
hadoop fs -rmr /output-data

hadoop jar /home/machen/hadoop/hadoop-1.0.3/contrib/streaming/hadoop-streaming-1.0.3.jar -file /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_map.out -file /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_reduce.out -mapper /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_map.out -reducer /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_reduce.out -input /input-data -output /output-data
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: