hadoop的c++版wordcount例子(streaming方式)
2012-09-14 12:15
357 查看
0.数据文件
我爱你
首都
北京
我爱你
北京
我爱你
伟大首都
北京
首都
我爱java
come
go
1.map
#include <iostream>
using namespace std;
// Streaming mapper: echo every stdin line as "<line>\t1".
// The sample data set is one word per line, so no tokenization is done here.
void map(){
    std::string line;
    // Use getline() itself as the loop condition. The previous
    // `getline(); while(!cin.eof()){...; getline();}` pattern silently
    // dropped a final line that is not terminated by '\n', so the last
    // word of the input could be lost.
    while (std::getline(std::cin, line)) {
        std::cout << line << "\t" << "1" << std::endl;
    }
}
// Entry point of the streaming-mapper executable: command-line
// arguments are unused; everything flows through stdin/stdout.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;
    map();
    return 0;
}
2.reduce
#include <iostream>
#include <vector>
using namespace std;
// Hand-rolled string split, since the (pre-C++20) standard library
// does not provide one. Splits `src` on any character in `separator`.
// Runs of consecutive separators are collapsed; a leading separator
// yields one empty token, a trailing separator yields none.
std::vector<std::string> split(const std::string& src, const std::string& separator)
{
    std::vector<std::string> parts;
    std::string::size_type pos = 0;
    for (;;) {
        const std::string::size_type hit = src.find_first_of(separator, pos);
        if (hit == std::string::npos) {
            break;  // no more separators: the tail is the last token
        }
        parts.push_back(src.substr(pos, hit - pos));
        // Skip the whole run of separators following this token.
        pos = src.find_first_not_of(separator, hit);
        if (pos == std::string::npos) {
            return parts;  // string ended in separators: no empty tail token
        }
    }
    parts.push_back(src.substr(pos));
    return parts;
}
// Streaming reducer: stdin lines arrive sorted by key ("word\t1"), so
// equal words are adjacent; count each run and emit "word\tcount".
void reduce(){
    std::string last_word;  // boundary marker between sorted key groups
    int word_num = 0;       // size of the current run of `last_word`
    std::string line;
    // getline() as the loop condition also consumes a final line without
    // '\n'; the old `while(!cin.eof())` pattern silently dropped it and
    // under-counted the last word. The try/catch that used to wrap the
    // body was dead code: vector::operator[] never throws.
    while (std::getline(std::cin, line)) {
        // The key is everything before the first tab (equivalent to
        // split(line, "\t")[0], without the extra allocations).
        const std::string word = line.substr(0, line.find('\t'));
        if (word.empty()) {
            continue;  // skip blank/malformed lines instead of counting ""
        }
        // NOTE(review): the count column after the tab is ignored — each
        // line is assumed to contribute exactly 1. That matches this
        // mapper, but would break if a combiner were added; confirm.
        if (last_word.empty()) {
            last_word = word;  // first group seen
            word_num = 0;
        }
        if (word == last_word) {
            word_num++;
        } else {
            // Key changed: flush the finished group, start the next one.
            std::cout << last_word << "\t" << word_num << std::endl;
            word_num = 1;
            last_word = word;
        }
    }
    // Flush the final group. The guard avoids emitting a bogus "\t0"
    // line when the input is empty (the original printed it anyway).
    if (word_num > 0) {
        std::cout << last_word << "\t" << word_num << std::endl;
    }
}
// Entry point of the streaming-reducer executable: command-line
// arguments are unused; everything flows through stdin/stdout.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;
    reduce();
    return 0;
}
3.启动命令
hadoop fs -rmr /output-data
hadoop jar /home/machen/hadoop/hadoop-1.0.3/contrib/streaming/hadoop-streaming-1.0.3.jar -file /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_map.out -file /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_reduce.out -mapper /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_map.out -reducer /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_reduce.out -input /input-data -output /output-data
我爱你
首都
北京
我爱你
北京
我爱你
伟大首都
北京
首都
我爱java
come
go
1.map
#include <iostream>
using namespace std;
// Streaming mapper: echo every stdin line as "<line>\t1".
// The sample data set is one word per line, so no tokenization is done here.
void map(){
    std::string line;
    // Use getline() itself as the loop condition. The previous
    // `getline(); while(!cin.eof()){...; getline();}` pattern silently
    // dropped a final line that is not terminated by '\n', so the last
    // word of the input could be lost.
    while (std::getline(std::cin, line)) {
        std::cout << line << "\t" << "1" << std::endl;
    }
}
// Entry point of the streaming-mapper executable: command-line
// arguments are unused; everything flows through stdin/stdout.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;
    map();
    return 0;
}
2.reduce
#include <iostream>
#include <vector>
using namespace std;
// Hand-rolled string split, since the (pre-C++20) standard library
// does not provide one. Splits `src` on any character in `separator`.
// Runs of consecutive separators are collapsed; a leading separator
// yields one empty token, a trailing separator yields none.
std::vector<std::string> split(const std::string& src, const std::string& separator)
{
    std::vector<std::string> parts;
    std::string::size_type pos = 0;
    for (;;) {
        const std::string::size_type hit = src.find_first_of(separator, pos);
        if (hit == std::string::npos) {
            break;  // no more separators: the tail is the last token
        }
        parts.push_back(src.substr(pos, hit - pos));
        // Skip the whole run of separators following this token.
        pos = src.find_first_not_of(separator, hit);
        if (pos == std::string::npos) {
            return parts;  // string ended in separators: no empty tail token
        }
    }
    parts.push_back(src.substr(pos));
    return parts;
}
// Streaming reducer: stdin lines arrive sorted by key ("word\t1"), so
// equal words are adjacent; count each run and emit "word\tcount".
void reduce(){
    std::string last_word;  // boundary marker between sorted key groups
    int word_num = 0;       // size of the current run of `last_word`
    std::string line;
    // getline() as the loop condition also consumes a final line without
    // '\n'; the old `while(!cin.eof())` pattern silently dropped it and
    // under-counted the last word. The try/catch that used to wrap the
    // body was dead code: vector::operator[] never throws.
    while (std::getline(std::cin, line)) {
        // The key is everything before the first tab (equivalent to
        // split(line, "\t")[0], without the extra allocations).
        const std::string word = line.substr(0, line.find('\t'));
        if (word.empty()) {
            continue;  // skip blank/malformed lines instead of counting ""
        }
        // NOTE(review): the count column after the tab is ignored — each
        // line is assumed to contribute exactly 1. That matches this
        // mapper, but would break if a combiner were added; confirm.
        if (last_word.empty()) {
            last_word = word;  // first group seen
            word_num = 0;
        }
        if (word == last_word) {
            word_num++;
        } else {
            // Key changed: flush the finished group, start the next one.
            std::cout << last_word << "\t" << word_num << std::endl;
            word_num = 1;
            last_word = word;
        }
    }
    // Flush the final group. The guard avoids emitting a bogus "\t0"
    // line when the input is empty (the original printed it anyway).
    if (word_num > 0) {
        std::cout << last_word << "\t" << word_num << std::endl;
    }
}
// Entry point of the streaming-reducer executable: command-line
// arguments are unused; everything flows through stdin/stdout.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;
    reduce();
    return 0;
}
3.启动命令
hadoop fs -rmr /output-data
hadoop jar /home/machen/hadoop/hadoop-1.0.3/contrib/streaming/hadoop-streaming-1.0.3.jar -file /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_map.out -file /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_reduce.out -mapper /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_map.out -reducer /home/machen/hadoop/hadoop-1.0.3/WordCount/python/stream_reduce.out -input /input-data -output /output-data
相关文章推荐
- 从wordcount 开始 mapreduce (C++\hadoop streaming模式)
- 从wordcount 开始 mapreduce (C++\hadoop streaming模式)
- 【hadoop学习】在伪分布式hadoop上实践word count程序——c/c++ streaming版本(未完)
- eclipse中运行Hadoop2.6.0的WordCount例子
- Hadoop之wordcount(Java 原生和Hadoop Streaming)
- hadoop自带例子wordcount的具体运行步骤
- 查看Hadoop-1.2.1里面的例子jar并对WordCount进行修改
- hadoop基础----hadoop实战(五)-----myeclipse开发MapReduce---WordCount例子---解析MapReduce的写法
- 分析Hadoop自带WordCount例子的执行过程
- 关于hadoop1.0.4运行自带例子WordCount内存溢出问题。
- Hadoop中WordCount例子的实现
- hadoop例子程序:求圆周率和wordcount
- hadoop基础----hadoop实战(五)-----myeclipse开发MapReduce---WordCount例子---解析MapReduce的写法
- Hadoop伪分布式运行wordcount小例子
- Python+Hadoop Streaming实现MapReduce(word count)
- hadoop的第一个例子wordcount
- Hadoop WordCount(Streaming,Python,Java三合一)
- 【hadoop学习】在伪分布式hadoop上实践word count程序——c/c++ pipes版本
- Hadoop测试例子wordcount
- hadoop第一个例子WordCount