您的位置:首页 > 其它

使用pthread统计文本中单词出现次数

2016-05-06 00:54 183 查看
该程序使用 pthread 统计一段文本中每个单词出现的次数,每个线程(thread)处理一行字符串。
统计结果保存在全局变量 map<string, size_t> word_count 中。
在线程函数(kernel function)wordCount 中,用 pthread_mutex_lock / pthread_mutex_unlock 保护对全局变量 word_count 的修改,并用 stringstream 把每行字符串切分成单词。

输入:
first sentence.
second sentence,
third sentence.
fourth sentence.
five sentence
six sentence
seven sentence

输出:
first occurs 1 time
five occurs 1 time
fourth occurs 1 time
second occurs 1 time
sentence occurs 7 times
seven occurs 1 time
six occurs 1 time
third occurs 1 time

Makefile
# Link step. -lpthread is listed AFTER map.o on purpose: the linker resolves
# symbols in command-line order, so a library named before the object that
# needs it may be skipped and pthread_create left undefined.
a.out : map.o
	g++ -std=c++0x -o a.out map.o -lpthread

# Compile step. (Recipe lines must start with a TAB character.)
map.o : map.cpp
	g++ -std=c++0x -c map.cpp

运行:
cat paragraph.txt | ./a.out

代码:
#include <pthread.h>
#include <map>
#include <string>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <vector>
using namespace std;

// Intended number of input lines per worker thread; main() uses it to compute
// how many threads to start (line count divided by this value, rounded up).
#define LINE_PER_THREAD	1

// Serialises the worker threads' updates of word_count.
pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;

// Shared result table: word -> number of times it was seen. Filled in by
// wordCount() while holding count_mutex and printed by main().
map<string, size_t> word_count;

// Argument bundle passed to a worker thread through pthread_create's void*
// parameter.
struct para {
    // Index of the worker this bundle was built for. main() fills it in;
    // wordCount() does not read it.
    int tidx;

    // The text the worker has to split into words and count.
    std::string str;
};

//kernel function
void * wordCount (void *pt){
struct para *local = (struct para *) pt;
string local_str = local->str;
pthread_mutex_lock(&count_mutex);
stringstream ss(local_str);
string token;
while(ss >> token)
++word_count[token];
pthread_mutex_unlock(&count_mutex);
}

int main(){

string word;
vector<string> vstr;
int num_lines = 0;
while(cin && getline(cin, word) && !cin.eof()){
num_lines++;
word.erase(remove(word.begin(), word.end(),','), word.end());
word.erase(remove(word.begin(), word.end(),'.'), word.end());
vstr.push_back(word);
}
int NUM_THREADS = (num_lines + LINE_PER_THREAD - 1) / LINE_PER_THREAD;

pthread_t threads[NUM_THREADS];
for(int i = 0; i < NUM_THREADS; i++){
struct para *str_para = new para();
str_para->tidx = i;
str_para->str = vstr[i];
pthread_create(&threads[i], NULL, wordCount, (void *) str_para);
}
for(int i = 0; i < NUM_THREADS; i++)
pthread_join(threads[i], NULL);

map<string, size_t>::iterator it;
for (it = word_count.begin(); it != word_count.end(); ++it){
cout << it->first << " occurs " << it->second
<< ((it->second > 1) ? " times" : " time") << endl;
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息