您的位置:首页 > 编程语言 > C语言/C++

C语言获取文件中单词并进行处理

2012-06-16 11:43 330 查看
/******************************************************
*
*         word extraction routines begin here
*
******************************************************/
/*
 * Tell whether a character may be part of a word.
 *
 * A word character is an ASCII letter (either case) or a hyphen.
 * Returns true for such characters and false for everything else.
 */
bool isWord(char c)
{
    const bool lower = (c >= 'a') && (c <= 'z');
    const bool upper = (c >= 'A') && (c <= 'Z');

    return lower || upper || (c == '-');
}

/*
 * State of the buffered word reader.
 *
 * initStorer() allocates `buffer`, fillEngine() loads file data into it and
 * getWord() scans from `current` towards `end`, copying each word it finds
 * into `wordBuf`.
 */
struct word_geter_engine {
char * buffer;          /* start of the heap-allocated read buffer */
char * current;         /* next byte getWord() will examine */
char * end;             /* scan limit: one past the last byte to examine */
int bufferSize;         /* capacity of `buffer` in bytes */
char wordBuf[WORDLEN];  /* last word extracted, cut to WORDLEN - 1 chars */
bool needFill;          /* set by getWord() when the scan reaches `end`;
                           cleared by fillEngine() after a successful read */
};

/*
 * Allocate the read buffer of an engine and put the engine into its
 * initial, empty state.
 *
 * capacity: size of the read buffer in bytes; must be positive.
 * engine:   engine to initialise; its previous contents are overwritten.
 *
 * Returns true on success.  Returns false, with engine->buffer set to NULL,
 * when capacity is not positive or the allocation fails.  On success the
 * caller owns engine->buffer and must release it with free().
 */
bool initStorer(int capacity, struct word_geter_engine * engine)
{
    engine->buffer = NULL;

    /* A negative capacity would convert to a huge size_t inside malloc(). */
    if (capacity <= 0) {
        return false;
    }

    engine->buffer = malloc((size_t)capacity);
    if (NULL == engine->buffer) {
        return false;
    }

    engine->current = engine->buffer;
    /* Nothing has been read yet, so the valid region is empty; this keeps a
       premature scan from walking over uninitialised memory. */
    engine->end = engine->buffer;
    engine->bufferSize = capacity;
    engine->wordBuf[0] = '\0';
    engine->needFill = true;

    return true;
}

/*
 * Refill the engine's buffer from a stream.
 *
 * If part of the buffer has already been consumed (current != buffer), the
 * unconsumed bytes [current, end) are first moved to the front of the
 * buffer so that a word cut off by the previous read can be completed.
 * The free space after them is then filled with data from to_read.
 *
 * When current == buffer nothing is preserved and the whole buffer is
 * overwritten; this is the state right after initStorer().
 *
 * to_read: stream to read from.
 * engine:  engine whose buffer is refilled.
 *
 * Returns true when at least one byte was read; needFill is then cleared
 * and end points one past the last valid byte.  Returns false when nothing
 * could be read (end of file or read error); needFill is left unchanged.
 */
bool fillEngine(FILE * to_read, struct word_geter_engine * engine)
{
    size_t kept = 0;
    size_t room = 0;
    size_t got = 0;
    char * start = NULL;

    if (engine->current != engine->buffer) {
        kept = (size_t)(engine->end - engine->current);
        /* Source and destination lie in the same buffer and may overlap,
           so memmove is required; memcpy would be undefined behaviour. */
        memmove(engine->buffer, engine->current, kept);
        engine->current = engine->buffer;
        /* Keep end consistent with the moved data even if the read fails. */
        engine->end = engine->buffer + kept;
    }

    start = engine->buffer + kept;
    room = (size_t)engine->bufferSize - kept;

    got = fread(start, 1, room, to_read);
    if (0 == got) {
        return false;
    }

    engine->end = start + got;
    engine->needFill = false;
    return true;
}

/*
 * Extract the next word from the engine's buffer into engine->wordBuf.
 *
 * Non-word characters (see isWord) starting at engine->current are skipped,
 * then the following run of word characters is copied into wordBuf and
 * NUL-terminated.  Words longer than the capacity of wordBuf are truncated.
 *
 * Returns true when a complete word was extracted; engine->current then
 * points just past that word.
 *
 * Returns false, with needFill set and wordBuf empty, when the scan reaches
 * engine->end.  A word that touches the end of the buffer is treated as
 * possibly incomplete: engine->current is left at its first character so
 * that a refill can complete it.
 */
bool getWord(struct word_geter_engine * engine)
{
    char * p = engine->current;
    size_t wordlen = 0;

    engine->wordBuf[0] = '\0';

    /* Check the bound before dereferencing so *end is never read. */
    while ((p < engine->end) && !isWord(*p)) {
        p++;
    }

    engine->current = p;
    if (p >= engine->end) {
        engine->needFill = true;
        return false;
    }

    while ((p < engine->end) && isWord(*p)) {
        p++;
    }

    if (p >= engine->end) {
        /* The word may continue in data that has not been read yet. */
        engine->needFill = true;
        return false;
    }

    wordlen = (size_t)(p - engine->current);
    if (wordlen > sizeof(engine->wordBuf) - 1) {
        wordlen = sizeof(engine->wordBuf) - 1;
    }
    memcpy(engine->wordBuf, engine->current, wordlen);
    engine->wordBuf[wordlen] = '\0';
    engine->current = p;
    return true;
}

/*
 * Read every word from to_read and hand each one to the caller-supplied
 * callback.
 *
 * to_read:   stream to read words from.
 * processer: callback invoked once per word with a NUL-terminated string
 *            that is only valid for the duration of the call.  Its return
 *            value is ignored.
 *
 * Returns true when the stream was consumed up to end of file, false when
 * the working buffer could not be allocated or a read error occurred.
 */
bool process(FILE * to_read, bool (*processer)(const char * str))
{
    struct word_geter_engine engine;
    bool ok = true;
    bool filled = false;   /* becomes true after the first successful read */
    size_t pending = 0;    /* unconsumed bytes at engine.current */

    if (!initStorer(BUFLEN, &engine)) {
        return false;
    }

    while (engine.needFill && !feof(to_read)) {
        /* Remember how much unconsumed data exists before refilling; a
           failed refill may move it to the buffer start, where
           engine.current still points at it.  Before the first read the
           buffer holds no data at all. */
        pending = filled ? (size_t)(engine.end - engine.current) : 0;

        if (!fillEngine(to_read, &engine)) {
            /* Nothing was read: either end of file or a read error.  Stop
               instead of spinning on a stream that is stuck in error. */
            ok = !ferror(to_read);
            break;
        }
        filled = true;

        while (!engine.needFill) {
            if (getWord(&engine)) {
                processer(engine.wordBuf);
            }
        }

        pending = (size_t)(engine.end - engine.current);
    }

    /* getWord() holds back a word that touches the end of the buffer in
       case more data follows.  At end of file nothing follows, so the
       held-back bytes form the final word and must still be delivered. */
    if (ok && (pending > 0) && isWord(*engine.current)) {
        if (pending > sizeof(engine.wordBuf) - 1) {
            pending = sizeof(engine.wordBuf) - 1;
        }
        memcpy(engine.wordBuf, engine.current, pending);
        engine.wordBuf[pending] = '\0';
        processer(engine.wordBuf);
    }

    free(engine.buffer);
    return ok;
}

/*********************** End process ***********************/


process函数负责读取文件,读取到的每个单词都会交给processer回调函数进行处理。

这个代码是自己做hash函数测试的时候写的,为了获取大量的字符串。

可以通过调整isWord函数来控制单词由哪些字符组成。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐