您的位置:首页 > 其它

几种字符串匹配算法性能简单实验对比

2008-02-15 00:24 453 查看
做了一个很粗糙的实验,比较了几种字符串匹配算法的性能。程序使用 -O3 进行编译优化。下面先给出待查找文本长度为 434018 字节、模式串长度为 4 时的典型实验结果:horspool 算法最快,表现最差的是与 KMP 同属前缀匹配一类的 shift_and 算法(实验结果与《柔性字符串匹配》一书中的结论一致)。结果之后是 horspool、shift_and 和 BNDM 算法的实验源码:


strstr(C库函数) time:743 微秒


horspool:   time:642 微秒


shift_and:   time:1465 微秒


BNDM:   time:721 微秒


/**
 * Horspool algorithm: count the occurrences of pat in txt.
 *
 * Overlapping occurrences are counted individually (e.g. "aa" occurs twice
 * in "aaa").
 *
 * @param txt  NUL-terminated text to search in.
 * @param pat  NUL-terminated pattern to search for.
 * @return     Number of positions in txt at which pat occurs; 0 when pat is
 *             empty or longer than txt.
 */
int horspool(const char *txt, const char *pat)
{
    const size_t m = strlen(pat); /**< length of pat */
    const size_t n = strlen(txt); /**< length of txt */

    /* An empty pattern would index window[m - 1] out of bounds and produce a
     * zero shift; a pattern longer than the text cannot match anywhere. */
    if (m == 0 || m > n) {
        return 0;
    }

    /* Preprocessing: shift[c] is the distance from the last occurrence of c
     * in pat[0 .. m-2] to the end of the pattern, or m if c does not occur. */
    size_t shift[256];
    for (size_t c = 0; c < 256; c++) {
        shift[c] = m;
    }
    for (size_t i = 0; i + 1 < m; i++) {
        shift[(unsigned char)pat[i]] = m - i - 1;
    }

    /* Searching: slide a window of m characters over the text. */
    const size_t last_start = n - m; /**< last offset where a window fits */
    size_t pos = 0;                  /**< current window offset */
    int cnt = 0;                     /**< occurrences found so far */

    while (pos <= last_start) {
        const char *window = txt + pos;

        /* Compare right to left; j is the number of unverified characters. */
        size_t j = m;
        while (j > 0 && pat[j - 1] == window[j - 1]) {
            j--;
        }
        if (j == 0) {
            cnt++;
        }

        /* Shift according to the text character under the window's last
         * position; shift >= 1 always, so the loop terminates. */
        pos += shift[(unsigned char)window[m - 1]];
    }

    return cnt;
}


/**
 * Shift-And algorithm: count the occurrences of pat in txt.
 *
 * The search state is kept in a single machine word with one bit per pattern
 * position, so the pattern length is limited to 64 characters. Overlapping
 * occurrences are counted individually.
 *
 * @param txt  NUL-terminated text to search in.
 * @param pat  NUL-terminated pattern to search for.
 * @return     Number of positions in txt at which an occurrence of pat ends;
 *             0 when pat is empty; -1 when pat is longer than 64 characters
 *             and therefore cannot be handled by this implementation.
 */
int shift_and(const char *txt, const char *pat)
{
    /* unsigned long long is guaranteed to hold at least 64 bits. */
    enum { WORD_BITS = 64 };

    const size_t m = strlen(pat);
    if (m == 0) {
        return 0; /* avoids the undefined shift by (m - 1) == -1 */
    }
    if (m > WORD_BITS) {
        return -1;
    }

    /* b[c] has bit i set iff pat[i] == c. The shifts are done on an unsigned
     * 64-bit operand: shifting the int constant 1 is undefined for i >= 31. */
    unsigned long long b[256] = {0};
    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1ULL << i;
    }

    /* Bit m-1 of the state is set exactly when the whole pattern matched. */
    const unsigned long long mask = 1ULL << (m - 1);

    unsigned long long d = 0; /**< bit i set: pat[0..i] ends at current char */
    int cnt = 0;

    for (const char *s = txt; *s != '\0'; s++) {
        d = ((d << 1) | 1ULL) & b[(unsigned char)*s];
        if (d & mask) {
            cnt++;
        }
    }

    return cnt;
}


/**
 * BNDM (Backward Nondeterministic DAWG Matching) algorithm: count the
 * occurrences of pat in txt.
 *
 * The search state is kept in a single machine word with one bit per pattern
 * position, so the pattern length is limited to 64 characters. Overlapping
 * occurrences are counted individually.
 *
 * @param txt  NUL-terminated text to search in.
 * @param pat  NUL-terminated pattern to search for.
 * @return     Number of positions in txt at which pat occurs; 0 when pat is
 *             empty or longer than txt; -1 when pat is longer than 64
 *             characters and therefore cannot be handled by this
 *             implementation.
 */
int BNDM(const char *txt, const char *pat)
{
    /* unsigned long long is guaranteed to hold at least 64 bits. */
    enum { WORD_BITS = 64 };

    const size_t m = strlen(pat);
    const size_t n = strlen(txt);

    if (m == 0) {
        return 0; /* avoids the undefined shift by (m - 1) == -1 */
    }
    if (m > WORD_BITS) {
        return -1;
    }
    if (m > n) {
        return 0; /* no window fits; also avoids computing n - m below zero */
    }

    /* b[c] has bit (m-1-i) set iff pat[i] == c. The shifts are done on an
     * unsigned 64-bit operand: shifting the int constant 1 is undefined for
     * shift counts >= 31. */
    unsigned long long b[256] = {0};
    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1ULL << (m - i - 1);
    }

    const unsigned long long mask = 1ULL << (m - 1);       /* top state bit */
    const unsigned long long low_bits = mask | (mask - 1); /* low m bits */

    const size_t last_start = n - m; /**< last offset where a window fits */
    size_t pos = 0;                  /**< current window offset */
    int cnt = 0;

    while (pos <= last_start) {
        const char *s = txt + pos;
        size_t j = m;     /**< window characters not yet read (right to left) */
        size_t shift = m; /**< how far to move the window afterwards */
        unsigned long long d = low_bits;

        while (d != 0) {
            d &= b[(unsigned char)s[j - 1]];
            j--;

            if (d & mask) {
                if (j > 0) {
                    /* The suffix read so far is a prefix of pat: the next
                     * window may start here. */
                    shift = j;
                } else {
                    /* The whole window has been read and matched. */
                    cnt++;
                }
            }

            /* Keep only the low m bits. Without this, bit m survives a full
             * match, the loop runs once more and reads s[-1]. With it, d is
             * zero after at most m iterations, so j never drops below 0. */
            d = (d << 1) & low_bits;
        }

        pos += shift;
    }

    return cnt;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: