几种字符串匹配算法性能简单实验对比
2008-02-15 00:24
453 查看
做了一个很粗糙的实验,比较了几种字符串匹配算法的性能。程序使用 -O3 进行编译优化。下面先给出待查找文本长度为 434018 字节、模式串长度为 4 时的典型实验结果。可以看到,horspool 算法最快,表现最差的是 KMP 系的 shift_and 算法(实验结果与《柔性字符串匹配》一书中的结论一致)。实验结果之后附有 horspool、shift_and 和 BNDM 算法的实验源码:
strstr(C库函数) time:743 微秒
horspool: time:642 微秒
shift_and: time:1465 微秒
BNDM: time:721 微秒
/**
 * Horspool search: count the occurrences of pat in txt.
 *
 * Overlapping occurrences are counted individually
 * (e.g. "aa" occurs 3 times in "aaaa").
 *
 * @param txt NUL-terminated text to search in.
 * @param pat NUL-terminated pattern to search for.
 * @return number of positions in txt where pat occurs; 0 when pat is empty
 *         or longer than txt.
 */
int horspool(const char *txt, const char *pat)
{
    size_t m = strlen(pat); /* pattern length */
    size_t n = strlen(txt); /* text length */
    size_t shift[256];      /* bad-character shift table */

    /* An empty pattern would yield a zero shift (endless loop), and a
     * pattern longer than the text cannot match anywhere. */
    if (m == 0 || m > n) {
        return 0;
    }

    /* preprocessing: by default skip the whole window ... */
    for (size_t c = 0; c < 256; c++) {
        shift[c] = m;
    }
    /* ... unless the character occurs in pat[0 .. m-2]; then align its
     * rightmost such occurrence with the window's last character. */
    for (size_t i = 0; i + 1 < m; i++) {
        shift[(unsigned char)pat[i]] = m - 1 - i;
    }

    /* searching */
    int cnt = 0;    /* occurrences found so far */
    size_t pos = 0; /* start of the current window in txt */
    while (pos <= n - m) {
        /* compare the window with the pattern from right to left */
        size_t j = m;
        while (j > 0 && pat[j - 1] == txt[pos + j - 1]) {
            j--;
        }
        if (j == 0) {
            cnt++;
        }
        /* shift according to the last character of the window */
        pos += shift[(unsigned char)txt[pos + m - 1]];
    }
    return cnt;
}
/**
 * Shift-And search: count the occurrences of pat in txt.
 *
 * The search state is kept in one unsigned long, so the pattern may be at
 * most as long as that word has bits. Overlapping occurrences are counted
 * individually.
 *
 * @param txt NUL-terminated text to search in.
 * @param pat NUL-terminated pattern to search for.
 * @return number of positions in txt where an occurrence of pat ends;
 *         0 when pat is empty; -1 when pat is too long for the state word.
 */
int shift_and(const char *txt, const char *pat)
{
    /* 8 bits per byte is the guaranteed minimum, so this bound is safe
     * (merely conservative on platforms with wider bytes). */
    const size_t word_bits = 8 * sizeof(unsigned long);
    size_t m = strlen(pat);
    unsigned long b[256] = {0}; /* b[c] has bit i set iff pat[i] == c */

    if (m == 0) {
        return 0;
    }
    if (m > word_bits) {
        return -1;
    }

    /* 1UL keeps the shift in unsigned long; an int constant would overflow
     * for bit positions >= 31. */
    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1UL << i;
    }

    const unsigned long accept = 1UL << (m - 1); /* bit of the full match */
    unsigned long d = 0; /* bit i set iff pat[0..i] ends at the current char */
    int cnt = 0;
    for (const char *s = txt; *s != '\0'; s++) {
        d = ((d << 1) | 1UL) & b[(unsigned char)*s];
        if (d & accept) {
            cnt++;
        }
    }
    return cnt;
}
/**
 * BNDM (Backward Nondeterministic DAWG Matching) search: count the
 * occurrences of pat in txt.
 *
 * The search state is kept in one unsigned long, so the pattern may be at
 * most as long as that word has bits. Overlapping occurrences are counted
 * individually.
 *
 * @param txt NUL-terminated text to search in.
 * @param pat NUL-terminated pattern to search for.
 * @return number of positions in txt where pat occurs; 0 when pat is empty
 *         or longer than txt; -1 when pat is too long for the state word.
 */
int BNDM(const char *txt, const char *pat)
{
    /* 8 bits per byte is the guaranteed minimum, so this bound is safe
     * (merely conservative on platforms with wider bytes). */
    const size_t word_bits = 8 * sizeof(unsigned long);
    size_t m = strlen(pat);
    size_t n = strlen(txt);
    unsigned long b[256] = {0}; /* b[c] has bit m-1-i set iff pat[i] == c */

    if (m == 0) {
        return 0;
    }
    if (m > word_bits) {
        return -1;
    }
    if (m > n) {
        return 0;
    }

    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1UL << (m - 1 - i);
    }

    const unsigned long top = 1UL << (m - 1);    /* "a pattern prefix matched" bit */
    const unsigned long live = top | (top - 1);  /* the low m bits */

    int cnt = 0;
    size_t pos = 0; /* start of the current window in txt */
    while (pos <= n - m) {
        size_t j = m;     /* window characters not yet read (read right to left) */
        size_t skip = m;  /* how far the window moves afterwards */
        unsigned long d = live;
        while (d != 0) {
            d &= b[(unsigned char)txt[pos + j - 1]];
            j--;
            if (d & top) {
                if (j > 0) {
                    /* the suffix read so far is a prefix of pat:
                     * remember it as the next window start */
                    skip = j;
                } else {
                    /* the whole window matched */
                    cnt++;
                }
            }
            /* Truncate to m bits so the state becomes 0 once all m
             * characters have been read; otherwise the loop would run
             * again and read the character before the window. */
            d = (d << 1) & live;
        }
        pos += skip;
    }
    return cnt;
}
strstr(C库函数) time:743 微秒
horspool: time:642 微秒
shift_and: time:1465 微秒
BNDM: time:721 微秒
/**
 * Horspool search: count the occurrences of pat in txt.
 *
 * Overlapping occurrences are counted individually
 * (e.g. "aa" occurs 3 times in "aaaa").
 *
 * @param txt NUL-terminated text to search in.
 * @param pat NUL-terminated pattern to search for.
 * @return number of positions in txt where pat occurs; 0 when pat is empty
 *         or longer than txt.
 */
int horspool(const char *txt, const char *pat)
{
    size_t m = strlen(pat); /* pattern length */
    size_t n = strlen(txt); /* text length */
    size_t shift[256];      /* bad-character shift table */

    /* An empty pattern would yield a zero shift (endless loop), and a
     * pattern longer than the text cannot match anywhere. */
    if (m == 0 || m > n) {
        return 0;
    }

    /* preprocessing: by default skip the whole window ... */
    for (size_t c = 0; c < 256; c++) {
        shift[c] = m;
    }
    /* ... unless the character occurs in pat[0 .. m-2]; then align its
     * rightmost such occurrence with the window's last character. */
    for (size_t i = 0; i + 1 < m; i++) {
        shift[(unsigned char)pat[i]] = m - 1 - i;
    }

    /* searching */
    int cnt = 0;    /* occurrences found so far */
    size_t pos = 0; /* start of the current window in txt */
    while (pos <= n - m) {
        /* compare the window with the pattern from right to left */
        size_t j = m;
        while (j > 0 && pat[j - 1] == txt[pos + j - 1]) {
            j--;
        }
        if (j == 0) {
            cnt++;
        }
        /* shift according to the last character of the window */
        pos += shift[(unsigned char)txt[pos + m - 1]];
    }
    return cnt;
}
/**
 * Shift-And search: count the occurrences of pat in txt.
 *
 * The search state is kept in one unsigned long, so the pattern may be at
 * most as long as that word has bits. Overlapping occurrences are counted
 * individually.
 *
 * @param txt NUL-terminated text to search in.
 * @param pat NUL-terminated pattern to search for.
 * @return number of positions in txt where an occurrence of pat ends;
 *         0 when pat is empty; -1 when pat is too long for the state word.
 */
int shift_and(const char *txt, const char *pat)
{
    /* 8 bits per byte is the guaranteed minimum, so this bound is safe
     * (merely conservative on platforms with wider bytes). */
    const size_t word_bits = 8 * sizeof(unsigned long);
    size_t m = strlen(pat);
    unsigned long b[256] = {0}; /* b[c] has bit i set iff pat[i] == c */

    if (m == 0) {
        return 0;
    }
    if (m > word_bits) {
        return -1;
    }

    /* 1UL keeps the shift in unsigned long; an int constant would overflow
     * for bit positions >= 31. */
    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1UL << i;
    }

    const unsigned long accept = 1UL << (m - 1); /* bit of the full match */
    unsigned long d = 0; /* bit i set iff pat[0..i] ends at the current char */
    int cnt = 0;
    for (const char *s = txt; *s != '\0'; s++) {
        d = ((d << 1) | 1UL) & b[(unsigned char)*s];
        if (d & accept) {
            cnt++;
        }
    }
    return cnt;
}
/**
 * BNDM (Backward Nondeterministic DAWG Matching) search: count the
 * occurrences of pat in txt.
 *
 * The search state is kept in one unsigned long, so the pattern may be at
 * most as long as that word has bits. Overlapping occurrences are counted
 * individually.
 *
 * @param txt NUL-terminated text to search in.
 * @param pat NUL-terminated pattern to search for.
 * @return number of positions in txt where pat occurs; 0 when pat is empty
 *         or longer than txt; -1 when pat is too long for the state word.
 */
int BNDM(const char *txt, const char *pat)
{
    /* 8 bits per byte is the guaranteed minimum, so this bound is safe
     * (merely conservative on platforms with wider bytes). */
    const size_t word_bits = 8 * sizeof(unsigned long);
    size_t m = strlen(pat);
    size_t n = strlen(txt);
    unsigned long b[256] = {0}; /* b[c] has bit m-1-i set iff pat[i] == c */

    if (m == 0) {
        return 0;
    }
    if (m > word_bits) {
        return -1;
    }
    if (m > n) {
        return 0;
    }

    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1UL << (m - 1 - i);
    }

    const unsigned long top = 1UL << (m - 1);    /* "a pattern prefix matched" bit */
    const unsigned long live = top | (top - 1);  /* the low m bits */

    int cnt = 0;
    size_t pos = 0; /* start of the current window in txt */
    while (pos <= n - m) {
        size_t j = m;     /* window characters not yet read (read right to left) */
        size_t skip = m;  /* how far the window moves afterwards */
        unsigned long d = live;
        while (d != 0) {
            d &= b[(unsigned char)txt[pos + j - 1]];
            j--;
            if (d & top) {
                if (j > 0) {
                    /* the suffix read so far is a prefix of pat:
                     * remember it as the next window start */
                    skip = j;
                } else {
                    /* the whole window matched */
                    cnt++;
                }
            }
            /* Truncate to m bits so the state becomes 0 once all m
             * characters have been read; otherwise the loop would run
             * again and read the character before the window. */
            d = (d << 1) & live;
        }
        pos += skip;
    }
    return cnt;
}
相关文章推荐
- 字符串匹配暴力匹配法和KMP匹配算法对比
- 浅谈字符串匹配的几种算法(KMP,Boyer-Moore)
- 字符串匹配的几种算法总结(KMP、等)
- Java 遍历文件夹的几种方式及简单性能对比
- 字符串匹配算法——KMP算法简单解释
- 常见的字符串匹配算法对比实现C语言版本
- BF,KMP,BM三种字符串匹配算法性能比较
- 几种字符串匹配算法,我基本上没有听说过
- 算法上级报告(渗透问题(Percolation),几种排序算法的实验性能比较,地图路由(Map Routing))
- JS几种数组遍历方式以及性能分析对比
- FZU 2183 简单题(字符串匹配|字符串压缩)(简单)
- 几种垃圾收集算法的简单理解
- sdutacm-数据结构实验之串二:字符串匹配
- ArrayList和LinkedList的几种循环遍历方式及性能对比分析
- 字符串匹配之通配符问题------一串首尾相连的珠子(m个),有N种颜色(N《=10),设计一个算法,取出其中一段,要求包含所有N中颜色,并使长度最短。
- 字符串匹配算法之KMP算法
- 字符串匹配算法分析--BF和KMP算法
- 字符串匹配---暴力匹配算法
- 从字符串中查找字符出现次数的方法和性能对比
- 算法笔记_009:字符串匹配(Java)