您的位置:首页 > 其它

Levenshtein字符串编辑距离算法

2018-02-12 16:11 399 查看

Levenshtein(莱文斯坦)编辑距离算法实现

1.C++版本(含三个benchmark)

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

// Returns the smallest of the three given integers.
int minimum(int first, int second, int third)
{
    // Reduce the first two arguments to their smaller value, then compare
    // that against the third; on ties the later argument is returned, which
    // is indistinguishable for plain ints.
    const int smallerOfFirstTwo = (first < second) ? first : second;
    return (smallerOfFirstTwo < third) ? smallerOfFirstTwo : third;
}

/// @brief Computes the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn one string into the other.
/// Characters are compared as individual `char` values.
///
/// @param firstStr  First string (borrowed, not copied).
/// @param secondStr Second string (borrowed, not copied).
/// @return The edit distance; 0 when the strings are equal, and the length
///         of the other string when one of them is empty.
int getLevenshteinDistance(const std::string& firstStr, const std::string& secondStr)
{
    // The distance is symmetric, so lay the shorter string along the row:
    // the two row buffers then need only min(len1, len2) + 1 cells each.
    const bool firstIsShorter = firstStr.size() < secondStr.size();
    const std::string& rowStr = firstIsShorter ? firstStr : secondStr;
    const std::string& colStr = firstIsShorter ? secondStr : firstStr;
    const std::size_t rowLen = rowStr.size();
    const std::size_t colLen = colStr.size();

    // Each cell of the DP table depends only on the row above and the cell
    // to its left, so two rolling rows replace the full table.
    std::vector<int> previousRow(rowLen + 1);
    std::vector<int> currentRow(rowLen + 1);

    // Row 0: turning an empty prefix into the first j characters costs j insertions.
    for (std::size_t j = 0; j <= rowLen; ++j)
    {
        previousRow[j] = static_cast<int>(j);
    }

    for (std::size_t i = 1; i <= colLen; ++i)
    {
        // Column 0: turning the first i characters into an empty prefix costs i deletions.
        currentRow[0] = static_cast<int>(i);
        for (std::size_t j = 1; j <= rowLen; ++j)
        {
            const int substitutionCost = (colStr[i - 1] == rowStr[j - 1]) ? 0 : 1;
            currentRow[j] = std::min({previousRow[j] + 1,                       // deletion
                                      currentRow[j - 1] + 1,                    // insertion
                                      previousRow[j - 1] + substitutionCost});  // match / substitution
        }
        previousRow.swap(currentRow);
    }

    // After the final swap the last computed row lives in previousRow.
    return previousRow[rowLen];
}

// Demo driver: prints the edit distance for three sample string pairs, then
// waits for one character on standard input before exiting.
// argc, argv and env are accepted but not used.
int main(int argc, char * * argv, char * * env)
{
    // Each entry is {first string, second string}.
    const std::string samplePairs[][2] = {
        {"sitting", "kitten"},
        {"Saturday", "Sunday"},
        {"levenshtein", "meilenstein"},
    };

    for (const auto& samplePair : samplePairs)
    {
        std::cout << "levenstein distance of " << samplePair[0]
                  << " and " << samplePair[1]
                  << " is:" << getLevenshteinDistance(samplePair[0], samplePair[1])
                  << std::endl;
    }

    // Block on a single character of input before returning.
    char pauseKey;
    std::cin >> pauseKey;
    return 0;
}


2.awk版本1(表格法)

# Returns the smallest of the three values first, second and third.
#
# Callers pass exactly three arguments. The extra parameter `former` is
# awk's idiom for a local variable: without it, `former` would be a global
# and every call would overwrite any script variable of the same name.
function minimum(first, second, third,    former)
{
    former = (first < second) ? first : second;
    return (former < third) ? former : third;
}

# Computes the Levenshtein (edit) distance between firstStr and secondStr
# by filling a dynamic-programming table, where cell [i, j] holds the
# distance between the first i characters of firstStr and the first j
# characters of secondStr.
#
# Callers pass only firstStr and secondStr. The parameters after the gap are
# awk's idiom for local variables, so the function neither reads nor
# overwrites globals, and the table starts empty on every call.
#
# Returns the edit distance (0 for equal strings; the other string's length
# when one string is empty).
function getLevenshteinDistance(firstStr, secondStr,    firstStrLen, secondStrLen, firstIndex, secondIndex, lastCharCost, best, candidate, levenshteinDistanceTable)
{
    firstStrLen = length(firstStr);
    secondStrLen = length(secondStr);

    # Border cells: converting to or from an empty prefix costs its length.
    for (secondIndex = 0; secondIndex <= secondStrLen; secondIndex++)
    {
        levenshteinDistanceTable[0, secondIndex] = secondIndex;
    }
    for (firstIndex = 0; firstIndex <= firstStrLen; firstIndex++)
    {
        levenshteinDistanceTable[firstIndex, 0] = firstIndex;
    }

    for (firstIndex = 1; firstIndex <= firstStrLen; firstIndex++)
    {
        for (secondIndex = 1; secondIndex <= secondStrLen; secondIndex++)
        {
            # Compare the characters as plain strings. match() would treat
            # the second character as a regular expression, so "." would
            # equal every character and "*", "[" or "(" would misbehave.
            lastCharCost = (substr(firstStr, firstIndex, 1) == substr(secondStr, secondIndex, 1)) ? 0 : 1;

            # Cheapest of deletion, insertion and match/substitution.
            best = levenshteinDistanceTable[firstIndex - 1, secondIndex] + 1;
            candidate = levenshteinDistanceTable[firstIndex, secondIndex - 1] + 1;
            if (candidate < best)
            {
                best = candidate;
            }
            candidate = levenshteinDistanceTable[firstIndex - 1, secondIndex - 1] + lastCharCost;
            if (candidate < best)
            {
                best = candidate;
            }
            levenshteinDistanceTable[firstIndex, secondIndex] = best;
        }
    }
    return levenshteinDistanceTable[firstStrLen, secondStrLen];
}


3.awk版本2(递归法,存在重叠子问题重复计算问题,性能较低)

# Returns the smallest of the three values first, second and third.
#
# Callers pass exactly three arguments. `former` is declared as an extra
# parameter so that it is local to the function; as an implicit global it
# would be shared with (and overwritten for) the rest of the script.
function minimum(first, second, third,    former)
{
    if (first < second)
    {
        former = first;
    }
    else
    {
        former = second;
    }
    return (former < third) ? former : third;
}

# Computes the Levenshtein (edit) distance between the first firstLen
# characters of firstStr and the first secondLen characters of secondStr by
# plain recursion on the last character of each prefix.
#
# Callers pass the four leading arguments, normally with
# firstLen = length(firstStr) and secondLen = length(secondStr).
# The parameters after the gap are awk's idiom for local variables. They
# must be local: as globals, every nested recursive call would overwrite
# the caller's partial results and the returned distance would be wrong.
#
# Overlapping subproblems are recomputed on every branch, so the running
# time grows exponentially with the string lengths; use only on short input.
function getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen,    lastCost, first, second, third, best)
{
    # An empty prefix is reached by inserting or deleting all remaining characters.
    if (firstLen == 0)
    {
        return secondLen;
    }
    if (secondLen == 0)
    {
        return firstLen;
    }

    # Compare the last characters as plain strings. match() would treat the
    # second character as a regular expression, so "." would equal anything.
    lastCost = (substr(firstStr, firstLen, 1) == substr(secondStr, secondLen, 1)) ? 0 : 1;

    first = getLevenshteinDistance(firstStr, firstLen - 1, secondStr, secondLen) + 1;             # deletion
    second = getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen - 1) + 1;            # insertion
    third = getLevenshteinDistance(firstStr, firstLen - 1, secondStr, secondLen - 1) + lastCost;  # match / substitution

    # Smallest of the three candidates.
    best = (first < second) ? first : second;
    if (third < best)
    {
        best = third;
    }
    return best;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
相关文章推荐