Levenshtein字符串编辑距离算法
2018-02-12 16:11
399 查看
Levenshtein(莱文斯坦)编辑距离算法实现
1.C++版本(含三个测试用例)
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

/// @brief Returns the smallest of three integers.
int minimum(int first, int second, int third) {
    return std::min({first, second, third});
}

/// @brief Computes the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn @p firstStr into @p secondStr.
/// Characters are compared byte by byte.
///
/// @param firstStr  Source string.
/// @param secondStr Target string.
/// @return The edit distance; equals the other string's length when one
///         of the inputs is empty.
int getLevenshteinDistance(const std::string& firstStr, const std::string& secondStr) {
    const std::size_t firstStrLen = firstStr.length();
    const std::size_t secondStrLen = secondStr.length();

    // Each cell depends only on the row above and the cell to its left,
    // so two rows are enough instead of the full table.
    std::vector<int> previousRow(secondStrLen + 1);
    std::vector<int> currentRow(secondStrLen + 1);

    // Row 0: turning an empty prefix into the first j characters takes j insertions.
    for (std::size_t secondIndex = 0; secondIndex <= secondStrLen; ++secondIndex) {
        previousRow[secondIndex] = static_cast<int>(secondIndex);
    }

    for (std::size_t firstIndex = 1; firstIndex <= firstStrLen; ++firstIndex) {
        // Column 0: turning the first i characters into an empty string takes i deletions.
        currentRow[0] = static_cast<int>(firstIndex);

        for (std::size_t secondIndex = 1; secondIndex <= secondStrLen; ++secondIndex) {
            const int lastCharCost =
                (firstStr[firstIndex - 1] == secondStr[secondIndex - 1]) ? 0 : 1;

            currentRow[secondIndex] = minimum(
                previousRow[secondIndex] + 1,                   // deletion
                currentRow[secondIndex - 1] + 1,                // insertion
                previousRow[secondIndex - 1] + lastCharCost);   // match / substitution
        }

        // The row just filled becomes the "row above" for the next iteration.
        std::swap(previousRow, currentRow);
    }

    // After the final swap (or with an empty first string) the answer is in previousRow.
    return previousRow[secondStrLen];
}

/// @brief Prints the distance between two strings in the program's report format.
static void printLevenshteinDistance(const std::string& firstStr, const std::string& secondStr) {
    std::cout << "levenstein distance of " << firstStr << " and " << secondStr
              << " is:" << getLevenshteinDistance(firstStr, secondStr) << '\n';
}

/// @brief Runs three sample comparisons and then waits for one character on stdin.
int main() {
    printLevenshteinDistance("sitting", "kitten");
    printLevenshteinDistance("Saturday", "Sunday");
    printLevenshteinDistance("levenshtein", "meilenstein");

    // Block until a character is entered so the output stays visible
    // when the program is launched in its own console window.
    char ch;
    std::cin >> ch;
    return 0;
}
2.awk版本1(表格法)
# Returns the smallest of three numbers.
# The parameter after the extra spaces is a local variable (awk has no
# other way to declare locals); callers pass only the first three.
function minimum(first, second, third,    former) {
    former = (first < second) ? first : second;
    return (former < third) ? former : third;
}

# Computes the Levenshtein (edit) distance between firstStr and secondStr
# by filling a (length(firstStr)+1) x (length(secondStr)+1) table.
#
# Callers pass only firstStr and secondStr. Everything after the extra
# spaces is a local variable, so the function neither reads nor clobbers
# globals and the table is released on return.
#
# Returns the minimum number of single-character insertions, deletions
# and substitutions needed to turn firstStr into secondStr.
function getLevenshteinDistance(firstStr, secondStr,    firstStrLen, secondStrLen, firstIndex, secondIndex, lastCharCost, levenshteinDistanceTable) {
    firstStrLen = length(firstStr);
    secondStrLen = length(secondStr);

    # Row 0: empty prefix -> first j characters costs j insertions.
    for (secondIndex = 0; secondIndex <= secondStrLen; secondIndex++) {
        levenshteinDistanceTable[0, secondIndex] = secondIndex;
    }
    # Column 0: first i characters -> empty string costs i deletions.
    for (firstIndex = 0; firstIndex <= firstStrLen; firstIndex++) {
        levenshteinDistanceTable[firstIndex, 0] = firstIndex;
    }

    for (firstIndex = 1; firstIndex <= firstStrLen; firstIndex++) {
        for (secondIndex = 1; secondIndex <= secondStrLen; secondIndex++) {
            # Compare the characters as strings. match() would interpret the
            # second character as a regular expression, so "." would equal
            # every character and "*", "[", "(" would be wrong or errors.
            if (substr(firstStr, firstIndex, 1) == substr(secondStr, secondIndex, 1)) {
                lastCharCost = 0;
            } else {
                lastCharCost = 1;
            }

            levenshteinDistanceTable[firstIndex, secondIndex] = minimum( \
                levenshteinDistanceTable[firstIndex - 1, secondIndex] + 1, \
                levenshteinDistanceTable[firstIndex, secondIndex - 1] + 1, \
                levenshteinDistanceTable[firstIndex - 1, secondIndex - 1] + lastCharCost);
        }
    }

    return levenshteinDistanceTable[firstStrLen, secondStrLen];
}
3.awk版本2(递归法,存在重叠子问题重复计算问题,性能较低)
# Returns the smallest of three numbers.
# The parameter after the extra spaces is a local variable (awk has no
# other way to declare locals); callers pass only the first three.
function minimum(first, second, third,    former) {
    former = (first < second) ? first : second;
    return (former < third) ? former : third;
}

# Computes the Levenshtein (edit) distance between the first firstLen
# characters of firstStr and the first secondLen characters of secondStr
# by plain recursion.
#
# Callers pass only the first four arguments, typically
#   getLevenshteinDistance(a, length(a), b, length(b)).
# Everything after the extra spaces is a local variable. This matters for
# correctness: as globals, lastCost/first/second/third were overwritten by
# the nested calls before the caller read them.
#
# No memoization is done, so overlapping sub-problems are recomputed and
# the running time grows exponentially with the input lengths.
function getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen,    lastCost, first, second, third) {
    # An empty prefix is reached by inserting/deleting all remaining characters.
    if (firstLen == 0) {
        return secondLen;
    }
    if (secondLen == 0) {
        return firstLen;
    }

    # Compare the last characters as strings. match() would interpret the
    # second character as a regular expression.
    if (substr(firstStr, firstLen, 1) == substr(secondStr, secondLen, 1)) {
        lastCost = 0;
    } else {
        lastCost = 1;
    }

    first = getLevenshteinDistance(firstStr, firstLen - 1, secondStr, secondLen) + 1;             # deletion
    second = getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen - 1) + 1;            # insertion
    third = getLevenshteinDistance(firstStr, firstLen - 1, secondStr, secondLen - 1) + lastCost;  # match / substitution

    return minimum(first, second, third);
}
相关文章推荐
- 字符串编辑距离(Levenshtein距离)算法
- 【动态规划】字符串编辑距离(Levenshtein距离)算法
- levenshtein字符串编辑距离算法
- 字符串编辑距离(Levenshtein距离)算法
- 找工作知识储备---数组字符串那些经典算法:最大子序列和,最长递增子序列,最长公共子串,最长公共子序列,字符串编辑距离,最长不重复子串,最长回文子串
- 编辑距离LCS算法详解:Levenshtein Distance算法计算两个字符串的相似度
- 字符串编辑距离算法(hdu 4271 hdu 4323 hdu 3540)
- java实现编辑距离算法,计算字符串相似度
- 字符串相似度算法(编辑距离算法 Levenshtein Distance)
- 数组字符串那些经典算法:最大子序列和,最长递增子序列,最长公共子串,最长公共子序列,字符串编辑距离,最长不重复子串,最长回文子串
- 【算法】字符串编辑距离
- 经典算法求字符串的编辑距离
- 经典算法 | 字符串最小编辑距离分析与证明
- 字符串相似度算法(编辑距离算法 Levenshtein Distance)
- 找工作知识储备(2)---数组字符串那些经典算法:最大子序列和,最长递增子序列,最长公共子串,最长公共子序列,字符串编辑距离,最长不重复子串,最长回文子串
- 数组字符串那些经典算法:最大子序列和,最长递增子序列,最长公共子串,最长公共子序列,字符串编辑距离,最长不重复子串,最长回文子串
- 编辑距离 算法详述计算两个字符串差异 c++代码
- 用C#实现字符串相似度算法(编辑距离算法 Levenshtein Distance)
- 字符串相关算法(编辑距离,最大公共字串)C#实现版
- [转]字符串相似度算法(编辑距离算法 Levenshtein Distance)[附c#,asp源码]