您的位置:首页 > 其它

利用后缀数组求字符串的最长重复子串的算法 利用二维数组求两个字符串的最长公共子串(动态规划)

2009-07-02 00:22 861 查看
[问题描述]

1- 求一个字符串中存在的最长的重复子串[后缀数组]


2- 求两个字符串的最长公共子串 [动态规划]


[问题解决]

1-利用 “后缀数组”(需排序) 求字符串中存在的最长的重复子串,要建立下标数组(记录各后缀的起始位置)并通过qsort排序,然后比较排序后相邻后缀的公共前缀

排序需要O(nlogn)次后缀比较(单次比较最坏为O(n)),主要开销在于排序的过程。

2-利用 “二维bool数组”来解决两个字符串的最长公共子串,找出正对角线中连续true最长的对角线,定位到两个字符串中的始末位置。空间复杂度是O(M*N) , 时间复杂度O( M(M+N) ) (M<=N),主要的时间复杂度在于字符串扫描和对角线扫描。

[程序实现] 2-待续

1- 求一个字符串中存在的最长的重复子串[后缀数组]


/*
* main.cpp
*
*  Created on: 2009-7-12
*      Author: NeeSky
*/
#define MAXSIZE 100
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
string strGiven = ""; /* Text under analysis; global so the qsort comparator can read it */
int IndexArray[MAXSIZE]; /* Suffix start offsets into strGiven; only the first strGiven.length() entries are used */
/***
* Just initilize the IndexArray
*/
void init_strGivenAndIndexArray(string strgiven )
{
strGiven = strgiven;
for (unsigned int i = 0; i < strgiven.length(); ++i)
IndexArray[i] = i;
return;
}
/***
 * qsort comparator: orders two suffixes of strGiven lexicographically.
 *
 * @param Index1 points to an int holding the start offset of the first suffix
 * @param Index2 points to an int holding the start offset of the second suffix
 * @return 1 if the first suffix is greater, 0 if both are equal, -1 otherwise
 */
inline int compare_OfIndexArray(const void *Index1, const void *Index2)
{
    const std::string::size_type first =
        static_cast<std::string::size_type>(*static_cast<const int *>(Index1));
    const std::string::size_type second =
        static_cast<std::string::size_type>(*static_cast<const int *>(Index2));

    // Compare the suffixes in place. Building std::string temporaries from
    // char pointers costs several allocations per call and would also cut a
    // suffix short at an embedded '\0'.
    const int order = strGiven.compare(first, std::string::npos,
                                       strGiven, second, std::string::npos);

    // compare() only promises the sign; normalise to the -1/0/1 contract.
    return (order > 0) - (order < 0);
}
/***
 * Sorts the first strGiven.length() entries of IndexArray so that the
 * suffixes of strGiven they refer to appear in lexicographic order.
 * The ordering itself is supplied by compare_OfIndexArray through qsort.
 */
inline void enOrder_OfIndexArray()
{
    const std::string::size_type suffixCount = strGiven.length();
    std::qsort(IndexArray, suffixCount, sizeof IndexArray[0], compare_OfIndexArray);
}
/***
 * Computes the longest common prefix of two strings.
 *
 * @param comString receives the common prefix; overwritten, possibly with ""
 * @param a first string
 * @param b second string
 * @return number of leading characters that a and b share
 */
int getCommonLength(std::string &comString, const std::string &a, const std::string &b)
{
    const std::string::size_type limit = a.length() < b.length() ? a.length() : b.length();
    std::string::size_type matched = 0;

    // Test the bound before indexing so nothing past the shorter string is read.
    while (matched < limit && a[matched] == b[matched])
        ++matched;

    comString = a.substr(0, matched);
    return static_cast<int>(matched);
}
/***
 * [Core Algorithm] Prints the longest substring(s) occurring at least twice.
 *
 * Loads strgiven into the global suffix index, sorts it, then measures the
 * common prefix of every pair of neighbouring suffixes in sorted order. The
 * longest such prefix is the longest repeated substring; ties are all
 * reported, each distinct substring once. The result goes to std::cout as
 *   The max length Repeat SubString of "<input>"(maxlen is N) : "s1" or "s2"
 * When nothing repeats, maxlen is 0 and no substring is listed.
 *
 * @param strgiven the text to analyse
 */
void getMaxRepeatSubString(std::string strgiven)
{
    std::vector<std::string> vecComStr; /* all distinct repeats of length maxlen */
    int maxlen = 0;

    init_strGivenAndIndexArray(strgiven); /* fill the global string and index */
    enOrder_OfIndexArray();               /* sort the index into suffix order */

    // "i + 1 < length" rather than "i < length - 1": length() is unsigned, so
    // subtracting 1 from an empty string's length would wrap to a huge value.
    for (unsigned int i = 0; i + 1 < strgiven.length(); ++i)
    {
        std::string strTemp;
        const int templen = getCommonLength(strTemp,
                                            strGiven.substr(IndexArray[i]),
                                            strGiven.substr(IndexArray[i + 1]));
        if (templen > maxlen)
        {
            maxlen = templen;
            vecComStr.clear();
            vecComStr.push_back(strTemp);
        }
        // templen > 0 keeps empty "repeats" out of the list (and guarantees
        // vecComStr is non-empty here). Suffixes sharing a prefix are adjacent
        // after sorting, so comparing with back() is enough to drop duplicates.
        else if (templen == maxlen && templen > 0 && vecComStr.back() != strTemp)
            vecComStr.push_back(strTemp);
    }

    std::cout << "The max length Repeat SubString of \"" << strgiven
              << "\"(maxlen is " << maxlen << ") : ";
    for (std::vector<std::string>::iterator it = vecComStr.begin(); it != vecComStr.end(); ++it)
    {
        if (it != vecComStr.begin())
            std::cout << " or ";
        std::cout << "\"" << *it << "\"";
    }
    std::cout << std::endl;
}
/***
 * Program entry point: runs the longest-repeated-substring report on four
 * fixed sample strings, in order.
 * @return 0 always
 */
int main(void)
{
    static const char *const samples[] = {
        "abcdabcdefghefgh",
        "abdddiedddie239000",
        "XXXXYYYYYYY",
        "XXXXXXX"
    };
    const unsigned int sampleCount = sizeof(samples) / sizeof(samples[0]);

    for (unsigned int s = 0; s < sampleCount; ++s)
        getMaxRepeatSubString(samples[s]);

    return 0;
}




1-测试用例输出

The max length Repeat SubString of "abcdabcdefghefgh"(maxlen is 4) : "abcd" or "efgh"

The max length Repeat SubString of "abdddiedddie239000"(maxlen is 5) : "dddie"

The max length Repeat SubString of "XXXXYYYYYYY"(maxlen is 6) : "YYYYYY"

The max length Repeat SubString of "XXXXXXX"(maxlen is 6) : "XXXXXX"

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐