您的位置:首页 > 其它

去除字符串中的重复字符——方法汇总和性能测试

2015-07-31 01:30 573 查看

Design an algorithm and write code to remove the duplicate characters in a string without using any additional buffer. NOTE: One or two additional variables are fine. An extra copy of the array is not.

Write the test cases for this method.

去除掉一个字符数组中的重复字符,不能使用附加的缓存空间,并设计测试用例



本文中的C++算法来自于HawStein的博客。

题目中已经要求不可以有额外的buffer,所以得在原数组的基础上操作。

此外,假设所有字符char的取值大小只有[0,256]

算法思想:遍历整个buffer,如果有一个字符,将这个字符写入到s[p]的位置上去(p必然小于等于i),并且遍历后面,和它相同的字符全部标为’\0’。

#include<cstring>
void RemoveDuplicate(char s[]){
int len = strlen(s);
int p = 0;
for (int i = 0; i < len; i++)
{
if (s[i]!='\0')
{
s[p++] = s[i];
for (int j = i + 1; j < len;j++)
if (s[i] == s[j])
s[j] = '\0';
}
}
s[p] = '\0';
}


这个方法的时间复杂度为O(n^2)

如果可以开一个固定大小的数组(即大小与n无关),那么时间复杂度可以到O(n)

算法思想:开设一个bool数组来记录字符的状态

void RemoveDuplicate1(char s[]){
int len = strlen(s);
bool c[256];
int p = 0;
memset(c, 0, sizeof(c));
for (int i = 0; i < len; i++){
if (!c[s[i]]){
c[s[i]] = true;
s[p++] = s[i];
}
}
s[p] = '\0';
}


性能测试

基础测试用例

#include<stdio.h>
#include<stdlib.h>
#include<cstring>
void RemoveDuplicate(char s[]);
void RemoveDuplicate1(char s[]);
void RemoveDuplicate2(char s[]);
int main(){

char s1[] = "starcraft";
char s2[] = "llllllll";
char s3[] = "";
char s4[] = "ssttaarrccrraafftt";
printf("====RemoveDuplicate====\n");
RemoveDuplicate(s1);printf("%s\n",s1);
RemoveDuplicate(s2);printf("%s\n",s2);

printf("====RemoveDuplicate1====\n");
RemoveDuplicate1(s3); printf("%s\n", s3);
RemoveDuplicate1(s4); printf("%s\n", s4);

system("pause");
return 0;
}


====RemoveDuplicate====
starcf
l
====RemoveDuplicate1====

starcf

请按任意键继续. . .


性能比较

为了获得足够大的字符数组,我修改了一下之前的测试用例生成函数,使其可以生成任意大小的字符数组。

#include <time.h>
#include <stdio.h>
#include <random>
#include <stdlib.h>
#include <math.h>
#define random(x) (rand()%x)
char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/******生成测试数据data,并且将生成的数组写入到文件中保存***********/
//*****type为'c'时指定生成一个随机字符数组

void* get_test_data (const char* file_name, int size,char type,int maxnum){
void* data;
if (type == 'c')
data = (char*)malloc(sizeof(char)*size);
else
data = (int*)malloc(sizeof(int)*size);
FILE* output_file;
fopen_s(&output_file,file_name, "w+");
if (NULL == output_file){
return NULL;
}
srand((int)time(0));
for (int i = 0; i < size; i++){
if (type == 'c'){
char* temp = (char*)data;
int ram = random(sizeof(charset)-2);
if (ram < 0)
ram *= -1;
temp[i] = charset[ram];
if (i == size - 1)
temp[i] = '\0';
fprintf(output_file, "%c", temp[i]);
}
else if (type == 'd')
{
((int*)data)[i] = random(maxnum);

fprintf(output_file, "%d", ((int*)data)[i]);
}
}

return data;
}


测试用例

#include<stdio.h>
#include<stdlib.h>
#include<cstring>
#define SIZE 1000
void RemoveDuplicate(char s[]);
void RemoveDuplicate1(char s[]);
void RemoveDuplicate2(char s[]);
void* get_test_data(const char* file_name, int size,char type,int maxnum);

int main(){
/*printf("====RemoveDuplicate2====\n");
RemoveDuplicate2(s1); printf("%s\n", s1);
RemoveDuplicate2(s2); printf("%s\n", s2);
RemoveDuplicate2(s3); printf("%s\n", s3);
RemoveDuplicate2(s4); printf("%s\n", s4);*/

char *bc=(char*)get_test_data("test.txt",SIZE,'c',0);
char  bc_copy[SIZE];
memcpy_s(bc_copy,SIZE,bc,SIZE);
printf("%s\n=========\n", bc);
printf("%s\n=========\n", bc_copy);
RemoveDuplicate(bc); printf("%s\n", bc);
RemoveDuplicate1(bc_copy); printf("%s\n", bc_copy);
system("pause");
return 0;
}


结果

slbkoXzycbXuWseuyrwyCjVusUcTggHHFdkrBMYIoHNTKWMEBajIBATVQSPDNclUEfgobDylLRVzVuVR
cajBbDKSGuLpbiGQEccsQfJNsUFGncOeMNujfDxUxijXITEnMOSHrXcxvzInvmGNUVNIvFLBomNxcDTq
ovkKsIQhivebpLMYSPFqLqTVkVhMWmorQabHwiMfAkqScUgUfVGmQWTRjVLnAaHiOqzCEbgtizvHoIkj
CNBnYKhbcVmyqOrKvCLORwhMnOskqVTCUzfaUzlJBnLpnKDlwoqxTthnzPSgcLpJWYstpAtRpDLLbFck
PJsJHodnUWWABoYyEYVjiXXziRCJbEUcpwQRFKWEFDrjbvUwVoDStIbIcLUwIRAyMrpRuywOjMABUGtY
csemrzEHPhTclDMoeaVOIzIMWycsvsArRPDoIXAwoblPSttHOfdflOKlQilTnGKMyySsXAcEObKGRTfg
CvMyLHwIVAVUtJdJfHTUApBHsMrLkOgNmXuOviCgTQdQGTVfopaPgxJjbqkSzghUeUOvTgMpJsxOuJHr
sUFNHzjLdgXRVNybtIaurnNMlfILQsERqycXbxuuWbIHWrgAODVsRgGVcBboVyesbuBrKyAcRKSsMFmj
ajgAnwjSzvBUbubjRwOsXcLEiJjMBUVHmlqQhVpCKhNKwtLFsmBcvNqnuGoUibEYVFncFqgSeXniHlxH
yBgJmueKrfSrbnNnqxhmcrfOCNbbgnXsuqthGQuIilFWAdsUgSecEfMtbzFSfwjOylxVTkqdVWclsEmb
GpGJSJaXuNSxOlbnUXjXRTdCvImdvJcvIzVFtsARccwpbldcyuIcdlrMXDccXSkCLfRQNDrDAIcqoqvq
dbaQGhfLQbEbyhpogAhRzCfpiLABUkBqLBGIokjcvYBtLWlQOiakscoamTYUIXGXaOzRnwGSEvDxUIxY
zuDJdLDwdVXWMrKzgXOEpOBQJQlTVepnrSPBFlk
=========
slbkoXzycbXuWseuyrwyCjVusUcTggHHFdkrBMYIoHNTKWMEBajIBATVQSPDNclUEfgobDylLRVzVuVR
cajBbDKSGuLpbiGQEccsQfJNsUFGncOeMNujfDxUxijXITEnMOSHrXcxvzInvmGNUVNIvFLBomNxcDTq
ovkKsIQhivebpLMYSPFqLqTVkVhMWmorQabHwiMfAkqScUgUfVGmQWTRjVLnAaHiOqzCEbgtizvHoIkj
CNBnYKhbcVmyqOrKvCLORwhMnOskqVTCUzfaUzlJBnLpnKDlwoqxTthnzPSgcLpJWYstpAtRpDLLbFck
PJsJHodnUWWABoYyEYVjiXXziRCJbEUcpwQRFKWEFDrjbvUwVoDStIbIcLUwIRAyMrpRuywOjMABUGtY
csemrzEHPhTclDMoeaVOIzIMWycsvsArRPDoIXAwoblPSttHOfdflOKlQilTnGKMyySsXAcEObKGRTfg
CvMyLHwIVAVUtJdJfHTUApBHsMrLkOgNmXuOviCgTQdQGTVfopaPgxJjbqkSzghUeUOvTgMpJsxOuJHr
sUFNHzjLdgXRVNybtIaurnNMlfILQsERqycXbxuuWbIHWrgAODVsRgGVcBboVyesbuBrKyAcRKSsMFmj
ajgAnwjSzvBUbubjRwOsXcLEiJjMBUVHmlqQhVpCKhNKwtLFsmBcvNqnuGoUibEYVFncFqgSeXniHlxH
yBgJmueKrfSrbnNnqxhmcrfOCNbbgnXsuqthGQuIilFWAdsUgSecEfMtbzFSfwjOylxVTkqdVWclsEmb
GpGJSJaXuNSxOlbnUXjXRTdCvImdvJcvIzVFtsARccwpbldcyuIcdlrMXDccXSkCLfRQNDrDAIcqoqvq
dbaQGhfLQbEbyhpogAhRzCfpiLABUkBqLBGIokjcvYBtLWlQOiakscoamTYUIXGXaOzRnwGSEvDxUIxY
zuDJdLDwdVXWMrKzgXOEpOBQJQlTVepnrSPBFlk
=========
slbkoXzycuWerwCjVUTgHFdBMYINKEaAQSPDfLRGpiJnOxvmqht
slbkoXzycuWerwCjVUTgHFdBMYINKEaAQSPDfLRGpiJnOxvmqht
请按任意键继续. . .
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: