您的位置:首页 > 编程语言 > Delphi

delphi中如何统计多行文本文件中相同字符串出现的次数

2009-03-14 23:17 423 查看
{ 2002.8.5 Kingron }

{ Source:Source string }

{ Sub:Sub string }

{ Return:Count }

{ Ex:StrSubCount( 'abcdbcd ', 'bc ')=2 }

function StrSubCount(const Source, Sub: string): integer;

var

Buf : string;

i : integer;

Len : integer;

begin

Result := 0;

Buf:=Source;

i := Pos(Sub, Buf);

Len := Length(Sub);

while i <> 0 do

begin

Inc(Result);

Delete(Buf, 1, i + Len -1);

i:=Pos(Sub,Buf);

end;

end; { StrSubCount }

{ 下面这个函数返回SubStr在S中指定位置开始后的位置 }

{ 例如:PosExx( 'ab ', 'abcabcab ',3)返回4 }

function PosExx(const substr: AnsiString; const s: AnsiString; const start: Integer): Integer;

type

StrRec = record

allocSiz: Longint;

refCnt: Longint;

length: Longint;

end;

const

skew = sizeof(StrRec);

asm

{ -> EAX Pointer to substr }

{ EDX Pointer to string }

{ ECX Pointer to start //cs }

{ <-EAX Position of substr in s or 0 }

TEST EAX,EAX

JE @@noWork

TEST EDX,EDX

JE @@stringEmpty

TEST ECX,ECX

JE @@stringEmpty

PUSH EBX

PUSH ESI

PUSH EDI

MOV ESI,EAX { Point ESI to }

MOV EDI,EDX { Point EDI to }

MOV EBX,ECX

MOV ECX,[EDI-skew].StrRec.length

PUSH EDI { remembers position to calculate index }

CMP EBX,ECX

JG @@fail

MOV EDX,[ESI-skew].StrRec.length { EDX = bstr }

DEC EDX { EDX = Length(substr) - }

JS @@fail { < 0 ? return }

MOV AL,[ESI] { AL = first char of }

INC ESI { Point ESI to 2 'nd char of substr }

SUB ECX,EDX { #positions in s to look }

{ = Length(s) - Length(substr) + 1 }

JLE @@fail

DEC EBX

SUB ECX,EBX

JLE @@fail

ADD EDI,EBX

@@loop:

REPNE SCASB

JNE @@fail

MOV EBX,ECX { save outer loop }

PUSH ESI { save outer loop substr pointer }

PUSH EDI { save outer loop s }

MOV ECX,EDX

REPE CMPSB

POP EDI { restore outer loop s pointer }

POP ESI { restore outer loop substr pointer }

JE @@found

MOV ECX,EBX { restore outer loop nter }

JMP @@loop

@@fail:

POP EDX { get rid of saved s nter }

XOR EAX,EAX

JMP @@exit

@@stringEmpty:

XOR EAX,EAX

JMP @@noWork

@@found:

POP EDX { restore pointer to first char of s }

MOV EAX,EDI { EDI points of char after match }

SUB EAX,EDX { the difference is the correct index }

@@exit:

POP EDI

POP ESI

POP EBX

@@noWork:

end;

{ PosEx返回Sub在Source中第Index次出现的位置,若出现的次数不足,返回0,若找不到,返回0 }

{ 例如:PosEx( 'abcdbcd ', 'bcd ',2)返回5,PosEx( 'abcdbcd ', 'adb ')返回0,PoxEx( 'abc ', 'a ',2)返回0 }

function PosEx(const Source, Sub: string; Index: integer): integer;

var

Buf : string;

i, Len, C : integer;

begin

C := 0;

Result := 0;

Buf := Source;

i := Pos(Sub, Source);

Len := Length(Sub);

while i <> 0 do

begin

inc(C);

Inc(Result, i);

Delete(Buf, 1, i + Len - 1);

i := Pos(Sub, Buf);

if C > = Index then Break;

if i > 0 then Inc(Result, Len - 1);

end;

if C < Index then Result := 0;

end;

下面是ZsWang写的代码,效率可能要高一些:

function PosEx1(const Source, Sub: string; Index: Integer = 1): Integer;

var

I, J, K, L: Integer;

T: string;

begin

Result := 0;

T := Source;

K := 0;

L := Length(Sub);

for I := 1 to Index do begin

J := Pos(Sub, T);

if J <= 0 then Exit;

Delete(T, 1, J + L - 1);

Inc(K, J + L - 1);

end;

Dec(K, L - 1);

Result := K;

end; { PosEx1 }

下面是Dirk写的,用的实递归:

function PosN(Substring, Mainstring: string; n: Integer): Integer;

{

Function PosN get recursive - the N th position of "Substring " in

"Mainstring ". Does the Mainstring not contain Substrign the result

is 0. Works with chars and strings.

}

begin

if Pos(substring, mainstring) = 0 then

begin

posn := 0;

Exit;

end

else

begin

if n = 1 then

posn := Pos(substring, mainstring)

else

begin

posn := Pos(substring, mainstring) + posn(substring, Copy(mainstring,

(Pos(substring, mainstring) + 1), Length(mainstring)), n - 1);

end;

end;

end;

function SubStrConut(mStr: string; mSub: string): Integer;

{ 返回子字符串出现的次数 }

begin

Result :=

(Length(mStr) - Length(StringReplace(mStr, mSub, ' ', [rfReplaceAll]))) div

Length(mSub);

end
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: