您的位置:首页 > 运维架构 > Linux

Linux中自带正则表达式应用举例

2012-01-30 10:52 337 查看
 环境:Fedora12, C程序:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>

/*
 * Extract the substring of `s` described by `pmatch`.
 *
 * s      - the string that was passed to regexec()
 * pmatch - one match entry; [rm_so, rm_eo) are byte offsets relative to s
 *
 * Returns a pointer to a static, NUL-terminated buffer that is overwritten by
 * the next call. An empty string is returned when an argument is NULL or the
 * offsets are unset (-1) or inverted. Matches longer than the buffer are
 * truncated instead of overflowing it.
 */
char* getsubstr(char *s, regmatch_t *pmatch)
{
    static char buf[100];
    size_t len;

    buf[0] = '\0';
    if (s == NULL || pmatch == NULL ||
        pmatch->rm_so < 0 || pmatch->rm_eo < pmatch->rm_so)
        return buf;

    len = (size_t)(pmatch->rm_eo - pmatch->rm_so);
    if (len > sizeof(buf) - 1)
        len = sizeof(buf) - 1;      /* keep room for the terminator */

    memcpy(buf, s + pmatch->rm_so, len);
    buf[len] = '\0';

    return buf;
}

/*
 * Demo: compile a POSIX extended regular expression with four capture
 * groups, run it once against a sample string, and print the offsets and
 * text of the whole match and of every group that participated.
 *
 * Returns 0 on success (including "no match"), 1 if the pattern fails to
 * compile or regexec() reports an error.
 */
int main(int argc, char **argv)
{
    int status;
    size_t i;
    int cflags = REG_EXTENDED;
    regmatch_t pmatch[5];
    const size_t nmatch = sizeof(pmatch) / sizeof(pmatch[0]);
    regex_t reg;
    char errbuf[128];
    const char *pattern = "([A-Z]+)([a-z]+)ID[0-9]+@([a-z]+)\\.([a-z]+)";	// regular expression
    char buf[] = "COMEdavID2012@gmail.com";		// string to search

    (void)argc;
    (void)argv;

    status = regcomp(&reg, pattern, cflags);
    if (status != 0)
    {
        regerror(status, &reg, errbuf, sizeof(errbuf));
        fprintf(stderr, "regcomp failed: %s\n", errbuf);
        return 1;
    }

    status = regexec(&reg, buf, nmatch, pmatch, 0);
    if (status == REG_NOMATCH)
    {
        printf("No Match\n");
    }
    else if (status != 0)
    {
        /* Any other non-zero status is an error, not a match. */
        regerror(status, &reg, errbuf, sizeof(errbuf));
        fprintf(stderr, "regexec failed: %s\n", errbuf);
        regfree(&reg);
        return 1;
    }
    else
    {
        printf("Match:\n");
        for (i = 0; i < nmatch; i++)
        {
            /* rm_so == -1 marks a group that did not participate. */
            if (pmatch[i].rm_so == -1)
                continue;
            char *p = getsubstr(buf, &pmatch[i]);
            /* regoff_t is not guaranteed to be int; cast for %d. */
            printf("[%d, %d): %s\n", (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, p);
        }
    }
    regfree(&reg);

    return 0;
}


编译运行:

[zcm@t #52]$make
gcc    -c -o a.o a.c
gcc  -o a a.o
[zcm@t #53]$./a
Match:
[0, 23): COMEdavID2012@gmail.com
[0, 4): COME
[4, 7): dav
[14, 19): gmail
[20, 23): com
[zcm@t #54]$


注意

pmatch[0]保存整个正则表达式匹配到的范围(起止偏移 rm_so、rm_eo)

pmatch[1]保存子模式1(第1对括号)匹配到的范围

pmatch[2]保存子模式2(第2对括号)匹配到的范围

......

 

思考

如果想从待搜索的字符串中找出所有匹配的结果(假设多于1个),怎么办呢?——循环调用regexec,每次把搜索的起始位置后移到上一次匹配的结尾处。注意:每次调用返回的偏移量都是相对于本次传入的起始指针(下面代码中的pSrc)的,而不是相对于原始字符串的开头。代码如下:

/*
Posix正则表达式应用:循环调用regexec(),以获得多个匹配的结果
*/
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>

/*
 * Extract the substring of `s` described by `pmatch`.
 *
 * s      - the string that was passed to regexec()
 * pmatch - one match entry; [rm_so, rm_eo) are byte offsets relative to s
 *
 * Returns a pointer to a static, NUL-terminated buffer that is overwritten by
 * the next call. An empty string is returned when an argument is NULL or the
 * offsets are unset (-1) or inverted. Matches longer than the buffer are
 * truncated instead of overflowing it.
 */
char* getsubstr(char *s, regmatch_t *pmatch)
{
    static char buf[100];
    size_t len;

    buf[0] = '\0';
    if (s == NULL || pmatch == NULL ||
        pmatch->rm_so < 0 || pmatch->rm_eo < pmatch->rm_so)
        return buf;

    len = (size_t)(pmatch->rm_eo - pmatch->rm_so);
    if (len > sizeof(buf) - 1)
        len = sizeof(buf) - 1;      /* keep room for the terminator */

    memcpy(buf, s + pmatch->rm_so, len);
    buf[len] = '\0';

    return buf;
}

/*
 * Demo: find every occurrence of a pattern in a string by calling regexec()
 * repeatedly, each time starting right after the previous match.
 *
 * The offsets printed for each match are relative to the start pointer of
 * that particular search (pSrc), not to the beginning of the original buffer.
 *
 * Returns 0 on success, 1 if the pattern fails to compile or regexec()
 * reports an error.
 */
int main(int argc, char **argv)
{
    int status;
    int rc = 0;
    size_t i;
    int cflags = REG_EXTENDED;
    int eflags = 0;
    regmatch_t pmatch[10];
    const size_t nmatch = sizeof(pmatch) / sizeof(pmatch[0]);
    regex_t reg;
    char errbuf[128];
    /* Alternative pattern: "([A-Z]+)([a-z]+)(ID|DB)[0-9]+@([a-z]+)\\.([a-z]+)" */
    const char *pattern = "[[:upper:]]+([[:lower:]]+)"; // regular expression
    char buf[] = "c COMEdavDB2012@gmail.com ZHOUcimingID2030@sohu.com"; // string to search
    char *pSrc = buf, *p = NULL;
    int mCount = 1; // ordinal of the current search
    size_t len = strlen(buf);

    (void)argc;
    (void)argv;

    status = regcomp(&reg, pattern, cflags); // compile the regular expression
    if (status != 0)
    {
        regerror(status, &reg, errbuf, sizeof(errbuf));
        fprintf(stderr, "regcomp failed: %s\n", errbuf);
        return 1;
    }

    do // keep searching until nothing more matches
    {
        printf("pSrc = %s\n", pSrc);
        status = regexec(&reg, pSrc, nmatch, pmatch, eflags);
        if (status == REG_NOMATCH) // nothing (more) found
        {
            printf("No Match%d\n", mCount);
            break;
        }
        else if (status != 0)
        {
            /* Any other non-zero status is an error, not a match. */
            regerror(status, &reg, errbuf, sizeof(errbuf));
            fprintf(stderr, "regexec failed: %s\n", errbuf);
            rc = 1;
            break;
        }
        else
        {
            printf("Match%d:\n", mCount);
            for (i = 0; i < nmatch; i++) // print this match and its sub-patterns
            {
                if (pmatch[i].rm_so == -1)
                    break;
                p = getsubstr(pSrc, &pmatch[i]);
                /* regoff_t is not guaranteed to be int; cast for %d. */
                printf("pmatch[%zu] = [%d, %d): %s\n", i,
                       (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, p);
            }
            putchar('\n');

            if (pmatch[0].rm_eo == pmatch[0].rm_so)
            {
                /* Zero-length match: step one byte forward, otherwise the
                   same empty match would be found forever. */
                if (pSrc[pmatch[0].rm_eo] == '\0')
                    break;
                pSrc = pSrc + pmatch[0].rm_eo + 1;
            }
            else
            {
                pSrc = pSrc + pmatch[0].rm_eo; // move the search start past this match
            }
            /* Later searches no longer start at the beginning of the line. */
            eflags = REG_NOTBOL;
        }
        mCount++;
    } while ((size_t)(pSrc - buf) + 1 < len);

    regfree(&reg);

    return rc;
}
编译运行:
[zcm@t #157]$make
gcc -c -o a2.o a2.c
gcc -o a2 a2.o
[zcm@t #158]$./a2
pSrc = c COMEdavDB2012@gmail.com ZHOUcimingID2030@sohu.com
Match1:
pmatch[0] = [2, 9): COMEdav
pmatch[1] = [6, 9): dav

pSrc = DB2012@gmail.com ZHOUcimingID2030@sohu.com
Match2:
pmatch[0] = [17, 27): ZHOUciming
pmatch[1] = [21, 27): ciming

pSrc = ID2030@sohu.com
No Match3
[zcm@t #159]$
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息