您的位置:首页 > 编程语言 > C语言/C++

C++一个不错的正则表达式引擎(三)

2009-11-16 15:23 405 查看
//
// String Elx
//
template <class CHART> class CStringElxT : public ElxInterface
...{
public:
int Match (CContext * pContext) const;
int MatchNext(CContext * pContext) const;
public:
CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase);
public:
CBufferT <CHART> m_szPattern;
int m_brightleft;
int m_bignorecase;
};
//
// Implementation
//
template <class CHART> CStringElxT <CHART> :: CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase) : m_szPattern(fixed, nlength)
...{
m_brightleft = brightleft;
m_bignorecase = bignorecase;
}
template <class CHART> int CStringElxT <CHART> :: Match(CContext * pContext) const
...{
const CHART * pcsz = (const CHART *)pContext->m_pMatchString;
int npos = pContext->m_nCurrentPos;
int tlen = pContext->m_pMatchStringLength;
int slen = m_szPattern.GetSize();
int bsucc;
if(m_brightleft)
...{
if(npos < slen)
return 0;
if(m_bignorecase)
bsucc = ! m_szPattern.nCompareNoCase(pcsz + (npos - slen));
else
bsucc = ! m_szPattern.nCompare (pcsz + (npos - slen));
if( bsucc )
pContext->m_nCurrentPos -= slen;
}
else
...{
if(npos + slen > tlen)
return 0;
if(m_bignorecase)
bsucc = ! m_szPattern.nCompareNoCase(pcsz + npos);
else
bsucc = ! m_szPattern.nCompare (pcsz + npos);
if( bsucc )
pContext->m_nCurrentPos += slen;
}
return bsucc;
}
template <class CHART> int CStringElxT <CHART> :: MatchNext(CContext * pContext) const
...{
int slen = m_szPattern.GetSize();
if(m_brightleft)
pContext->m_nCurrentPos += slen;
else
pContext->m_nCurrentPos -= slen;
return 0;
}
//
// CConditionElx
//
template <class CHART> class CConditionElxT : public ElxInterface
...{
public:
int Match (CContext * pContext) const;
int MatchNext(CContext * pContext) const;
public:
CConditionElxT();
public:
// backref condition
int m_nnumber;
CBufferT <CHART> m_szNamed;
// elx condition
ElxInterface * m_pelxask;
// selection
ElxInterface * m_pelxyes, * m_pelxno;
};
template <class CHART> CConditionElxT <CHART> :: CConditionElxT()
...{
m_nnumber = -1;
}
template <class CHART> int CConditionElxT <CHART> :: Match(CContext * pContext) const
...{
// status
int nbegin = pContext->m_nCurrentPos;
int nsize = pContext->m_stack.GetSize();
int ncsize = pContext->m_capturestack.GetSize();
// condition result
int condition_yes = 0;
// backref type
if( m_nnumber >= 0 )
...{
do
...{
if(m_nnumber >= pContext->m_captureindex.GetSize()) break;
int index = pContext->m_captureindex[m_nnumber];
if( index < 0) break;
// else valid
condition_yes = 1;
}
while(0);
}
else
...{
if( m_pelxask == 0 )
condition_yes = 1;
else
condition_yes = m_pelxask->Match(pContext);
pContext->m_stack.Restore(nsize);
pContext->m_nCurrentPos = nbegin;
}
// elx result
int bsucc;
if( condition_yes )
bsucc = m_pelxyes == 0 ? 1 : m_pelxyes->Match(pContext);
else
bsucc = m_pelxno == 0 ? 1 : m_pelxno ->Match(pContext);
if( bsucc )
...{
pContext->m_stack.Push(ncsize);
pContext->m_stack.Push(condition_yes);
}
else
...{
pContext->m_capturestack.Restore(ncsize);
}
return bsucc;
}
template <class CHART> int CConditionElxT <CHART> :: MatchNext(CContext * pContext) const
...{
// pop
int ncsize, condition_yes;
pContext->m_stack.Pop(condition_yes);
pContext->m_stack.Pop(ncsize);
// elx result
int bsucc;
if( condition_yes )
bsucc = m_pelxyes == 0 ? 0 : m_pelxyes->MatchNext(pContext);
else
bsucc = m_pelxno == 0 ? 0 : m_pelxno ->MatchNext(pContext);
if( bsucc )
...{
pContext->m_stack.Push(ncsize);
pContext->m_stack.Push(condition_yes);
}
else
...{
pContext->m_capturestack.Restore(ncsize);
}
return bsucc;
}
//
// MatchResult
//
template <int x> class MatchResultT
...{
public:
int IsMatched() const;
public:
int GetStart() const;
int GetEnd () const;
public:
int MaxGroupNumber() const;
int GetGroupStart(int nGroupNumber) const;
int GetGroupEnd (int nGroupNumber) const;
public:
MatchResultT(CContext * pContext, int nMaxNumber = -1);
MatchResultT <x> & operator = (const MatchResultT <x> &);
inline operator int() const ...{ return IsMatched(); }
public:
CBufferT <int> m_result;
};
typedef MatchResultT <0> MatchResult;
// Stocked Elx IDs
enum STOCKELX_ID_DEFINES
...{
STOCKELX_EMPTY = 0,
/**////////////////////////
STOCKELX_DOT_ALL,
STOCKELX_DOT_NOT_ALL,
STOCKELX_WORD,
STOCKELX_WORD_NOT,
STOCKELX_SPACE,
STOCKELX_SPACE_NOT,
STOCKELX_DIGITAL,
STOCKELX_DIGITAL_NOT,
/**///////////////////////
STOCKELX_DOT_ALL_RIGHTLEFT,
STOCKELX_DOT_NOT_ALL_RIGHTLEFT,
STOCKELX_WORD_RIGHTLEFT,
STOCKELX_WORD_RIGHTLEFT_NOT,
STOCKELX_SPACE_RIGHTLEFT,
STOCKELX_SPACE_RIGHTLEFT_NOT,
STOCKELX_DIGITAL_RIGHTLEFT,
STOCKELX_DIGITAL_RIGHTLEFT_NOT,
/**//////////////////////
STOCKELX_COUNT
};
// REGEX_FLAGS
#ifndef _REGEX_FLAGS_DEFINED
enum REGEX_FLAGS
...{
NO_FLAG = 0,
SINGLELINE = 0x01,
MULTILINE = 0x02,
GLOBAL = 0x04,
IGNORECASE = 0x08,
RIGHTTOLEFT = 0x10,
EXTENDED = 0x20,
};
#define _REGEX_FLAGS_DEFINED
#endif
//
// Builder T
//
template <class CHART> class CBuilderT
...{
public:
typedef CDelegateElxT <CHART> CDelegateElx;
typedef CBracketElxT <CHART> CBracketElx;
typedef CBackrefElxT <CHART> CBackrefElx;
typedef CConditionElxT <CHART> CConditionElx;
// Methods
public:
ElxInterface * Build(const CBufferRefT <CHART> & pattern, int flags);
int GetNamedNumber(const CBufferRefT <CHART> & named) const;
void Clear();
public:
CBuilderT();
~CBuilderT();
// Public Attributes
public:
ElxInterface * m_pTopElx;
int m_nFlags;
int m_nMaxNumber;
int m_nNextNamed;
int m_nGroupCount;
CBufferT <ElxInterface *> m_objlist;
CBufferT <ElxInterface *> m_grouplist;
CBufferT <CDelegateElx *> m_recursivelist;
CBufferT <CListElx *> m_namedlist;
CBufferT <CBackrefElx *> m_namedbackreflist;
CBufferT <CConditionElx *> m_namedconditionlist;
// CHART_INFO
protected:
struct CHART_INFO
...{
public:
CHART ch;
int type;
int pos;
int len;
public:
CHART_INFO(CHART c, int t, int p = 0, int l = 0) ...{ ch = c; type = t; pos = p; len = l; }
inline int operator == (const CHART_INFO & ci) ...{ return ch == ci.ch && type == ci.type; }
inline int operator != (const CHART_INFO & ci) ...{ return ! operator == (ci); }
};
protected:
static unsigned int Hex2Int(const CHART * pcsz, int length, int & used);
static int ReadDec(char * & str, unsigned int & dec);
void MoveNext();
int GetNext2();
ElxInterface * BuildAlternative(int vaflags);
ElxInterface * BuildList (int & flags);
ElxInterface * BuildRepeat (int & flags);
ElxInterface * BuildSimple (int & flags);
ElxInterface * BuildCharset (int & flags);
ElxInterface * BuildRecursive (int & flags);
ElxInterface * BuildBoundary (int & flags);
ElxInterface * BuildBackref (int & flags);
ElxInterface * GetStockElx (int nStockId);
ElxInterface * Keep(ElxInterface * pElx);
// Private Attributes
protected:
CBufferRefT <CHART> m_pattern;
CHART_INFO prev, curr, next, nex2;
int m_nNextPos;
int m_nCharsetDepth;
int m_bQuoted;
int (*m_quote_fun)(int);
ElxInterface * m_pStockElxs[STOCKELX_COUNT];
};
//
// Implementation
//
template <class CHART> CBuilderT <CHART> :: CBuilderT() : m_pattern(0, 0), prev(0, 0), curr(0, 0), next(0, 0), nex2(0, 0)
...{
Clear();
}
template <class CHART> CBuilderT <CHART> :: ~CBuilderT()
...{
Clear();
}
template <class CHART> int CBuilderT <CHART> :: GetNamedNumber(const CBufferRefT <CHART> & named) const
...{
for(int i=0; i<m_namedlist.GetSize(); i++)
...{
if( ! ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_szNamed.CompareNoCase(named) )
return ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_nnumber;
}
return -3;
}
template <class CHART> ElxInterface * CBuilderT <CHART> :: Build(const CBufferRefT <CHART> & pattern, int flags)
...{
// init
m_pattern = pattern;
m_nNextPos = 0;
m_nCharsetDepth = 0;
m_nMaxNumber = 0;
m_nNextNamed = 0;
m_nFlags = flags;
m_bQuoted = 0;
m_quote_fun = 0;
m_grouplist .Restore(0);
m_recursivelist .Restore(0);
m_namedlist .Restore(0);
m_namedbackreflist .Restore(0);
m_namedconditionlist.Restore(0);
int i;
for(i=0; i<3; i++) MoveNext();
// build
m_pTopElx = BuildAlternative(flags);
// group 0
m_grouplist.Prepare(0);
m_grouplist[0] = m_pTopElx;
// append named to unnamed
m_nGroupCount = m_grouplist.GetSize();
m_grouplist.Prepare(m_nMaxNumber + m_namedlist.GetSize());
for(i=0; i<m_namedlist.GetSize(); i++)
...{
CBracketElx * pleft = (CBracketElx *)m_namedlist[i]->m_elxlist[0];
CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2];
// append
m_grouplist[m_nGroupCount ++] = m_namedlist[i];
if( pleft->m_nnumber > 0 )
continue;
// same name
int find_same_name = GetNamedNumber(pleft->m_szNamed);
if( find_same_name >= 0 )
...{
pleft ->m_nnumber = find_same_name;
pright->m_nnumber = find_same_name;
}
else
...{
m_nMaxNumber ++;
pleft ->m_nnumber = m_nMaxNumber;
pright->m_nnumber = m_nMaxNumber;
}
}
for(i=1; i<m_nGroupCount; i++)
...{
CBracketElx * pleft = (CBracketElx *)((CListElx*)m_grouplist[i])->m_elxlist[0];
if( pleft->m_nnumber > m_nMaxNumber )
m_nMaxNumber = pleft->m_nnumber;
}
// connect recursive
for(i=0; i<m_recursivelist.GetSize(); i++)
...{
if( m_recursivelist[i]->m_ndata == -3 )
m_recursivelist[i]->m_ndata = GetNamedNumber(m_recursivelist[i]->m_szNamed);
if( m_recursivelist[i]->m_ndata >= 0 && m_recursivelist[i]->m_ndata < m_grouplist.GetSize() )
m_recursivelist[i]->m_pelx = m_grouplist[m_recursivelist[i]->m_ndata];
}
// named backref
for(i=0; i<m_namedbackreflist.GetSize(); i++)
...{
m_namedbackreflist[i]->m_nnumber = GetNamedNumber(m_namedbackreflist[i]->m_szNamed);
}
// named condition
for(i=0; i<m_namedconditionlist.GetSize(); i++)
...{
int nn = GetNamedNumber(m_namedconditionlist[i]->m_szNamed);
if( nn >= 0 )
...{
m_namedconditionlist[i]->m_nnumber = nn;
m_namedconditionlist[i]->m_pelxask = 0;
}
}
return m_pTopElx;
}
template <class CHART> void CBuilderT <CHART> :: Clear()
...{
for(int i=0; i<m_objlist.GetSize(); i++)
...{
delete m_objlist[i];
}
m_objlist.Restore(0);
m_pTopElx = 0;
memset(m_pStockElxs, 0, sizeof(m_pStockElxs));
}
//
// hex to int
//
template <class CHART> unsigned int CBuilderT <CHART> :: Hex2Int(const CHART * pcsz, int length, int & used)
...{
unsigned int result = 0;
int & i = used;
for(i=0; i<length; i++)
...{
if(pcsz[i] >= RCHART('0') && pcsz[i] <= RCHART('9'))
result = (result << 4) + (pcsz[i] - RCHART('0'));
else if(pcsz[i] >= RCHART('A') && pcsz[i] <= RCHART('F'))
result = (result << 4) + (0x0A + (pcsz[i] - RCHART('A')));
else if(pcsz[i] >= RCHART('a') && pcsz[i] <= RCHART('f'))
result = (result << 4) + (0x0A + (pcsz[i] - RCHART('a')));
else
break;
}
return result;
}
template <class CHART> inline ElxInterface * CBuilderT <CHART> :: Keep(ElxInterface * pelx)
...{
m_objlist.Push(pelx);
return pelx;
}
template <class CHART> void CBuilderT <CHART> :: MoveNext()
...{
// forwards
prev = curr;
curr = next;
next = nex2;
// get nex2
while( ! GetNext2() ) ...{};
}
template <class CHART> int CBuilderT <CHART> :: GetNext2()
...{
// check length
if(m_nNextPos >= m_pattern.GetSize())
...{
nex2 = CHART_INFO(0, 1, m_nNextPos, 0);
return 1;
}
int delta = 1;
CHART ch = m_pattern[m_nNextPos];
// if quoted
if(m_bQuoted)
...{
if(ch == RCHART('\'))
...{
if(m_pattern[m_nNextPos + 1] == RCHART('E'))
...{
m_quote_fun = 0;
m_bQuoted = 0;
m_nNextPos += 2;
return 0;
}
}
if(m_quote_fun != 0)
nex2 = CHART_INFO((CHART)(*m_quote_fun)((int)ch), 0, m_nNextPos, delta);
else
nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
m_nNextPos += delta;
return 1;
}
// common
switch(ch)
...{
case RCHART('\'):
...{
CHART ch1 = m_pattern[m_nNextPos+1];
// backref
if(ch1 >= RCHART('0') && ch1 <= RCHART('9'))
...{
nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
break;
}
// escape
delta = 2;
switch(ch1)
...{
case RCHART('A'):
case RCHART('Z'):
case RCHART('w'):
case RCHART('W'):
case RCHART('s'):
case RCHART('S'):
case RCHART('B'):
case RCHART('d'):
case RCHART('D'):
case RCHART('k'):
case RCHART('g'):
nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
break;
case RCHART('b'):
if(m_nCharsetDepth > 0)
nex2 = CHART_INFO('', 0, m_nNextPos, delta);
else
nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
break;
/**//*
case RCHART('<'):
case RCHART('>'):
if(m_nCharsetDepth > 0)
nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
else
nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
break;
*/
case RCHART('x'):
if(m_pattern[m_nNextPos+2] != '{')
...{
int red = 0;
unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 2, red);
delta += red;
if(red > 0)
nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
else
nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
break;
}
case RCHART('u'):
if(m_pattern[m_nNextPos+2] != '{')
...{
int red = 0;
unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 4, red);
delta += red;
if(red > 0)
nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
else
nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
}
else
...{
int red = 0;
unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 3, sizeof(int) * 2, red);
delta += red;
while(m_nNextPos + delta < m_pattern.GetSize() && m_pattern.At(m_nNextPos + delta) != RCHART('}'))
delta ++;
delta ++; // skip '}'
nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
}
break;
case RCHART('a'): nex2 = CHART_INFO(RCHART('a'), 0, m_nNextPos, delta); break;
case RCHART('f'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
case RCHART('n'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
case RCHART('r'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
case RCHART('t'): nex2 = CHART_INFO(RCHART(' '), 0, m_nNextPos, delta); break;
case RCHART('v'): nex2 = CHART_INFO(RCHART('v'), 0, m_nNextPos, delta); break;
case RCHART('e'): nex2 = CHART_INFO(RCHART( 27 ), 0, m_nNextPos, delta); break;
case RCHART('G'): // skip 'G'
if(m_nCharsetDepth > 0)
...{
m_nNextPos += 2;
return 0;
}
else
...{
nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
break;
}
case RCHART('L'):
if( ! m_quote_fun ) m_quote_fun = ::tolower;
case RCHART('U'):
if( ! m_quote_fun ) m_quote_fun = ::toupper;
case RCHART('Q'):
...{
m_bQuoted = 1;
m_nNextPos += 2;
return 0;
}
case RCHART('E'):
...{
m_quote_fun = 0;
m_bQuoted = 0;
m_nNextPos += 2;
return 0;
}
case 0:
if(m_nNextPos+1 >= m_pattern.GetSize())
...{
delta = 1;
nex2 = CHART_INFO(ch , 0, m_nNextPos, delta);
}
else
nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); // common '
本文来自CSDN博客,转载请标明出处:http://blog.csdn.net/zeroDspace/archive/2006/12/27/1464698.aspx
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: