您的位置:首页 > 其它

修改tinyxml让其支持解析特殊字符

2013-11-20 09:34 393 查看
修改tinyxmlparser.cpp文件中的部分代码如下:

// Lookup table used by TiXmlBase::GetEntity.
// Each row is { entity text, length of the entity text, replacement string }.
//
// The first five rows are the predefined XML entities. The last three map the
// numeric character references for the Greek letters lambda (U+03BB, 955),
// capital phi (U+03A6, 934) and eta (U+03B7, 951) to literal characters.
//
// The second column must equal strlen() of the first column; GetEntity asserts
// this. The number of rows must equal NUM_ENTITY.
// NOTE(review): the byte length of the Greek literals depends on the encoding
// this source file is compiled with; each one must fit in Entity::chr
// (two bytes plus the terminating NUL) -- confirm for your build.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
{
    { "&amp;",  5, "&" },
    { "&lt;",   4, "<" },
    { "&gt;",   4, ">" },
    { "&quot;", 6, "\"" },
    { "&apos;", 6, "\'" },
    { "&#955;", 6, "λ" },
    { "&#934;", 6, "Φ" },
    { "&#951;", 6, "η" }
};

// Decodes the entity or numeric character reference that starts at p.
//
// p        - input text; presumably points at the '&' that opens the
//            reference (p[0] itself is only inspected by the table match and
//            the fall-through copy -- verify against callers).
// value    - output buffer that receives the decoded bytes, starting at
//            value[0]. It is NOT NUL-terminated by this function.
// length   - receives the number of bytes written to value. It stays 0 when
//            the input is not recognised (see the fall-through at the end).
// encoding - when TIXML_ENCODING_UTF8, numeric references are converted with
//            ConvertUTF32ToUTF8; otherwise the code point is truncated to a
//            single char.
//
// Returns a pointer just past the consumed input, or 0 when a numeric
// reference is malformed (missing ';' or a non-digit inside it).
const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
{
    *length = 0;

    // Try the table of known entities first.
    for ( int i = 0; i < NUM_ENTITY; ++i )
    {
        if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
        {
            assert( strlen( entity[i].str ) == entity[i].strLength );

            // Copy the replacement to the start of the output buffer.
            // memcpy is used instead of strncat so the result does not depend
            // on whatever the caller's buffer happened to contain.
            const size_t chrLength = strlen( entity[i].chr );
            memcpy( value, entity[i].chr, chrLength );
            *length = static_cast<int>( chrLength );
            return p + entity[i].strLength;
        }
    }

    // Numeric character reference: "&#NNN;" (decimal) or "&#xHHH;" (hex).
    if ( *(p+1) && *(p+1) == '#' && *(p+2) )
    {
        unsigned long ucs = 0;
        ptrdiff_t delta = 0;
        unsigned mult = 1;

        if ( *(p+2) == 'x' )
        {
            // Hexadecimal.
            if ( !*(p+3) ) return 0;

            const char* q = p+3;
            q = strchr( q, ';' );

            if ( !q || !*q ) return 0;

            delta = q-p;
            --q;

            // Walk backwards from the last digit to the 'x', accumulating
            // the value least-significant digit first.
            while ( *q != 'x' )
            {
                if ( *q >= '0' && *q <= '9' )
                    ucs += mult * (*q - '0');
                else if ( *q >= 'a' && *q <= 'f' )
                    ucs += mult * (*q - 'a' + 10);
                else if ( *q >= 'A' && *q <= 'F' )
                    ucs += mult * (*q - 'A' + 10 );
                else
                    return 0;
                mult *= 16;
                --q;
            }
        }
        else
        {
            // Decimal.
            if ( !*(p+2) ) return 0;

            const char* q = p+2;
            q = strchr( q, ';' );

            if ( !q || !*q ) return 0;

            delta = q-p;
            --q;

            // Walk backwards from the last digit to the '#'.
            while ( *q != '#' )
            {
                if ( *q >= '0' && *q <= '9' )
                    ucs += mult * (*q - '0');
                else
                    return 0;
                mult *= 10;
                --q;
            }
        }

        if ( encoding == TIXML_ENCODING_UTF8 )
        {
            // convert the UCS to UTF-8
            ConvertUTF32ToUTF8( ucs, value, length );
        }
        else
        {
            *value = static_cast<char>( ucs );
            *length = 1;
        }
        // delta is the offset of ';' from p; skip past it.
        return p + delta + 1;
    }

    // So it wasn't an entity, its unrecognized, or something like that.
    *value = *p;    // Don't put back the last one, since we return it!
    //*length = 1;  // Leave unrecognized entities - this doesn't really work.
    // Just writes strange XML.
    return p+1;
}

修改tinyxml.h文件中的部分代码如下:

// One row of the entity lookup table: the entity text to match and the
// string it is replaced with.
struct Entity
{
const char*     str;        // entity text compared against the input
unsigned int    strLength;  // number of characters of str that are compared
char            chr[3];// replacement text; special characters are generally at most two bytes long, so this is a 3-char string (two bytes plus the terminating NUL)
};
// Size constants for the entity lookup table.
enum
{
NUM_ENTITY = 8,// number of special symbols (rows in the entity table); can be changed
MAX_ENTITY_LENGTH = 6

};
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: