您的位置:首页 > 其它

[VC]各种字符集编码之间的转换

2012-02-03 10:59 204 查看
1.字符集转换: Unicode - Ansi

/*!
 * \brief Converts a UTF-16 wide string to the calling thread's ANSI code page.
 *
 * \param wstrSrc  Source text; embedded NUL characters are converted like any
 *                 other character because the length is passed explicitly.
 * \return The converted multi-byte string without a trailing terminator, or an
 *         empty string when the source is empty, is too long to be described
 *         by an int, or the conversion fails.
 */
string CXX::UnicodeToAnsi ( const wstring& wstrSrc )
{
    // The API takes the length as an int. Passing it explicitly (instead of -1)
    // keeps the terminating NUL out of the returned string. A length of 0 is
    // rejected by the API, so the empty case is handled here.
    const int iSrcLen = static_cast<int>( wstrSrc.size() );
    if ( iSrcLen <= 0 || static_cast<size_t>( iSrcLen ) != wstrSrc.size() )
    {
        return string();
    }

    // First call: ask how many bytes the converted text needs.
    const int iAllocSize = WideCharToMultiByte( CP_THREAD_ACP, 0, wstrSrc.c_str(), iSrcLen, NULL, 0, NULL, NULL );
    if ( iAllocSize <= 0 )
    {
        return string();
    }

    // Second call: convert straight into the result's own storage, so there is
    // no separate buffer to release on any exit path.
    string strRet( static_cast<size_t>( iAllocSize ), '\0' );
    const int iCharsRet = WideCharToMultiByte( CP_THREAD_ACP, 0, wstrSrc.c_str(), iSrcLen, &strRet[0], iAllocSize, NULL, NULL );

    // Keep exactly what was written; a failed conversion yields an empty string.
    strRet.resize( 0 < iCharsRet ? static_cast<size_t>( iCharsRet ) : 0 );
    return strRet;
}


2.字符集转换: Ansi - Unicode

/*!
 * \brief Converts a string in the system ANSI code page (CP_ACP) to UTF-16.
 *
 * \param strSrc  Source bytes; the explicit length is passed to the API, so
 *                embedded NUL bytes do not cut the conversion short.
 * \return The converted wide string without a trailing terminator, or an empty
 *         string when the source is empty, is too long to be described by an
 *         int, or the conversion fails.
 */
wstring CXX::AnsiToUnicode (const string& strSrc )
{
    // Passing the real length (instead of -1) keeps the terminating NUL out of
    // the returned string. The API rejects a length of 0, so handle it here.
    const int iSrcLen = static_cast<int>( strSrc.size() );
    if ( iSrcLen <= 0 || static_cast<size_t>( iSrcLen ) != strSrc.size() )
    {
        return wstring();
    }

    // First call: query the number of wide characters required.
    const int iAllocSize = MultiByteToWideChar( CP_ACP, 0, strSrc.c_str(), iSrcLen, NULL, 0 );
    if ( iAllocSize <= 0 )
    {
        return wstring();
    }

    // Second call: convert directly into the result's storage (no raw buffer
    // to free, nothing to leak if an allocation throws).
    wstring wstrRet( static_cast<size_t>( iAllocSize ), L'\0' );
    const int iCharsRet = MultiByteToWideChar( CP_ACP, 0, strSrc.c_str(), iSrcLen, &wstrRet[0], iAllocSize );

    // Keep exactly what was written; a failed conversion yields an empty string.
    wstrRet.resize( 0 < iCharsRet ? static_cast<size_t>( iCharsRet ) : 0 );
    return wstrRet;
}


3.字符集转换: Ansi - UTF8

/*!
 * \brief Converts a string in the system ANSI code page (CP_ACP) to UTF-8.
 *
 * The text is first widened to UTF-16 here; the UTF-8 encoding step is
 * delegated to UnicodeToUtf8.
 *
 * \param strSrc  Source bytes in the ANSI code page.
 * \return Whatever UnicodeToUtf8 produces for the widened text, or an empty
 *         string when the source is empty, is too long to be described by an
 *         int, or the widening step fails.
 */
string CFunction::AnsiToUtf8 ( const string& strSrc )
{
    // Pass the real length (instead of -1) so the intermediate wide string does
    // not carry a terminating NUL. The API rejects a length of 0.
    const int iSrcLen = static_cast<int>( strSrc.size() );
    if ( iSrcLen <= 0 || static_cast<size_t>( iSrcLen ) != strSrc.size() )
    {
        return string();
    }

    // First call: query the number of wide characters required.
    const int iAllocSize = MultiByteToWideChar( CP_ACP, 0, strSrc.c_str(), iSrcLen, NULL, 0 );
    if ( iAllocSize <= 0 )
    {
        return string();
    }

    // Second call: convert directly into the intermediate string's storage.
    wstring wstrTemp( static_cast<size_t>( iAllocSize ), L'\0' );
    const int iCharsRet = MultiByteToWideChar( CP_ACP, 0, strSrc.c_str(), iSrcLen, &wstrTemp[0], iAllocSize );
    if ( iCharsRet <= 0 )
    {
        return string();
    }
    wstrTemp.resize( static_cast<size_t>( iCharsRet ) );

    return UnicodeToUtf8( wstrTemp );
}
4.字符集转换: UTF8 - Ansi

/*!
 * \brief Converts a UTF-8 string to the system ANSI code page (CP_ACP).
 *
 * The text is first widened to UTF-16 by Utf8ToUnicode and then narrowed here.
 * The narrowing treats the intermediate text as NUL-terminated, so conversion
 * stops at the first NUL character in it.
 *
 * \param strSrc  Source bytes, expected to be UTF-8.
 * \return The converted multi-byte string without a trailing terminator, or an
 *         empty string when there is nothing to convert or conversion fails.
 */
string CFunction::Utf8ToAnsi( const string& strSrc )
{
    const wstring wstrTemp = Utf8ToUnicode ( strSrc );

    // With a source length of -1 the API processes up to and including the
    // terminating NUL, so the reported size always counts that terminator.
    const int iAllocSize = WideCharToMultiByte( CP_ACP, 0, wstrTemp.c_str(), -1, NULL, 0, NULL, NULL );
    if ( iAllocSize <= 0 )
    {
        return string();
    }

    // Convert directly into the result's storage (no raw buffer to free).
    string strRet( static_cast<size_t>( iAllocSize ), '\0' );
    const int iCharsRet = WideCharToMultiByte( CP_ACP, 0, wstrTemp.c_str(), -1, &strRet[0], iAllocSize, NULL, NULL );

    // Drop the terminator the API wrote so that size() reflects the text only;
    // a failed conversion yields an empty string.
    strRet.resize( 0 < iCharsRet ? static_cast<size_t>( iCharsRet - 1 ) : 0 );
    return strRet;
}


5.字符集转换: Unicode - UTF8

/*!
 * \brief Encodes a UTF-16 wide string as UTF-8.
 *
 * \param wstrSrc  Source text; the explicit length is passed to the API, so
 *                 embedded NUL characters are encoded like any other character.
 * \return The UTF-8 bytes without a trailing terminator, or an empty string
 *         when the source is empty, is too long to be described by an int, or
 *         the conversion fails.
 */
string CFunction::UnicodeToUtf8 ( const wstring& wstrSrc )
{
    // Passing the real length (instead of -1) keeps the terminating NUL out of
    // the returned string. The API rejects a length of 0, so handle it here.
    const int iSrcLen = static_cast<int>( wstrSrc.size() );
    if ( iSrcLen <= 0 || static_cast<size_t>( iSrcLen ) != wstrSrc.size() )
    {
        return string();
    }

    // First call: ask how many bytes the UTF-8 form needs.
    const int iAllocSize = WideCharToMultiByte( CP_UTF8, 0, wstrSrc.c_str(), iSrcLen, NULL, 0, NULL, NULL );
    if ( iAllocSize <= 0 )
    {
        return string();
    }

    // Second call: encode directly into the result's storage (no raw buffer to
    // free, nothing to leak if an allocation throws).
    string strRet( static_cast<size_t>( iAllocSize ), '\0' );
    const int iCharsRet = WideCharToMultiByte( CP_UTF8, 0, wstrSrc.c_str(), iSrcLen, &strRet[0], iAllocSize, NULL, NULL );

    // Keep exactly what was written; a failed conversion yields an empty string.
    strRet.resize( 0 < iCharsRet ? static_cast<size_t>( iCharsRet ) : 0 );
    return strRet;
}


6.字符集转换: UTF8 - Unicode

/*!
 * \brief Decodes a UTF-8 string into UTF-16.
 *
 * \param strSrc  Source bytes, expected to be UTF-8; the explicit length is
 *                passed to the API, so embedded NUL bytes do not cut the
 *                conversion short.
 * \return The decoded wide string without a trailing terminator, or an empty
 *         string when the source is empty, is too long to be described by an
 *         int, or the conversion fails.
 */
wstring CFunction::Utf8ToUnicode ( const string& strSrc )
{
    // Passing the real length (instead of -1) keeps the terminating NUL out of
    // the returned string. The API rejects a length of 0, so handle it here.
    const int iSrcLen = static_cast<int>( strSrc.size() );
    if ( iSrcLen <= 0 || static_cast<size_t>( iSrcLen ) != strSrc.size() )
    {
        return wstring();
    }

    // First call: query the number of wide characters required.
    const int iAllocSize = MultiByteToWideChar( CP_UTF8, 0, strSrc.c_str(), iSrcLen, NULL, 0 );
    if ( iAllocSize <= 0 )
    {
        return wstring();
    }

    // Second call: decode directly into the result's storage (no raw buffer to
    // free, nothing to leak if an allocation throws).
    wstring wstrRet( static_cast<size_t>( iAllocSize ), L'\0' );
    const int iCharsRet = MultiByteToWideChar( CP_UTF8, 0, strSrc.c_str(), iSrcLen, &wstrRet[0], iAllocSize );

    // Keep exactly what was written; a failed conversion yields an empty string.
    wstrRet.resize( 0 < iCharsRet ? static_cast<size_t>( iCharsRet ) : 0 );
    return wstrRet;
}

7.判断是否是UTF8编码

/*!
 * \brief Structural check of whether a byte buffer looks like UTF-8.
 *
 * Each lead byte must announce a 1- to 4-byte sequence and be followed by the
 * matching number of continuation bytes (10xxxxxx). Only this lead/continuation
 * structure is verified; overlong encodings and surrogate code points are not
 * rejected.
 *
 * A multi-byte sequence that is cut off by the end of the buffer is tolerated
 * (scanning simply stops), so a buffer truncated mid-character still passes.
 *
 * \param pBuffer  Start of the bytes to inspect.
 * \param size     Number of bytes available at pBuffer.
 * \return true when no structural violation is found (including for an empty
 *         buffer); false on the first violation, or when pBuffer is NULL while
 *         size is positive.
 */
bool CFunction::IsUTF8(const void* pBuffer, long size)
{
    if (size <= 0)
    {
        return true;    // nothing to inspect
    }
    if (NULL == pBuffer)
    {
        return false;   // no readable bytes behind a positive size
    }

    const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
    const unsigned char* const end = cur + size;

    while (cur < end)
    {
        const unsigned char lead = *cur;
        long seqLen = 0;

        if (lead < 0x80)        // 0xxxxxxx: ASCII
        {
            seqLen = 1;
        }
        else if (lead < 0xC0)   // 10xxxxxx: continuation byte without a lead
        {
            return false;
        }
        else if (lead < 0xE0)   // 110xxxxx: 2-byte sequence
        {
            seqLen = 2;
        }
        else if (lead < 0xF0)   // 1110xxxx: 3-byte sequence
        {
            seqLen = 3;
        }
        else if (lead < 0xF5)   // 11110xxx up to 0xF4: 4-byte sequence
        {
            seqLen = 4;
        }
        else                    // 0xF5..0xFF never occur in UTF-8
        {
            return false;
        }

        // Sequence runs past the end of the buffer: stop without failing.
        if (end - cur < seqLen)
        {
            break;
        }

        // Every byte after the lead must be a continuation byte.
        for (long i = 1; i < seqLen; ++i)
        {
            if ((cur[i] & 0xC0) != 0x80)
            {
                return false;
            }
        }
        cur += seqLen;
    }
    return true;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  null string thread delete