您的位置:首页 > 编程语言 > C语言/C++

GBK转码成UTF-8及奇数个汉字解码得到乱码的问题(C++)

2013-01-31 12:49 501 查看
首先转码方法网上很多,如下面:

/**
 * Converts a NUL-terminated multi-byte string from one Windows code page to
 * another (for example CP_ACP -> CP_UTF8), going through UTF-16 as the
 * intermediate form.
 *
 * @param strIn           NUL-terminated input string encoded in sourceCodepage.
 * @param strOut          Caller-allocated output buffer. It must be large enough
 *                        for the converted string plus the terminating NUL; this
 *                        function has no way to check its size.
 * @param sourceCodepage  Code page of strIn (e.g. CP_ACP).
 * @param targetCodepage  Code page to produce in strOut (e.g. CP_UTF8).
 *
 * If any step of the conversion fails, strOut is left as an empty string.
 */
__inline void Convert(const char* strIn, char* strOut, int sourceCodepage, int targetCodepage)
{
    if (strOut == NULL)
        return;
    strOut[0] = '\0';               // defined result on every failure path
    if (strIn == NULL)
        return;

    // With cbMultiByte == -1 and a zero-sized output buffer the call returns
    // the required number of wchar_t, including the terminating NUL.
    const int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, strIn, -1, NULL, 0);
    if (unicodeLen <= 0)
        return;

    wchar_t* pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));

    if (MultiByteToWideChar(sourceCodepage, 0, strIn, -1, pUnicode, unicodeLen) > 0)
    {
        // Same two-step pattern: query the byte count (terminator included),
        // then convert straight into the caller's buffer.
        const int targetLen = WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, NULL, 0, NULL, NULL);
        if (targetLen > 0 &&
            WideCharToMultiByte(targetCodepage, 0, pUnicode, -1, strOut, targetLen, NULL, NULL) <= 0)
        {
            strOut[0] = '\0';       // do not hand back a half-written buffer
        }
    }

    // Allocated with new[], so it must be released with delete[].
    delete[] pUnicode;
}


GBK转成UTF-8:

// Caller-side example: convert a CString from the ANSI code page (GBK in the
// article's setup) to UTF-8 with Convert().
CString strConfKind="天安门";

int nLength = strConfKind.GetLength();

// NOTE: nLength + 1 bytes is not enough for the UTF-8 result. Each of these
// Chinese characters takes 3 bytes in UTF-8, and Convert() cannot check the
// buffer size, so it writes past the end of strOut.
char *strOut = new char[nLength+1];

Convert(strConfKind.GetBuffer(0),strOut, CP_ACP, CP_UTF8);

...

// Array form of delete, matching the new[] above.
delete []strOut;
strOut = NULL;


但上面的代码存在两个问题:

1、char *strOut = new char[nLength+1]; 申请的长度是不够的。如上面“天安门”是3个汉字,在多字节(MBCS)工程中 GetLength() 返回的是字节数,GBK下每个汉字占2个字节,所以nLength为6,只申请了7个字节;但UTF-8格式一个汉字占三个字节,至少需要申请10个字节(3*3+1)。

2、奇数个汉字转码后,再由UTF-8转成GBK时,最后一个字符一直显示为“?”。因为一个汉字转成UTF-8需要3个字节,3个汉字就成了9个字节,而它会2个字节2个字节地转换成字符,当字节数是奇数时,最后1个字节转字符就会计算错误,然后直接把最后这个字符赋为“?”,这样改变了数据,影响后面的解码。

解决方案:

CString strConfKind="天安门";;

int Lenth = strConfKind.GetLength();
int nELenth = 0;    //英文字符数
int nCLenth = 0;    //中文字符数
for(i=0;i<Lenth;i++)
{
char c = strConfKind.GetAt(i);
//是中文字符 中文字符编码 1XXX XXXX 1XXX XXXX
if(c<0||c>255)
{
i++;		//跳过汉字的第二个字节
continue;
}
//是英文字符
else
nELenth++;
}
//计算中文字符数,每个中文字符占两个字节
nCLenth = (Lenth-nELenth)/2;

char *strOut = new char[nELenth+nCLenth*3+1];
Convert(strConfKind.GetBuffer(0),strOut, CP_ACP, CP_UTF8);


GBK转成UTF-8:一个汉字需要三个字节,一个英文需要一个字节。这样申请nELenth+nCLenth*3+1个字节。

真正的操作是和服务端交互,上传XML文件,服务器返回的是有乱码。但直接在代码测试没出现这个问题,了解的朋友请提示一下。

测试代码:

void test_convert()
{
CString str11 = "天安门";

int Lenth = str11.GetLength();
int nELenth = 0;    //英文字符数
int nCLenth = 0;    //中文字符数
int nTotalLenth = 0;//总共字符数
for(int i=0;i<Lenth;i++)
{
char c = str11.GetAt(i);
//是中文字符 中文字符编码 1XXX XXXX 1XXX XXXX
if(c<0||c>255)
{
i++;		//跳过汉字的第二个字节
continue;
}
//是英文字符
else
nELenth ++;
}
//计算中文字符数,每个中文字符占两个字节
nCLenth = (Lenth-nELenth)/2;

char *strOut = new char[nELenth+nCLenth*3+1];
Convert(str11.GetBuffer(0),strOut, CP_ACP, CP_UTF8);//yangzenghua_2010071316:55
cout << strOut << endl;
CString str = "";
for (int i=0; i<strlen(strOut); i++)
{
cout << (int)strOut[i] << " ";
str.Format("%s,%d", str, (int)strOut[i]);
}
CFile sourceFile;

CFileException ex;
if (!sourceFile.Open("C:\\1.txt",
CFile::modeWrite |CFile::shareExclusive | CFile::modeCreate, &ex))
{
// complain if an error happened
// no need to delete the ex object

TCHAR szError[1024];
ex.GetErrorMessage(szError, 1024);
cout << "Couldn't open source file: ";
cout << szError;
return ;
}
sourceFile.Write(strOut, nELenth+nCLenth*3+1);
delete strOut;
strOut = NULL;
sourceFile.Close();

if (!sourceFile.Open("C:\\1.txt",
CFile::modeRead | CFile::shareDenyWrite, &ex))
{
// complain if an error happened
// no need to delete the ex object

TCHAR szError[1024];
ex.GetErrorMessage(szError, 1024);
cout << "Couldn't open source file: ";
cout << szError;
return ;
}

char *strOut2 = new char[nELenth+nCLenth*3+1];
sourceFile.Read(strOut2, nELenth+nCLenth*3+1);

m_log.Add("%s,%s", str11, str);
strOut = new char[nELenth+nCLenth*3+1];
Convert(strOut2, strOut, CP_UTF8, CP_ACP);
cout << endl;
cout << strOut << endl;
delete strOut;
strOut = NULL;
delete strOut2;
strOut2 = NULL;
}


参考:http://www.iteye.com/topic/1097560
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: