您的位置:首页 > 编程语言 > C语言/C++

C语言URL编解码

2015-12-01 00:00 651 查看
摘要: 与web项目进行对接时, http所需传的参数有时需要以url进行编解码操作

char *str = "这是一个7 8吗";

用站长工具进行URL编码得到两种形式的编码:

UTF-8(汉字由3个字节表示):%e8%bf%99%e6%98%af%e4%b8%80%e4%b8%aa7+8%e5%90%97

GBK (汉字由2个字节表示):%d5%e2%ca%c7%d2%bb%b8%f67+8%c2%f0

----------------------------------------------------------------------------------

然而,程序中字符串常量的实际编码由源文件本身保存时所用的文件编码决定(源文件为GBK则字符串为GBK,源文件为UTF-8则字符串为UTF-8);



[term@192.168.101.206]/home/term/mengfh/other>>./url_encode
源数据src(GBK) =[这是一个7 8吗]
urlencode(str, strlen(str), &des_len)= %D5%E2%CA%C7%D2%BB%B8%F67+8%C2%F0
urldecode(desBuf, des_len)=这是一个7 8吗
conv_charset("UTF-8","GBK",  str, strlen(str), tmpBuf, sizeof(tmpBuf)) = 杩..涓€涓. 8?
urlencode(tmpBuf, strlen(tmpBuf)= %E8%BF%99%E6%98%AF%E4%B8%80%E4%B8%AA7+8%E5%90%97
urldecode(desBuf, des_len)=杩..涓€涓. 8?
conv_charset("GBK", "UTF-8",  desBuf, strlen(desBuf), tmpBuf, sizeof(tmpBuf))=这是一个7 8吗


代码如下:

/*
* url_encode.c
*
*  Created on: 2015年12月1日
*      Author: mengfh
*
*	 程序默认编码格式:由程序本身决定,如文件为GBK格式时程序默认产生的也是GBK格式
*	 需要转换成其他格式时,可以自行转换
*
*/

#include "iconv.h"
#include <limits.h>
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
/**
 * URL-encode a byte buffer (form-urlencoded style).
 *
 * ASCII letters, digits, '-', '.' and '_' are copied unchanged, a space
 * becomes '+', and every other byte is written as "%XX" using upper-case
 * hexadecimal digits. The function works on raw bytes, so the result depends
 * on the character encoding of the input (e.g. GBK vs. UTF-8).
 *
 * @param s          bytes to encode; does not need to be NUL-terminated
 * @param len        number of bytes to read from s
 * @param new_length if not NULL, receives the length of the encoded string,
 *                   not counting the terminating NUL; untouched on failure
 * @return a newly malloc'ed, NUL-terminated string that the caller must
 *         free(), or NULL when len is negative, s is NULL with len > 0,
 *         the encoded size would not fit in an int, or allocation fails
 */
char * urlencode(char const *s, int len, int *new_length)
{
    static const char hexchars[] = "0123456789ABCDEF";
    const unsigned char *src = (const unsigned char *) s;
    unsigned char *start;
    unsigned char *to;
    int i;

    /* Worst case is three output bytes per input byte plus the terminator;
     * reject lengths for which that size (or *new_length) would overflow. */
    if (len < 0 || (s == NULL && len > 0) || len > (INT_MAX - 1) / 3)
    {
        return NULL;
    }

    start = malloc((size_t) len * 3 + 1);
    if (start == NULL)
    {
        return NULL;
    }
    to = start;

    for (i = 0; i < len; i++)
    {
        unsigned char c = src[i];

        if (c == ' ')
        {
            *to++ = '+';
        }
        else if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z')
                || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_')
        {
            /* Explicit ASCII ranges keep the result independent of locale. */
            *to++ = c;
        }
        else
        {
            *to++ = '%';
            *to++ = (unsigned char) hexchars[c >> 4];
            *to++ = (unsigned char) hexchars[c & 15];
        }
    }
    *to = '\0';

    if (new_length)
    {
        *new_length = (int) (to - start);
    }
    return (char *) start;
}

/* Return the numeric value (0-15) of an ASCII hexadecimal digit, or -1 if c
 * is not one. Uses explicit ranges, so it is locale-independent and safe for
 * any byte value. */
static int hex_digit_value(int c)
{
    if (c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f')
    {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F')
    {
        return c - 'A' + 10;
    }
    return -1;
}

/**
 * URL-decode a buffer in place.
 *
 * '+' becomes a space and each "%XX" sequence (two hex digits, either case)
 * becomes the byte with that value. A '%' that is not followed by two hex
 * digits inside the first len bytes is copied through unchanged.
 *
 * @param str buffer to decode; it is overwritten with the decoded bytes and
 *            a terminating NUL, so it must have room for len + 1 bytes
 * @param len number of bytes of str to decode; values <= 0 decode nothing
 * @return length of the decoded data, not counting the terminating NUL
 *         (0 when str is NULL)
 */
int urldecode(char *str, int len)
{
    char *dest = str;
    const char *data = str;

    if (str == NULL)
    {
        return 0;
    }

    /* len counts the unread bytes, including the one at *data. */
    while (len > 0)
    {
        int hi = -1;
        int lo = -1;

        if (*data == '%' && len >= 3)
        {
            hi = hex_digit_value((unsigned char) data[1]);
            lo = hex_digit_value((unsigned char) data[2]);
        }

        if (hi >= 0 && lo >= 0)
        {
            *dest++ = (char) (hi * 16 + lo);
            data += 3;
            len -= 3;
        }
        else
        {
            *dest++ = (*data == '+') ? ' ' : *data;
            data++;
            len--;
        }
    }
    *dest = '\0';
    return (int) (dest - str);
}

/**
 * Convert a buffer from one character encoding to another using iconv.
 *
 * The output buffer is zero-filled first and its last byte is reserved, so
 * on return it always ends with at least one zero byte. (For target
 * encodings with multi-byte code units a single zero byte is not a full
 * string terminator.)
 *
 * @param dest   name of the target encoding, as accepted by iconv_open()
 * @param src    name of the source encoding, as accepted by iconv_open()
 * @param input  bytes to convert
 * @param ilen   number of bytes in input
 * @param output buffer that receives the converted bytes
 * @param olen   size of output in bytes, including the reserved terminator
 * @return 0 on success; -1 if output is NULL or olen is 0, the conversion
 *         is not supported, the input is invalid or incomplete, or the
 *         result does not fit in olen - 1 bytes
 */
int conv_charset(const char *dest, const char *src, char *input, size_t ilen, char *output, size_t olen)
{
    iconv_t conv;
    size_t out_left;
    size_t rc;

    if (output == NULL || olen == 0)
    {
        return -1;
    }
    memset(output, 0, olen);

    conv = iconv_open(dest, src);
    if (conv == (iconv_t) -1)
    {
        return -1;
    }

    /* Keep one byte back so the zero-filled buffer stays terminated. */
    out_left = olen - 1;
    rc = iconv(conv, &input, &ilen, &output, &out_left);

    /* Release the descriptor on every path, not only on success. */
    iconv_close(conv);

    /* iconv() reports failure only as (size_t)-1; any other value is a
     * count of non-reversible conversions, not an error. */
    return (rc == (size_t) -1) ? -1 : 0;
}

/* URL-encode the NUL-terminated string src and copy the result into dst
 * (dst_size bytes), releasing the temporary buffer returned by urlencode().
 * Stores the encoded length in *out_len. Returns 0 on success, or -1 if
 * encoding fails or the result does not fit in dst. */
static int encode_to_buffer(const char *src, char *dst, size_t dst_size, int *out_len)
{
    int enc_len = 0;
    char *enc = urlencode(src, (int) strlen(src), &enc_len);

    if (enc == NULL)
    {
        return -1;
    }
    if ((size_t) enc_len >= dst_size)
    {
        free(enc);
        return -1;
    }

    memset(dst, 0, dst_size);
    memcpy(dst, enc, (size_t) enc_len);
    free(enc);

    *out_len = enc_len;
    return 0;
}

/**
 * Demo driver: URL-encodes and decodes a sample string in the encoding the
 * source file was saved in (labelled GBK in the output), then converts it
 * to UTF-8, repeats the encode/decode round trip, and converts it back.
 * All results are printed to stderr.
 *
 * @return 0 on success, -1 if an encoding or charset conversion step fails
 */
int main(void)
{
    /* A writable array rather than a pointer to a literal, because
     * conv_charset() takes a non-const input pointer. */
    char str[] = "这是一个7 8吗";
    char desBuf[1024], tmpBuf[1024];
    int des_len = 0;

    //=============== GBK: encode/decode the string as stored ===============
    fprintf(stderr, " 源数据src(GBK) =[%s]\n", str);
    if (encode_to_buffer(str, desBuf, sizeof(desBuf), &des_len) != 0)
    {
        fprintf(stderr, "urlencode error \n");
        return -1;
    }
    fprintf(stderr, "urlencode(str, strlen(str), &des_len)= %s\n", desBuf);

    urldecode(desBuf, des_len);
    fprintf(stderr, "urldecode(desBuf, des_len)=%s\n", desBuf);

    //=============== UTF-8: convert first, then encode/decode ==============
    memset(tmpBuf, 0, sizeof(tmpBuf));
    if (conv_charset("UTF-8", "GBK", str, strlen(str), tmpBuf, sizeof(tmpBuf)) != 0)
    {
        fprintf(stderr, "conv_charset error \n");
        return -1;
    }
    fprintf(stderr, "conv_charset(\"UTF-8\",\"GBK\",  str, strlen(str), tmpBuf, sizeof(tmpBuf)) = %s\n", tmpBuf);

    if (encode_to_buffer(tmpBuf, desBuf, sizeof(desBuf), &des_len) != 0)
    {
        fprintf(stderr, "urlencode error \n");
        return -1;
    }
    fprintf(stderr, "urlencode(tmpBuf, strlen(tmpBuf)= %s\n", desBuf);

    urldecode(desBuf, des_len);
    fprintf(stderr, "urldecode(desBuf, des_len)=%s\n", desBuf);

    // Convert the decoded UTF-8 bytes back to GBK to complete the round trip.
    memset(tmpBuf, 0, sizeof(tmpBuf));
    if (conv_charset("GBK", "UTF-8", desBuf, strlen(desBuf), tmpBuf, sizeof(tmpBuf)) != 0)
    {
        fprintf(stderr, "conv_charset error \n");
        return -1;
    }
    fprintf(stderr, "conv_charset(\"GBK\", \"UTF-8\",  desBuf, strlen(desBuf), tmpBuf, sizeof(tmpBuf))=%s\n", tmpBuf);

    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: