您的位置:首页 > 其它

NGX打印日志时对特殊字符的转码

2015-07-01 17:16 344 查看
问题:

[root@3WR ~]# curl -svo /dev/null test/ -x 127.0.0.1:9711

日志打印结果中http_user_agent不一致

1.控制台输出结果中打印0.9.

User-Agent: curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9. zlib/1.2.3 libidn/0.6.5

2.access.log中输出结果中打印0.9.8\x7F

127.0.0.1 - - [01/Jul/2015:13:01:14 +0800] "GET http://localhost/ HTTP/1.1" 200 30 "-" "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\x7F zlib/1.2.3 libidn/0.6.5"

原因:

先看下ASCII码表

八进制 | 十六进制 | 十进制 | 字符
177 | 7F | 127 | DEL(删除)
原因是\x7F字符被解释成【删除】 操作, 于是, 本来应该记录的 0.9.8, 删除一个字符后变成了0.9.

跟踪看下为什么access.log打印出了\x7F

Breakpoint 6, ngx_http_log_variable (r=0x7587e0,
buf=0x7436b8 "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\\x7F zlib/1.2.3 libidn/0.6.5\"\n07t",
op=0x740410) at src/http/modules/ngx_http_log_module.c:893
893     value = ngx_http_get_indexed_variable(r, op->data);
(gdb) p *value
/* escape = 1,内容是"0.9.8\177",\177和\x7F是一个东西
都是符号del */
$16 = {len = 91, valid = 1, no_cacheable = 0, not_found = 0, escape = 1,
data = 0x76c13c "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\177 zlib/1.2.3 libidn/0.6.5"}

(gdb) bt
#0  ngx_http_log_variable (r=Unhandled dwarf expression opcode 0xf3
) at src/http/modules/ngx_http_log_module.c:900
#1  0x0000000000447892 in ngx_http_log_handler (r=0x7587e0) at src/http/modules/ngx_http_log_module.c:331
#2  0x000000000043d35e in ngx_http_log_request (r=0x7587e0) at src/http/ngx_http_request.c:3399
#3  0x000000000043e5a7 in ngx_http_free_request (r=0x7587e0, rc=0) at src/http/ngx_http_request.c:3346
#4  0x000000000043f40b in ngx_http_set_keepalive (r=0x7587e0) at src/http/ngx_http_request.c:2789
#5  ngx_http_finalize_connection (r=0x7587e0) at src/http/ngx_http_request.c:2459
#6  0x000000000044018b in ngx_http_finalize_request (r=0x7587e0, rc=<value optimized out>)
at src/http/ngx_http_request.c:2360
#7  0x000000000043afd9 in ngx_http_core_content_phase (r=0x7587e0, ph=Unhandled dwarf expression opcode 0xf3
) at src/http/ngx_http_core_module.c:1408
#8  0x0000000000435f93 in ngx_http_core_run_phases (r=0x7587e0) at src/http/ngx_http_core_module.c:888
#9  0x00000000004360a2 in ngx_http_handler (r=Unhandled dwarf expression opcode 0xf3
) at src/http/ngx_http_core_module.c:871
#10 0x000000000043e3bb in ngx_http_process_request (r=0x7587e0) at src/http/ngx_http_request.c:1828
#11 0x000000000044106c in ngx_http_process_request_headers (rev=Unhandled dwarf expression opcode 0xf3
) at src/http/ngx_http_request.c:1259
#12 0x00000000004415df in ngx_http_process_request_line (rev=0x7f7c35c971b0) at src/http/ngx_http_request.c:940
#13 0x0000000000441ef9 in ngx_http_wait_request_handler (rev=0x7f7c35c971b0) at src/http/ngx_http_request.c:472
#14 0x00000000004326e8 in ngx_epoll_process_events (cycle=0x73ec90, timer=Unhandled dwarf expression opcode 0xf3
) at src/event/modules/ngx_epoll_module.c:683
#15 0x0000000000429aaa in ngx_process_events_and_timers (cycle=0x73ec90) at src/event/ngx_event.c:249
#16 0x0000000000430da0 in ngx_worker_process_cycle (cycle=0x73ec90, data=Unhandled dwarf expression opcode 0xf3
) at src/os/unix/ngx_process_cycle.c:807
#17 0x000000000042f4cb in ngx_spawn_process (cycle=0x73ec90, proc=0x430cb1 <ngx_worker_process_cycle>, data=0x0,
name=0x4e7d83 "worker process", respawn=-4) at src/os/unix/ngx_process.c:198
#18 0x00000000004301e8 in ngx_start_worker_processes (cycle=0x73ec90, n=1, type=-4)
at src/os/unix/ngx_process_cycle.c:362
#19 0x0000000000431d9d in ngx_master_process_cycle (cycle=0x73ec90) at src/os/unix/ngx_process_cycle.c:249
#20 0x0000000000412d71 in main (argc=Unhandled dwarf expression opcode 0xf3
) at src/core/nginx.c:412
(gdb) n
904         return (u_char *) ngx_http_log_escape(buf, value->data, value->len);  //ngx在此函数内做了转码


源码面前,了无秘密

/*
 * Escape a byte string for output in the access log.
 *
 * Every byte whose bit is set in the escape[] bitmap is written as the
 * four characters "\xHH" (upper-case hex); all other bytes are copied
 * unchanged.  The bytes that get escaped are: control characters
 * 0x00-0x1F, the double quote (0x22), the backslash (0x5C), DEL (0x7F)
 * and every byte in 0x80-0xFF.
 *
 * dst   output buffer, or NULL to only count.  No bounds check is done
 *       here: the function writes size + 3 * (number of escaped bytes)
 *       bytes and does not append a terminating NUL.
 * src   input bytes
 * size  number of bytes to read from src
 *
 * Returns, when dst is NULL, the number of bytes in src that need
 * escaping; otherwise the address one past the last byte written to dst,
 * cast to uintptr_t.
 */
static uintptr_t
ngx_http_log_escape(u_char *dst, u_char *src, size_t size)
{
    ngx_uint_t      n;

    /* hexadecimal digit table */
    static const u_char   hex[] = "0123456789ABCDEF";

    /*
     * 256-bit map indexed by byte value: eight 32-bit words, one bit per
     * character; a bit set to 1 means the character must be escaped,
     * 0 means it is copied as is
     */
    static const uint32_t   escape[] = {
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */

                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
        0x00000004, /* 0000 0000 0000 0000  0000 0000 0000 0100 */

                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
        0x10000000, /* 0001 0000 0000 0000  0000 0000 0000 0000 */

                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */

        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
    };

    if (dst == NULL) {

        /* find the number of the characters to be escaped */

        n = 0;

        while (size) {
            if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
                n++;
            }
            src++;
            size--;
        }

        return (uintptr_t) n;
    }

    while (size) {
        /*
         * *src >> 5 (divide by 32) selects the word of escape[] holding
         * this byte; *src & 0x1f is the bit position inside that word.
         * The mask must be built from an unsigned 1: for bit 31 (e.g.
         * DEL, 0x7F) a signed "1 << 31" is undefined behaviour.
         */
        if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
            *dst++ = '\\';
            *dst++ = 'x';
            /* one byte is 8 bits; each 4-bit half becomes one hex digit */
            *dst++ = hex[*src >> 4];        /* high nibble */
            *dst++ = hex[*src & 0xf];       /* low nibble */
            src++;

        } else {
            /* characters that need no escaping are copied directly */
            *dst++ = *src++;
        }
        size--;
    }

    return (uintptr_t) dst;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: