您的位置:首页 > 运维架构 > Linux

Linux内核netfilter子系统ulogd项目性能调优记录

2017-05-10 17:04 375 查看
使用ULOGD打SYSLOG到SYSLOG-NG,当NFLOG拿到的数据包在6K左右时,CPU有两个核心占了15%左右

使用修改过的ULOGD直接打TCP,CPU只有一个核心占15%左右(修改版在https://github.com/InsZVA/ulogd-with-syslog-tcp)

使用perf top查看,

8.89%  libc-2.12.so                  [.] vfprintf
3.49%  libc-2.12.so                  [.] _IO_default_xsputn
2.10%  [kernel]                      [k] find_busiest_group
1.94%  libc-2.12.so                  [.] _IO_vfscanf
1.70%  libc-2.12.so                  [.] __strchrnul
1.66%  libpython2.7.so.1.0           [.] PyEval_EvalFrameEx
1.64%  libc-2.12.so                  [.] _itoa_word
1.45%  [kernel]                      [k] schedule
1.13%  [kernel]                      [k] ixgbe_clean_rx_irq
1.08%  ulogd                         [.] ulogd_propagate_results
1.00%  [kernel]                      [k] _spin_lock
0.98%  [kernel]                      [k] ipt_do_table
0.97%  [kernel]                      [k] net_rx_action
0.83%  [kernel]                      [k] memcpy
0.76%  [kernel]                      [k] _spin_unlock_irqrestore
0.72%  [kernel]                      [k] menu_select
0.71%  libc-2.12.so                  [.] _IO_str_init_static_internal
0.70%  [kernel]                      [k] irq_entries_start
0.65%  libc-2.12.so                  [.] _IO_vsprintf
0.59%  [kernel]                      [k] hrtimer_interrupt
0.57%  [kernel]                      [k] __hrtimer_start_range_ns
0.55%  [kernel]                      [k] ixgbe_poll
0.54%  [kernel]                      [k] rb_erase
0.54%  libc-2.12.so                  [.] _IO_old_init
0.53%  [kernel]                      [k] find_next_bit
0.53%  [kernel]                      [k] getnstimeofday
0.49%  [kernel]                      [k] _spin_lock_irqsave
0.47%  [kernel]                      [k] ktime_get_real
0.47%  [kernel]                      [k] tick_nohz_stop_sched_tick
0.46%  libpython2.7.so.1.0           [.] lookdict_string
0.46%  [kernel]                      [k] nf_conntrack_in
0.45%  traffic_server                [.] NetHandler::mainNetEvent
0.45%  [kernel]                      [k] native_read_tsc
0.45%  [kernel]                      [k] kfree
0.43%  libc-2.12.so                  [.] sprintf
0.42%  [kernel]                      [k] __audit_syscall_exit
0.42%  [kernel]                      [k] __switch_to
0.40%  [kernel]                      [k] ip_route_input
0.40%  [kernel]                      [k] __remove_hrtimer
0.38%  [kernel]                      [k] cpumask_next_and
0.37%  libc-2.12.so                  [.] __offtime
0.37%  libc-2.12.so                  [.] free


大量CPU消耗在标准输入输出库函数中,那么看来应该是解析字符串的问题了。

stack=log3:NFLOG,base1:BASE,ifi1:IFINDEX,ip2str1:IP2STR,print1:PRINTPKT,sys1:SYSLOGTCP

写了个代码测sprintf的性能:

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#define NULL 0

#define TIME_ELAPSED(codeToTime) do{ \
struct timeval beginTime, endTime; \
gettimeofday(&beginTime, NULL); \
{codeToTime;} \
gettimeofday(&endTime, NULL); \
long secTime  = endTime.tv_sec - beginTime.tv_sec; \
long usecTime = endTime.tv_usec - beginTime.tv_usec; \
printf("[%s(%d)]Elapsed Time: SecTime = %lds, UsecTime = %ldus!\n", __FILE__, __LINE__, secTime, usecTime); \
}while(0)

uint16_t randu16() {
return (uint16_t)rand();
}

uint32_t randu32() {
return (uint32_t)rand();
}

char* f_puts(char* buf, const char* data) {
while((*buf++ = *data++));
return --buf;
}

char* f_putn(char* buf, int n) {
while (n) {
int digit = n % 10;
*buf++ = (char)('0' + digit);
n /= 10;
}
return --buf;
}

int optimized() {
char buf[1024];
char* buf_cur = buf;
buf_cur = f_puts(buf_cur, "PROTO=TCP SPT=");

buf_cur = f_putn(buf_cur, randu16());
buf_cur = f_puts(buf_cur, " DPT=");
buf_cur = f_putn(buf_cur, randu16());

buf_cur = f_puts(buf_cur, " SEQ=");
buf_cur = f_putn(buf_cur, randu32());
buf_cur = f_puts(buf_cur, " ACK=");
buf_cur = f_putn(buf_cur, randu32());

buf_cur = f_puts(buf_cur, " WINDOW=");
buf_cur = f_putn(buf_cur, randu16());

buf_cur = f_puts(buf_cur, " URG ACK PSH RST SYN FIN URGP=");
buf_cur = f_putn(buf_cur, randu16());
buf_cur = f_puts(buf_cur, " ");

return (int)(buf_cur - buf);
}

int work() {
char buf[1024];
char* buf_cur = buf;
buf_cur += sprintf(buf_cur, "PROTO=TCP ");

buf_cur += sprintf(buf_cur, "SPT=%u DPT=%u ",
randu16(),
randu16());
/* FIXME: config */
buf_cur += sprintf(buf_cur, "SEQ=%u ACK=%u ",
randu32(),
randu32());

buf_cur += sprintf(buf_cur, "WINDOW=%u ",
randu16());

buf_cur += sprintf(buf_cur, "URG ");

buf_cur += sprintf(buf_cur, "ACK ");

buf_cur += sprintf(buf_cur, "PSH ");

buf_cur += sprintf(buf_cur, "RST ");

buf_cur += sprintf(buf_cur, "SYN ");

buf_cur += sprintf(buf_cur, "FIN ");

buf_cur += sprintf(buf_cur, "URGP=%u ",
randu16());
return (int)(buf_cur - buf);
}

int main() {
TIME_ELAPSED(
for (int i = 0; i < 1000; i++)
work();
);
TIME_ELAPSED(
for (int i = 0; i < 1000; i++)
optimized();
);
return 0;
}


结果:

[main.c(99)]Elapsed Time: SecTime = 0s, UsecTime = 1158us!
[main.c(103)]Elapsed Time: SecTime = 0s, UsecTime = 159us!


注意:

千万当心itoa、sprinf等库函数,性能可能低的可怕。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息