您的位置:首页 > 移动开发 > IOS开发

ganglia与nagios组合使用

2016-05-20 15:37 609 查看

1.复制check_ganglia.py到/usr/lib64/nagios/plugins

check_ganglia.py(自行修改的,官方的有BUG)

#!/usr/bin/env python

import sys
import getopt
import socket
import xml.parsers.expat

class GParser:
    """Pull a single metric value for a single host out of an XML document.

    The document is expected to contain HOST elements (identified by their
    NAME attribute) with METRIC elements (NAME and VAL attributes) inside.
    """

    def __init__(self, host, metric):
        """Remember which host/metric pair to look for.

        host   -- NAME attribute of the HOST element to match.
        metric -- NAME attribute of the METRIC element to match.
        """
        self.inhost = 0      # 1 while the parser is inside the wanted HOST
        self.inmetric = 0    # not read by this class; kept for compatibility
        self.value = None    # raw VAL string of the wanted metric, once seen
        self.host = host
        self.metric = metric

    def parse(self, file):
        """Parse XML text and return the wanted metric as a float.

        file -- the XML document text (not a file object, despite the
                name); it is handed straight to expat's Parse().

        Raises Exception('Host/value not found') when no matching
        HOST/METRIC pair carried a value, an expat error when the document
        is malformed or incomplete, and ValueError when VAL is not numeric.
        """
        # Start from a clean state so one instance can parse repeatedly.
        self.inhost = 0
        self.value = None

        p = xml.parsers.expat.ParserCreate()
        # Bind the handler to this instance; the class must not depend on a
        # module-level variable that happens to be called `parser`.
        p.StartElementHandler = self.start_element
        # isfinal=True: the whole document is supplied in one call, so let
        # expat finish parsing and report truncated input.
        p.Parse(file, True)
        if self.value is None:
            raise Exception('Host/value not found')
        return float(self.value)

    def start_element(self, name, attrs):
        """Expat start-tag callback: track the current host, grab the value.

        name  -- element name.
        attrs -- mapping of attribute names to values for that element.
        """
        if name == "HOST":
            # Re-evaluate on every HOST so metrics belonging to a later,
            # non-matching host can never overwrite the value.
            self.inhost = 1 if attrs.get("NAME") == self.host else 0
        elif self.inhost == 1 and name == "METRIC":
            if attrs.get("NAME") == self.metric:
                self.value = attrs.get("VAL")

def usage():
    """Print the command-line synopsis on stdout and exit with status 3.

    Status 3 is the same code this script uses for its UNKNOWN result.
    Never returns: sys.exit() raises SystemExit.
    """
    # Adjacent literals are concatenated into one line of output. A single
    # parenthesised argument keeps this valid on both Python 2 and Python 3.
    synopsis = ("Usage: check_ganglia "
                "-h|--host= -m|--metric= -w|--warning= "
                "-c|--critical= [-s|--server=] [-p|--port=] ")
    print(synopsis)
    sys.exit(3)

if __name__ == "__main__":
    # Defaults for the Ganglia server to query; -s / -p override them.
    ganglia_host = '127.0.0.1'
    ganglia_port = 8649
    # Required options; they stay None until given on the command line.
    host = None
    metric = None
    warning = None
    critical = None

    try:
        options, args = getopt.getopt(
            sys.argv[1:],
            "h:m:w:c:s:p:",
            ["host=", "metric=", "warning=", "critical=", "server=", "port="],
        )
    except getopt.GetoptError as err:
        print("check_gmond: %s" % (err,))
        usage()
        sys.exit(3)  # safety net; usage() already exits with status 3

    try:
        for o, a in options:
            if o in ("-h", "--host"):
                host = a
            elif o in ("-m", "--metric"):
                metric = a
            elif o in ("-w", "--warning"):
                warning = float(a)
            elif o in ("-c", "--critical"):
                critical = float(a)
            elif o in ("-p", "--port"):
                ganglia_port = int(a)
            elif o in ("-s", "--server"):
                ganglia_host = a
    except ValueError as err:
        # A non-numeric threshold or port would otherwise end in a traceback
        # with exit status 1, which this script uses for WARNING.
        print("check_gmond: %s" % (err,))
        usage()
        sys.exit(3)

    if critical is None or warning is None or metric is None or host is None:
        usage()
        sys.exit(3)

    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((ganglia_host, ganglia_port))
            # Keep this bound to the module-level name `parser`; do not
            # rename it without checking GParser.parse for references to
            # that name.
            parser = GParser(host, metric)
            makefile = s.makefile("r")
            try:
                # Read the whole response and flatten newlines to spaces.
                linea = makefile.read().replace("\n", " ")
            finally:
                makefile.close()
            value = parser.parse(linea)
        finally:
            # Release the socket even when connecting, reading or parsing fails.
            s.close()
    except Exception as err:
        print("CHECKGANGLIA UNKNOWN: Error while getting value \"%s\"" % (err,))
        sys.exit(3)

    if critical > warning:
        # Normal thresholds: larger values are worse.
        is_critical = value >= critical
        is_warning = value >= warning
    else:
        # Inverted thresholds (critical <= warning): smaller values are worse.
        is_critical = critical >= value
        is_warning = warning >= value

    if is_critical:
        print("CHECKGANGLIA CRITICAL: %s is %.2f" % (metric, value))
        sys.exit(2)
    elif is_warning:
        print("CHECKGANGLIA WARNING: %s is %.2f" % (metric, value))
        sys.exit(1)
    else:
        print("CHECKGANGLIA OK: %s is %.2f" % (metric, value))
        sys.exit(0)


2.创建/etc/nagios/objects/ganglia-services.cfg

define host {
use linux-server
host_name 1.1.1.1   ; 名字随便起,监控的是1上的flume,就写1的ip
address 1.1.1.1 ; 名字随便起,监控的是1上的flume,就写1的ip
}

define hostgroup {
hostgroup_name ganglia-servers
alias   nagios server
members *
}

define servicegroup {
servicegroup_name ganglia-metrics
alias Ganglia Metrics
}

define command {
command_name check_ganglia
command_line /usr/lib64/nagios/plugins/check_ganglia.py -h mg -m $ARG1$ -w $ARG2$ -c $ARG3$ ; -h 这个需要在命令行上执行脚本看用ip还是主机名合适
}

define service {
use generic-service
name ganglia-service
hostgroup_name ganglia-servers
service_groups ganglia-metrics
notifications_enabled 0
}

# 监控flume.CHANNEL.memoryChannel.EventPutSuccessCount,其他复制的改两个地方就行【service_description和check_command】
define service{
max_check_attempts      5       ;
normal_check_interval   3       ;
retry_check_interval    2       ;
check_period            24x7    ;
notification_interval   60      ;
notification_period     24x7    ;
notification_options    w,u,c,r ;
contact_groups          admins  ;
use                             ganglia-service
service_description             FLUME发送event数量 ; 网页上显示用的
check_command                   check_ganglia!flume.CHANNEL.memoryChannel.EventPutSuccessCount!10!50 ; 直接从ganglia标题上复制就行
}


3.修改contacts.cfg

vi /etc/nagios/objects/contacts.cfg

define contact{
contact_name                    nagiosadmin     ; Short name of user
use             generic-contact     ; Inherit default values from generic-contact template (defined above)
alias                           Nagios Admin        ; Full name of user
service_notification_period     workhours                    ;
host_notification_period        workhours                    ;
service_notification_options    w,u,c,r                 ;
host_notification_options       d,u,r                   ;
service_notification_commands   notify-service-by-email        ;
host_notification_commands      notify-host-by-email     ;
email                          12345@qq.com; 【复制以后只改接收邮箱地址就行】
}

define contactgroup{
contactgroup_name       admins
alias                   bfire
members                 nagiosadmin
}


4.修改nagios.cfg

vi /etc/nagios/nagios.cfg

加入cfg_file=/etc/nagios/objects/ganglia-services.cfg


5.重启nagios和apache

service nagios restart
service httpd restart


6.网页设置(http://ip/ganglia)





7.查看nagios日志

more /var/log/nagios/nagios.log



SERVICE NOTIFICATION代表邮件发送成功。



8.邮件配置

yum remove sendmail
service postfix restart
## 发送测试邮件
echo "how are you today" | mail -s "test" 12345@qq.com


其他相关文章:

1. ganglia安装和配置

2. nagios安装和配置

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  ganglia nagios