您的位置:首页 > 移动开发 > IOS开发

nagios监控系统解决方案及与cacti监控的整合

2014-06-01 21:55 375 查看
Nagios是一款开源的免费网络监视工具,能有效监控Windows、Linux和Unix的主机状态,交换机、路由器等网络设备,打印机等。在系统或服务状态异常时发出邮件或短信报警,第一时间通知网站运维人员,在状态恢复后发出正常的邮件或短信通知。它是可以监视系统运行状态和网络信息的监视系统,能监视所指定的本地或远程主机以及服务,同时提供异常通知功能等;可运行在Linux/Unix平台之上,同时提供一个可选的基于浏览器的WEB界面,以方便系统管理人员查看网络状态、各种系统问题以及日志等等。

cacti默认是以邮件发送警告信息的,虽然可以安装插件以其他方式进行告警,但这不是它的强项,而nagios有着强大的报警机制。


NSCA是可以实现Nagios被动监测的一个程序。被动监测,就是指由被监测的服务器主动上传数据到Nagios监控系统中。这种监测方式提高了实时性(出现问题的时候,被监测的服务器可以及时上传数据通知Nagios,从而使管理员可以尽快作出处理,而不用像主动监测中一样,非要等到下一个监测周期才能获知被监测服务器的状态)。
NDOUtils是一个可以把nagios获取的数据信息保存到MySQL数据库里的外部构件。



系统:RHEL6
package:
nagios-cn-3.2.3.tar.bz2
nagios-plugins-1.4.16.tar.gz
gd-devel-2.0.35-11.el6.x86_64.rpm

tar jxf nagios-cn-3.2.3.tar.bz2
cd nagios-cn-3.2.3
./configure //默认安装在/usr/local/nagios;根据提示信息,可能需要添加参数(如--enable-embedded-perl)和安装依赖包

rhel6.4包中没有gd-devel包,需要下载后使用yum localinstall安装
yum install perl-ExtUtils-Embed

make all
make install
make install-init
make install-commandmode
make install-config
make install-webconf

useradd nagios
usermod -G nagios apache

tar zxf nagios-plugins-1.4.16.tar.gz
cd nagios-plugins-1.4.16
./configure --enable-extra-opts --enable-perl-modules --enable-libtap
make
make install

chown -R nagios.nagios /usr/local/nagios/libexec/* //设置权限
cd /usr/local/nagios/etc/objects
cp -p localhost.cfg hosts.cfg
cp -p localhost.cfg services.cfg

vi /usr/local/nagios/etc/nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg //添加两行
cfg_file=/usr/local/nagios/etc/objects/services.cfg
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg //注释此行

vi hosts.cfg //如下格式添加host主机
define host{
use linux-server
host_name ty1.org
address 192.168.1.2
alias Manager
icon_image server.gif
statusmap_image server.gd2
2d_coords 500,200
3d_coords 500,200,100
}

define hostgroup{
hostgroup_name linux-servers ; The name of the hostgroup
alias Linux Servers ; Long name of the group
members * ;
}

vi services.cfg //配置service,只留下define servicegroup和define service选项

define servicegroup{ //服务组并不是必须的,这是为了配合nagios监控页面的显示
servicegroup_name 系统负荷检查
alias 负荷检查
members ty1.org,进程总数,ty1.org,登录用户数,ty1.org,根分区,ty1.org,交换空间利用率
}
define service{
use local-service
host_name *
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}

define service{
use local-service
host_name ty1.org
service_description 根分区
check_command check_local_disk!20%!10%!/
}
define service{
use local-service
host_name ty1.org
service_description 登录用户数
check_command check_local_users!20!50
}

define service{
use local-service
host_name ty1.org
service_description 进程总数
check_command check_local_procs!250!400!RSZDT
}

define service{
use local-service
host_name ty1.org
service_description 系统负荷
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}

define service{
use local-service
host_name ty1.org
service_description 交换空间利用率
check_command check_local_swap!20!10
}

/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
//校验nagios配置文件正确性

htpasswd -m /usr/local/nagios/etc/htpasswd.users nagiosadmin
//设置登录nagios的用户(nagiosadmin由自己定义)及密码
/etc/init.d/nagios start
/etc/init.d/httpd restart

--------------------------------------------

nagios主机A远程监控B主机mysql
B:yum install mysql-server
>create database nagdb;
>grant select on nagdb.* to nagios@'ipA';

A:
#cd /usr/local/nagios/libexec
#./check_mysql -H 192.168.0.7 -u nagios -d nagdb //如下显示则监控成功
Uptime: 10070 Threads: 1 Questions: 25 Slow queries: 0 Opens: 15 Flush tables: 1 Open tables: 8 Queries per second avg: 0.2

使nagios给QQ邮箱推送报警邮件
#cd /usr/local/nagios/etc/objects

#vi commands.cfg //添加check_mysql自动检测命令
# 'check_mysql' command definition
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u $ARG1$ -d $ARG2$
}

$USER1$ 代表/usr/local/nagios/libexec目录

#vi hosts.cfg //将以下B信息加入hosts.cfg
define host{
use linux-server
host_name desktop7.example.com //B主机名
alias Mysql
address 192.168.0.7 //B主机ip
icon_image server.gif
statusmap_image server.gd2
2d_coords 500,200
3d_coords 500,200,100
}

#vi services.cfg //加入B主机service信息
define service{
use local-service
host_name desktop7.example.com
service_description MYSQL
check_command check_mysql!nagios!nagdb //!后边分别为commands.cfg中command_line里-u、-d后的变量($ARG1$、$ARG2$)
notifications_enabled 1 //是否开启提醒功能,1为开启
}

#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
//检测语法错误

#vi contacts.cfg //设置接收警报邮件的用户,多个用户用逗号分隔
define contact{
email 849186008@qq.com
}

在QQ邮箱里将发送的账户(nagios@server97.example.com)加入白名单
B主机mysql服务器出现问题(nagios会先检测四次,才会sendmail),就会给邮箱发邮件报警

--------------------------------------------------------------

NAGIOS_NRPE 实现远端主机监控
使用nrpe(5666端口)实现对remote host基本服务(memory、CPU、login users等)的监控:
配置步骤可以参考nrpe包解压后docs目录里的nrpe.pdf

1、Remote Host Setup:

#tar xzf nagios-plugins-1.4.16.tar.gz
#cd nagios-plugins-1.4.16
#./configure //编译完会显示模块情况,需要的安装完后重新执行./configure
#make && make install
#useradd nagios
#chown -R nagios.nagios /usr/local/nagios/
#yum install xinetd -y

#tar zxf nrpe-2.15.tar.gz
#cd nrpe-2.15
#./configure
#make all
#make install-plugin
#make install-daemon
#make install-daemon-config
#make install-xinetd

#vi /etc/services
nrpe 5666/tcp //添加此行

#vi /etc/xinetd.d/nrpe
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
only_from = 127.0.0.1 192.168.1.3 //添加monitor主监控ip;此处的优先级大于(会覆盖)/usr/local/nagios/etc/nrpe.cfg中的allowed_hosts设置
}

#/etc/init.d/xinetd start
#/usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.15 //检测nrpe_plugin安装是否OK
此处可能报错:
[root@ty2 nrpe-2.15]# /usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
CHECK_NRPE: Error - Could not complete SSL handshake.
则注意查看是否将/etc/xinetd.d/nrpe中only_from后边的127.0.0.1丢掉了,记得加上

#vi /usr/local/nagios/etc/nrpe.cfg //customize nrpe commands
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /
//习惯将磁盘检测命令指定为这样(只检测根分区),其他命令保持不变;此处可根据自己的需要设定,也可以不改

2、Monitoring Host Setup:

#tar xzf nrpe-2.15.tar.gz
#cd nrpe-2.15
#./configure
#make all
#make install-plugin
#/usr/local/nagios/libexec/check_nrpe -H 192.168.1.100 -c check_disk
//-H指定remote主机,-c指定命令,不加-c则显示nrpe版本信息

3、现在用命令已经可以检测远程主机系统信息,接下来配置nrpe命令,以便nagios能够自动调用nrpe命令以web页面显示监控信息

#vi commands.cfg
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}

#vi services.cfg //加入想要监控的service
define service{
use local-service
host_name ty.org
service_description 根分区
check_command check_nrpe!check_disk
}

define service{
use local-service
host_name ty.org
service_description 登录用户数
check_command check_nrpe!check_users
}

define servicegroup{ //可以将remote主机被监控的service加入服务组,只需将其host_name和service_description添加在members后边即可
servicegroup_name 系统负荷检查
alias 负荷检查
members ty2.org,进程总数,ty2.org,登录用户数,ty2.org,根分区,ty2.org,交换空间利用率,ty.org,进程总数,ty.org,登录用户数,ty.org,根分区,ty.org,交换空间利用率
}

/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
//检测配置文件语法
/etc/init.d/nagios reload

--------------------------------------------------------------

配置飞信机器人
需要包:fetion
linuxso_20101113.tar.gz
mv fetion /usr/local/nagios/libexec/
tar zxf linuxso_20101113.tar.gz -C /usr/lib
//32位的包放在lib中,64位的包放在lib64中
chmod +x /usr/lib/lib*
ldconfig /usr/lib

cd /usr/local/nagios/libexec/
chown nagios.nagios fetion
chmod +x fetion
./fetion //执行fetion命令,报错需要装依赖包,根据提示安装
./fetion --mobile=****** --pwd=****** --to=****** --msg-utf8="haha" //测试fetion是否能成功发送信息
第一次要求输入验证码,在/usr/local/nagios/libexec中

将fetion添加到配置文件,实现自动发送飞信
vi /usr/local/nagios/libexec/fetion.sh
/usr/local/nagios/libexec/fetion --mobile=*** --pwd=*** --to="$1" --msg-utf8="$2"

chmod +x fetion.sh
/usr/local/nagios/libexec/fetion.sh 18291449704 "asd"
//手动测试fetion.sh

cd /usr/local/nagios/etc/objects/

vi templates.cfg //在原有的email通知命令后面用逗号追加fetion通知命令
service_notification_commands notify-service-by-email,notify-service-by-fetion
host_notification_commands notify-host-by-email,notify-host-by-fetion

vi commands.cfg //添加以下两个command
define command{
command_name notify-host-by-fetion
command_line $USER1$/fetion.sh $CONTACTPAGER$ "$NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$"
}

define command{
command_name notify-service-by-fetion
command_line $USER1$/fetion.sh $CONTACTPAGER$ "$NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$"
}
vi contacts.cfg
define contact{
pager 182*** //添加fetion消息接收号码
}

/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg //测试配置文件语法
/etc/init.d/nagios restart

nagios+cacti整合
需要的包:
ndoutils-1.5.2.tar.gz
npc-2.0.4.tar.gz
add.sql

#tar zxf npc-2.0.4.tar.gz -C /var/www/html/cacti/plugins
进入cacti的web界面安装npc插件
Settings->npc编辑npc,给Remote Commands打勾,路径填/usr/local/nagios/var/rw/nagios.cmd
nagios URL:http://192.168.0.197/nagios

INSTALL ndoutils
tar zxf ndoutils-1.5.2.tar.gz
cd ndoutils-1.5.2 //可以参考README安装(需先执行./configure和make编译,生成下面要拷贝的src/ndomod-3x.o、src/ndo2db-3x等文件)

cp src/ndomod-3x.o /usr/local/nagios/bin/ndomod.o
chown nagios.nagios /usr/local/nagios/bin/ndomod.o
cp config/ndomod.cfg-sample /usr/local/nagios/etc/ndomod.cfg
chown nagios.nagios /usr/local/nagios/etc/ndomod.cfg
chmod 664 /usr/local/nagios/etc/ndomod.cfg

cp src/ndo2db-3x /usr/local/nagios/bin/ndo2db
chown nagios.nagios /usr/local/nagios/bin/ndo2db
cp config/ndo2db.cfg-sample /usr/local/nagios/etc/ndo2db.cfg
chown nagios.nagios /usr/local/nagios/etc/ndo2db.cfg
chmod 664 /usr/local/nagios/etc/ndo2db.cfg

vi ndomod.cfg
output_type=tcpsocket
#output_type=unixsocket
output=127.0.0.1
#output=/usr/local/nagios/var/ndo.sock

vi ndo2db.cfg //ndo2db默认端口5668
#socket_type=unix
socket_type=tcp
#socket_name=/usr/local/nagios/var/ndo.sock //选用tcp,注释掉unix的
db_name=cacti
db_prefix=npc_ //下划线不能少
db_user=cacti
db_pass=cacti

/usr/local/nagios/bin/ndo2db -c /usr/local/nagios/etc/ndo2db.cfg
//启动ndoutils
mysql -ucacti -pcacti cacti < add.sql
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息