您的位置:首页 > 编程语言 > Python开发

python脚本:自动检测rrd文件并群发报警邮件

2013-09-18 17:36 671 查看
脚本背景:
我所在的公司是运营CDN业务的IDC公司,客户域名的流量图经常会出现毛刺,但由于服务的域名非常多,每天逐个查看流量图既耗时又耗力。因此我用python写了一个脚本,可以自动检测rrd文件中的异常数值并发送报警邮件。

由于我们的rrd文件是以服务域名命名的,所以先在相应的API上获取服务域名,然后根据域名扫描rrd文件。我设的是扫描半小时的数值,每10分钟执行一次,大概有2000来个rrd文件,执行一次6、7秒左右。

代码如下:

#!/usr/bin/env python
#coding:utf-8
from pyrrd.graph import DEF,CDEF,AREA
from pyrrd.graph import Graph
from pyrrd.graph import ColorAttributes
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from datetime import datetime
import calendar
import os
import time
import urllib2
import smtplib
import email
import sys
def graphrrd(files):
    """Render a 12-hour traffic graph for one RRD file.

    The ``RX`` and ``TX`` data sources of the RRD are each multiplied by
    0.026 and drawn as filled areas with the legends ``back_flow`` and
    ``CDN_flow``.  The picture is written to the module-level ``image_dir``
    path.

    :param files: path of the ``.rrd`` file to plot.  Its file name, without
        directory and extension, becomes the graph title.
    """
    now_utc = calendar.timegm(datetime.utcnow().utctimetuple())
    window = 43200  # seconds covered by the graph (12 hours)

    def1 = DEF(rrdfile=files, vname='back', dsName='RX')
    def2 = DEF(rrdfile=files, vname='CDN', dsName='TX')
    # NOTE(review): the 0.026 factor presumably converts the stored value
    # into the unit shown on the graph -- confirm with the RRD definition.
    cdef1 = CDEF(vname='back_flow', rpn='%s,0.026,*' % def1.vname)
    cdef2 = CDEF(vname='CDN_flow', rpn='%s,0.026,*' % def2.vname)
    area1 = AREA(defObj=cdef1, color='#002A97FF', legend='back_flow')
    area2 = AREA(defObj=cdef2, color='#00CF00FF', legend='CDN_flow')

    ca = ColorAttributes()
    ca.back = '#333333'
    ca.canvas = '#333333'
    ca.shadea = '#000000'
    ca.shadeb = '#111111'
    ca.mgrid = '#CCCCCC'
    ca.axis = '#FFFFFF'
    ca.frame = '#AAAAAA'
    ca.font = '#FFFFFF'
    ca.arrow = '#FFFFFF'

    graphfile = image_dir
    # Take the title from the file name itself.  The previous fixed slice
    # (files[23:-4]) was only right for one particular directory length.
    title_url = os.path.splitext(os.path.basename(files))[0]
    # The palette above was previously built but never handed to the graph,
    # so it had no effect; pass it through Graph's ``color`` argument.
    g = Graph(graphfile, start=now_utc - window, end=now_utc,
              vertical_label='flow', title=title_url, color=ca)
    # area2 is listed before area1, so CDN_flow is drawn first.
    g.data.extend([def1, def2, cdef1, cdef2, area2, area1])
    g.write()
def connect():
    """Open an authenticated SMTP session.

    Reads the module-level ``smtpserver``, ``smtpuser`` and ``smtppass``.

    :return: the ``smtplib.SMTP`` object after EHLO and login.
    """
    session = smtplib.SMTP(smtpserver)
    session.ehlo()
    session.login(smtpuser, smtppass)
    return session
def sendmessage(server, to, subj, content):
    """Send one HTML alert mail with the rendered graph embedded.

    The message is a ``multipart/related`` mail whose HTML body is
    ``content`` and whose single image part (Content-ID ``<image1>``) is
    read from the module-level ``image_dir`` path.  The sender is the
    module-level ``smtpuser``.

    Sending is best effort: a failure in ``server.sendmail`` is printed and
    swallowed so that the remaining recipients are still tried.

    :param server: object exposing ``sendmail(from, to, message)``, as
        returned by ``connect()``.
    :param to: recipient address.
    :param subj: subject line.
    :param content: HTML body, encoded as UTF-8.
    """
    # ``email.Utils`` is a deprecated alias; import the real module locally.
    from email.utils import formatdate

    msg = MIMEMultipart('related')
    msg['Subject'] = subj
    msg['From'] = smtpuser
    msg['To'] = to
    msg['Date'] = formatdate()
    msg.attach(MIMEText(content, "html", "utf-8"))

    # ``with`` closes the image file even if reading or parsing fails.
    with open(image_dir, 'rb') as fp:
        msgImage = MIMEImage(fp.read())
    msgImage.add_header('Content-ID', '<image1>')
    msg.attach(msgImage)

    try:
        server.sendmail(smtpuser, to, msg.as_string())
    except Exception as ex:
        # Report the concrete exception class rather than ``Exception``.
        print('%s %s' % (type(ex).__name__, ex))
        print('Error - send failed')
def aver(rrd_file, n=6):
    """Average the most recent samples of an RRD file.

    Runs ``rrdtool fetch <rrd_file> AVERAGE -s -1d``, keeps the last ``n``
    output lines, and drops lines containing ``nan`` or ``RX`` (the header).
    Every remaining line whose first three columns parse as floats is stored
    in the module-level ``dict_data`` as ``{timestamp: [col0, col1, col2]}``,
    with the columns kept as strings, and is included in the averages.
    Rows that fail to parse are logged and left out of both ``dict_data``
    and the averages.

    :param rrd_file: path of the RRD file to inspect.
    :param n: number of trailing output lines to look at.
    :return: ``[avg0, avg1, avg2]``, or ``[]`` when there are too few
        records or the average of the second column is below the threshold.
    """
    import subprocess

    global dict_data
    min_second_column_average = 3500000000

    # Call rrdtool with an argument list, not a shell string, so that the
    # file name is never interpreted by a shell.
    try:
        proc = subprocess.Popen(
            ['rrdtool', 'fetch', rrd_file, 'AVERAGE', '-s', '-1d'],
            stdout=subprocess.PIPE, universal_newlines=True)
        output = proc.communicate()[0]
    except OSError:
        # rrdtool is missing or not executable: treat as "no output".
        output = ''

    # Equivalent of ``tail -n | grep -v nan | grep -v RX``.
    lines = output.splitlines(True)
    tail = lines[-n:] if n > 0 else []
    data = [line for line in tail if 'nan' not in line and 'RX' not in line]

    if len(data) < n // 2:
        log("[ERRORS: %s] has not enough record ! please check it!!\n" % rrd_file)
        return []

    sums = [0.0, 0.0, 0.0]
    counted = 0
    for line in data:
        if len(line) <= 25:
            continue
        # The first 10 characters are the timestamp; the columns start
        # after the 2-character separator that follows it.
        fields = line.strip()[12:].split()
        try:
            values = [float(fields[0]), float(fields[1]), float(fields[2])]
        except (ValueError, IndexError):
            # Skip the whole row so that no partial sum is accumulated.
            log('%s %s\n' % (rrd_file, fields))
            continue
        dict_data[line[:10]] = fields
        for idx, value in enumerate(values):
            sums[idx] += value
        counted += 1

    if not counted:
        # Nothing usable was parsed; also avoids dividing by zero below.
        log("[ERRORS: %s] has not enough record ! please check it!!\n" % rrd_file)
        return []

    # Divide by the number of rows actually summed, not by len(data).
    averages = [total / counted for total in sums]
    if averages[1] < min_second_column_average:
        # NOTE(review): the message says 200M, but with the conversion used
        # in update() (value * 8 / 300000000) this threshold is about 93M.
        # Confirm which of the two is intended.
        log('WARNING: %s was less then 200M\n' % rrd_file)
        return []
    return averages
def check(average, threshold=1.6):
    """Return the timestamps whose second column spikes above the average.

    Iterates the module-level ``dict_data`` and compares element ``[1]`` of
    every entry, converted to float, with ``average``.

    :param average: reference value to compare against.
    :param threshold: a sample is reported when ``value / average`` is
        strictly greater than this ratio (default 1.6).
    :return: list of ``dict_data`` keys that exceed the ratio.  The list is
        empty when ``average`` is zero, because no ratio can be computed.
    """
    if not average:
        # Guard against ZeroDivisionError on an all-zero baseline.
        return []
    return [stamp for stamp in dict_data
            if float(dict_data[stamp][1]) / average > threshold]

def update(rrd_file, t, aver1, aver2, aver3):
    """Append one anomaly report to the mail body and to the error log.

    Builds an HTML fragment for the sample stored under ``dict_data[t]``,
    appends it to the module-level ``text``, and writes a one-line summary
    through ``write_error``.

    :param rrd_file: path of the RRD file the anomaly belongs to.  Its file
        name without directory and extension is reported as the domain.
    :param t: key into ``dict_data``; formatted as an epoch timestamp.
    :param aver1: unused, kept for interface compatibility.
    :param aver2: unused, kept for interface compatibility.
    :param aver3: unused, kept for interface compatibility.
    """
    global text

    # Derive the domain from the file name instead of a fixed-offset slice.
    domain = os.path.splitext(os.path.basename(rrd_file))[0]

    # Format the timestamp in-process instead of spawning ``date`` in a
    # shell.  Local time is used, as ``date -d`` did.
    try:
        errors_time = time.strftime('%a %b %d %H:%M:%S %Z %Y',
                                    time.localtime(int(t)))
    except ValueError:
        # ``date`` printed nothing on stdout for an unparsable value.
        errors_time = ''

    sample = dict_data[t]
    # NOTE(review): value * 8 / 300000000 presumably turns bytes per
    # 300-second step into Mbit/s -- confirm against the RRD definition.
    back_m = float(sample[0]) * 8 / 300000000
    cdn_m = int(float(sample[1]) * 8 / 300000000)

    content = '<br/><br/>%s 异常信息:<br/>    域名:        %s <br/>    时间:        %s<br/>    流量值:     回源带宽: %.2fM , cdn带宽 : %dM <br/>                 <br/>rrd 异常信息:<br/>    路径:        %s<br/>    UTC 时间:    %s<br/>    异常值:     [%s], [%s], [%s]<br/><br/><img src="cid:image1">' % (domain, domain, errors_time, back_m, cdn_m, rrd_file, t, sample[0], sample[1], sample[2])
    write_error('[ %s ]: at[ %s(%s) ],the value was [%s] [%s] [%s] \n' % (rrd_file, errors_time, t, sample[0], sample[1], sample[2]))
    text = text + content
def log(log_write):
    """Append ``log_write`` to ``rrd_alt1.log`` in the ``rrd_bak`` directory.

    :param log_write: text to append.  No newline is added.
    """
    # ``with`` closes the file even when the write raises.
    with open('%s/rrd_alt1.log' % rrd_bak, 'a') as f:
        f.write(log_write)
def write_error(log_write):
    """Append ``log_write`` to ``rrd_error1.log`` in the ``rrd_bak`` directory.

    :param log_write: text to append.  No newline is added.
    """
    # The previous code wrote ``f.close`` without parentheses, so the file
    # was never explicitly closed.  ``with`` closes it reliably.
    with open('%s/rrd_error1.log' % rrd_bak, 'a') as f:
        f.write(log_write)

def run_script(rrd_file):
    """Check one RRD file and mail an alert if it contains spikes.

    Steps: compute the averages with ``aver``, find spiking timestamps with
    ``check``, add one report per spike with ``update``, render the graph
    with ``graphrrd``, then send the accumulated module-level ``text`` to
    every address in ``to_all``.

    :param rrd_file: path of the RRD file to check.
    """
    aver_rrd = aver(rrd_file)
    if not aver_rrd:
        return

    wrong_time = check(aver_rrd[1])
    if not wrong_time:
        log('[%s] no errors !\n' % (rrd_file))
        return

    for t in wrong_time:
        update(rrd_file, t, aver_rrd[0], aver_rrd[1], aver_rrd[2])
    graphrrd(rrd_file)

    if not text:
        return

    for to in to_all:
        # One connection per recipient keeps failures isolated.  Each one
        # is now closed instead of being left open.
        server = connect()
        try:
            sendmessage(server, to, subj, text)
            log('sendmail to %s\n' % to)
        finally:
            try:
                server.quit()
            except smtplib.SMTPException:
                # The session is already broken; just drop the socket.
                server.close()
if __name__ == '__main__':
    # Script entry point: fetch the list of served domains, then check the
    # RRD file of every domain once.  The settings below are module-level
    # names read by the functions above.
    image_time = time.strftime("%d-%H-%M")
    rrd_dir = '/data/rrd/db/1/billing'
    rrd_bak = '/data/rrd/db/1/billing/bak'
    smtpserver = 'xxx'
    image_dir = '%s/rrdgraph_%s.png' % (rrd_bak, image_time)
    smtpuser = 'xxx'
    smtppass = 'yyy'
    to_all = ['xxx', 'yyy']
    subj = 'check the flow of CDN!!!!'

    # The former ``while True: ... break`` wrapper ran its body exactly
    # once, so the body is written out directly.
    local_time = time.strftime("%m-%d %H:%M:%S")

    # Close the HTTP response explicitly once the list has been read.
    response = urllib2.urlopen('xxx')
    try:
        domains = response.readlines()
    finally:
        response.close()

    # One RRD path per non-empty line.  Blank lines used to yield a bogus
    # "<rrd_dir>/.rrd" path.
    url_list = ["%s/%s.rrd" % (rrd_dir, u.strip())
                for u in domains if u.strip()]

    log("-" * 60 + "\n")
    log("the script run time at %s \n" % local_time)

    # The original popped paths from the end of the list; keep that order.
    for rrd_file in reversed(url_list):
        # Reset the per-file state shared through module-level names.
        text = ''
        dict_data = {}
        if os.path.exists(rrd_file):
            run_script(rrd_file)

    log("-" * 60 + "\n")
邮件截图




本文出自 “哲就是我” 博客,谢绝转载!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: