您的位置：首页 > 运维架构 > Linux

容易的linux自动化运维工具之add_job命令(三)

2015-03-22 00:00 513 查看

摘要: 三五百行python代码实现基于守护进程的linux集中管理工具，满足%95以上的linux管理任务，喜欢的朋友可以留言交流。

原文：https://www.3qos.com/article/101.html

作者：容易日期：2015-03-17

备注：非本人同意，请勿转载

add_job 接口代码如下

#-*- coding: UTF-8 -*-

__author__ = 'tiger'

#!/usr/bin/env python

import zmq, time, os

#import random

import subprocess

import logging, ConfigParser

from logging.handlers import RotatingFileHandler

#定义日志函数

def mylog(logfile):

rthandler = RotatingFileHandler(logfile, 'a', maxBytes=50 * 1024 * 1024, backupCount=3)

formatter = logging.Formatter(

'%(levelname)s %(thread)d %(threadName)s %(process)d %(funcName)s %(asctime)s %(filename)s[line:%(lineno)d] %(message)s',

datefmt='%a, %d %b %Y %H:%M:%S')

rthandler.setFormatter(formatter)

log = logging.getLogger()

log.setLevel(logging.INFO)

log.addHandler(rthandler)

return log

#定义IP检查判断IP是否正确

def ip_check(ip):

q = ip.split('.')

return len(q) == 4 and len(filter(lambda x: x >= 0 and x <= 255, \

map(int, filter(lambda x: x.isdigit(), q)))) == 4

#配置读取函数

def read_task(config_file):

cfg = config_file

config = ConfigParser.SafeConfigParser()

config.read(cfg)

#sections = config.sections()

#生产任务ID

job_id = int(time.time()) + 1

#定义IP字典，记录批量执行的IP地址

ip_dic = {}

#尝试获取相关配置变量

try:

master_sock = config.get("master", 'sock')

ip_list = config.get("ip_list", 'ip').split(',')

job_task = config.get("job_info", 'task')

except Exception, err:

print err

return 0

for i in ip_list:

if ip_check(i):

ip_dic[i] = 'N'

else:

print "ip error :%s" % i

return 0

#尝试获取任务的非必须变量，不存在设置相关默认值

try:

job_type = config.get("job_info", 'type')

except:

job_type = 'c'

try:

job_rundir = config.get("job_info", 'rundir')

except:

job_rundir = 'None'

try:

job_cmdtimeout = int(config.get("job_info", 'cmdtimeout'))

except:

job_cmdtimeout = 10

try:

job_jobtimeout = int(config.get("job_info", 'jobtimeout'))

except:

job_jobtimeout = 20

if job_type == 's':

try:

job_env = config.get("job_info", 'env')

except:

job_env = 'sh'

try:

job_fileserver = config.get("job_info", 'fileserver')

except:

job_fileserver = 'http://192.168.0.227/ser/'

job_info = {'id': job_id, 'type': job_type, 'task': job_task, 'jobtimeout': job_jobtimeout,

'cmdtimeout': job_cmdtimeout, 'env': job_env, 'fileserver': job_fileserver, 'rep_type': 'newtask',

'rundir': job_rundir}

else:

job_info = {'id': job_id, 'type': job_type, 'task': job_task, 'jobtimeout': job_jobtimeout,

'cmdtimeout': job_cmdtimeout, 'rep_type': 'newtask', 'rundir': job_rundir}

return [ip_dic, job_info, master_sock]

#变量说明，id代表任务的id号是个自增数，根据当前时间生成，job_type代表任务的类别，c代表命令s代表脚本，job_task代表具体的命令，如果是任务

#类别是命令则执行执行，如果是脚本，客户端将去根据脚本名去http服务器下载相关脚本，jobtimeout代表整个任务的超时时间，如果客户端没有在该事件内

#报告任务状态则超时，cmdtimeout代表客户端执行具体任务的超时时间，例如top命令如果不带参数将永远执行，cmdtimeout就是为了避免类似情况

#job_env 代表任务的执行环境，通常只适用于执行脚本，job_fileserver 当任务类别是脚本时，下载脚本的http服务器地址和路径，rep_type响应给客户端的状态

#如果响应的信息为newtask，客户端将知道是有新任务了，会尝试获取任务所需的其他信息。rundir任务运行的具体路径

def add_job(config_file, logfile):

#定义日常函数

log = mylog(logfile)

#读取配置文件

cfg_info = read_task(config_file)

#返回为非0时，表示配置文件读取成功并且符合预期。

if cfg_info != 0:

ip_dic = cfg_info[0]

job_info = cfg_info[1]

job_id = job_info['id']

#task = [ip_dic, job_info]

#生产请求信息，并且发送至master，请求添加新任务。

task = {'req_type': 'addjob', 'job_info': job_info, 'iplist': ip_dic}

#尝试建立与master的连接并且发生相关信息等待响应

sock_file = cfg_info[2]

context = zmq.Context()

socket = context.socket(zmq.REQ)

socket.setsockopt(zmq.LINGER, 0)

try:

socket.connect(sock_file)

socket.send_pyobj(task)

except Exception, err:

log.info("connect to master error " + str(err))

print "connect to master error %s " % str(err)

#等待请求响应

report = socket.recv_pyobj()

#打印响应信息

print report

#记录到相关的日志，关闭与master的连接

log_file = report.split()[-1]

log.info(report)

socket.close()

context.term()

#尝试读取任务报告，直接使用系统自己带的tail命令

read_log = "tail -f " + log_file

p = subprocess.Popen(read_log, shell=True, stdout=subprocess.PIPE,

stderr=subprocess.PIPE)

returncode = p.poll()

end_fag = 'job ' + str(job_id) + ' end'

print '----------------------job report start--------------------------'

while returncode is None:

line = p.stdout.readline()

returncode = p.poll()

line = line.strip()

print line

if line == end_fag:

break

print '----------------------job report end---------------------------'

else:

return

#定义主函数设置相关运行目录和配置文件名

def main():

config_file = 'task.ini'

logfile = 'log/oaos_add_job.log'

homedir = os.getcwd()

for i in ('log', 'run'):

path = homedir + '/' + i

if not os.path.exists(path):

os.makedirs(path, 0755)

add_job(config_file, logfile)

if __name__ == "__main__":

main()

add_job的配置文件如下

#服务端简单端口和地址
[master]
sock=tcp://192.168.4.194:7777

#需要批量任务的IP地址列表，以逗号分隔
[ip_list]
ip=192.168.4.195,192.168.4.196,192.168.4.197,192.168.4.198

#任务信息
[job_info]

#任务类别，c代表命令s代表脚本type=c
task=ls -rlt

#命令或者脚本的超时时间
cmdtimeout=10

#整个任务的超时时间
jobtimeout=10

#命令或者脚本的默认执行环境，可以是shell，java,python等根据实际需求去定义
env=sh

#假如任务需要发布脚本，指定下载脚本的URL地址
fileserver=http://192.168.0.227/ser/

#执行脚本或命令时的默认目录
rundir=/root/

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航