您的位置:首页 > 运维架构 > Nginx

贴一段 nginx日志对应放入mysql的python工具

2014-05-07 00:00 337 查看
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
用于切分 nginx日志
nginx日志 格式:
log_format  access  '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" $http_x_forwarded_for';
INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('2', '2', '2', '2', '2', '2', '2', '2', '2', '2')
table sql:
CREATE TABLE `nginxlog` (
`status` int(4) DEFAULT NULL,
`remote_user` varchar(20) DEFAULT NULL,
`http_referer` text,
`remote_addr` varchar(20) DEFAULT NULL,
`http_x_forwarded_for` varchar(20) DEFAULT NULL,
`hostname` varchar(50) DEFAULT NULL,
`request` varchar(200) DEFAULT NULL,
`request_type` varchar(10) DEFAULT NULL,
`http_user_agent` varchar(200) DEFAULT NULL,
`time_local` int(10) unsigned DEFAULT NULL,
`server_id` int(5) unsigned DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8
"""
#--------------------------config----------------------------------------------------------------------
#config
mysql = {}
mysql['mhost'] = 'localhost'
mysql['muser'] = 'root'
mysql['mpwd'] = ''
mysql['mport'] = '3306'
mysql['mdbname'] = 'nginxlog'

#获取当前执行目录  工作目录
ROOT = '/home/nginxweblog/app/'
#ftp 目录  用于存放生产 原始 日志
Sourcedatadir = '/home/nginxweblog/sourcedata/'
#log run
Logfile = ROOT+'nginxlog.log'
#--------------------------------import--------------------------------------------------------
import re,time,os
import MySQLdb
#--------------------------------Sublog--------------------------------------------------------
#分析 每行的 nginxlog 数据
class Sublog:
def __init__(self, logline=None):
if logline:
self.logline = logline.strip()
self.logdate = {}
#self.run()

def run(self):
if self.logline:
self.splitlog()
self.getlog()

def splitlog(self):
# 按照空格切分日志
self.splitlogd = re.split(' ', self.logline)
#print self.splitlogd
return self.splitlogd

def getlog(self):
if len(self.splitlogd) >= 20:
self.logdate['remote_addr'] = self.splitlogd[0]
self.logdate['remote_user'] = self.splitlogd[2]
self.logdate['time_local'] = time.mktime(time.strptime(self.splitlogd[3], '[%d/%b/%Y:%H:%M:%S'))
self.logdate['request_type'] = self.splitlogd[5].replace('\"', '')
self.logdate['request'] = self.splitlogd[6]
self.logdate['status'] = self.splitlogd[8]
self.logdate['http_referer'] = MySQLdb.escape_string(self.splitlogd[10].replace('\"', ''))
self.logdate['http_x_forwarded_for'] = self.splitlogd[len(self.splitlogd)-1]
self.get_http_user_agent()
self.get_hostname()

def get_http_user_agent(self):
agent = re.compile(r'\"(.*?)\"')
self.logdate['http_user_agent'] = MySQLdb.escape_string(agent.findall(self.logline)[2])

def get_hostname(self):
hostname_r = re.compile(r'http://(.*?)/')
ishostname = hostname_r.findall(self.splitlogd[10])
#print ishostname
if len(ishostname) >= 1:
self.logdate['hostname'] = ishostname[0]
else:
self.logdate['hostname'] = '-'

#--------------------------------insert_log---------------------------------------------
#   生成 csv 数据  |||  分割 数据  , 主要用于 mysql快速导入
class insert_log(Sublog):
def __init__(self, logpath, serverid):
Sublog.__init__(self)
self.nowtime = time.strftime("%Y_%m_%d",time.localtime(time.time()))
self.sunlogfile = "nginxlog_"+self.nowtime+'.csv'
self.logpath = logpath
self.serverid = serverid
self.numbres = 0

#self.Mysql_db_instance = Mysql_db()

def createsubfile(self):
self.opencsvfile()
self.subfile = open(ROOT+'CSV/'+self.sunlogfile, 'a')

olog = open(self.logpath)
for i in olog:

#self.logdate = {}
self.logline = i.strip()
self.run()
if self.logdate:
#print self.logdate
self.createsql()
#self.insertdb()
#time.sleep(3)
self.numbres = self.numbres + 1
self.subfile.close()
return [ROOT+'CSV/'+self.sunlogfile, self.sunlogfile, self.numbres]

def insertdb(self):
sql = "INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local'])
#print sql
self.subfile.write(sql + '\n')
#self.Mysql_db_instance.insertsql(sql)
#self.Mysql_db_instance.commit()
def createsql(self):
c = "%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local'],self.serverid)
self.subfile.write(c + '\n')
def opencsvfile(self):
if os.path.isdir(ROOT+'CSV'):
pass
else:
os.mkdir(ROOT+'CSV')
#--------------------------------function ---------------------------------------------
def load_mysql(csvfile):
if mysql['mpwd']:
os.system("""mysql -h%s -u%s -p%s -e  "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], mysql['mpwd'], csvfile, csvfile))
else:
os.system("""mysql -h%s -u%s -e  "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], csvfile, csvfile))

#索引处理 type = 0  删除索引, 1创建索引
def mysqlindex(type):
if mysql['mpwd']:
ism = "mysql -h%s -u%s -p%s -e " % (mysql['mhost'], mysql['muser'], mysql['mpwd'])
else:
ism = "mysql -h%s -u%s -e  " % (mysql['mhost'], mysql['muser'])
if type == 0:
os.system(" %s 'alter table nginxlog.nginxlog drop index time_local' " % ism)
os.system(" %s 'alter table nginxlog.nginxlog drop index hostname' " % ism)
os.system(" %s 'alter table nginxlog.nginxlog drop index remote_addr' " % ism)
elif type == 1:
os.system(" %s 'alter table nginxlog.nginxlog add index time_local(time_local)' " % ism)
os.system(" %s 'alter table nginxlog.nginxlog add index hostname(hostname)' " % ism)
os.system(" %s 'alter table nginxlog.nginxlog add index remote_addr(remote_addr)' " % ism)
""" 日志记录函数 """
def write_logs(logconten):
logfile_path = Logfile
if logfile_path and logconten:
log_write = open(logfile_path, 'a')
log_write.write(logconten+'\n')
log_write.close()
#
#--------------------------------运行---------------------------------------------
def run():
list = os.listdir(Sourcedatadir)
print 'start.......'
write_logs('-----------%s start.....----%s-----------------' % ( time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), str(list)) )
for line in list:
if os.path.isfile(Sourcedatadir+line):

s = re.split('_', line)
if re.match(r'^\d.*$', s[0]):
write_logs('time(%s) logfile(%s) start ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line))
insert_log_instance = insert_log(Sourcedatadir+line, s[0])
gfiledata = insert_log_instance.createsubfile()
load_mysql(gfiledata[0])

os.remove(Sourcedatadir+line)
write_logs('time(%s) logfile(%s) log numbers(%s) stop ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line, str(gfiledata[2])) )
print Sourcedatadir+line+ ' OK ....... '

#--------------------------------  exece ---------------------------------------------
if __name__ == "__main__":
#run_inotify(Sourcedatadir, run)
run()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: