您的位置:首页 > 编程语言 > Python开发

python日志处理脚本模板

2016-08-04 14:59 369 查看
比如,待处理的日志格式如下:
<pre>
2016-08-03 15:35:06,749 MainThread web.py:1946 INFO 200 GET /update_proxy?source=ctripRoundFlight&proxy=110.214.72.53:8123&error=22&speed=0.642103910446 (10.10.141.231) 0.40ms
</pre><pre name="code" class="python">
1# -*- coding: utf-8 -*-
2
3 import sys
4 sys.path.append('/home/workspace/ProxyServer/bin')
5 import os
6 import time
7 import datetime
8 #import db_local as db
9 from DBHandle import DBHandle
10 import re
11 import json
12 #from send_mail import send_mail
13
14
# Captures (source, proxy, error, speed) from the query string of an
# "update_proxy" request line; the speed value ends at the first space.
log_pat = re.compile(r'update_proxy\?source=(.*?)&proxy=(.*?)&error=(.*?)&speed=(.*?) ')

# Directory scanned by get_filename() for the proxy log files.
log_dir = '/search/log/proxy_new'
18
def get_filename():
    """
    Return the paths of the log files that are ready to be analysed.

    Scans ``log_dir`` for regular files whose name matches
    ``proxy.*_\\d\\d\\.log`` and keeps those whose embedded ``YYYYMMDD_HH``
    stamp (characters 5-15 of the name) lies more than one hour before the
    current local time.  Names that match the pattern but carry no parseable
    stamp at that position are skipped instead of aborting the scan.

    Returns:
        list of str: ``log_dir + '/' + name`` for every selected file, in
        directory-listing order.
    """
    cutoff = datetime.datetime.now() - datetime.timedelta(hours=1)
    files = []
    for each_file in os.listdir(log_dir):
        path = log_dir + '/' + each_file
        if not (re.match(r'proxy.*_\d\d\.log', each_file) and os.path.isfile(path)):
            continue
        try:
            # The stamp directly follows the 5-character 'proxy' prefix.
            file_time = datetime.datetime.strptime(each_file[5:16], '%Y%m%d_%H')
        except ValueError:
            # Loose pattern matched, but there is no valid stamp here.
            continue
        if file_time < cutoff:
            files.append(path)

    return files
32
def generate_statistics(files):
    """
    Aggregate "update_proxy" log lines into per-source, per-proxy histories.

    Files are visited in descending path order and the lines of each file
    from last to first, so with time-stamped file names the most recently
    logged entries are seen first.  For every (source, proxy) pair at most
    20 entries are kept.

    Lines are filtered in-process (substring ``update_proxy?source``) rather
    than by shelling out to ``grep`` through a shared temporary file.  Lines
    that do not match ``log_pat``, whose proxy field lacks '.' or ':', or
    whose error code is not an integer are skipped.  A file that cannot be
    opened is skipped as well.

    Args:
        files: iterable of log file paths, e.g. the result of get_filename().

    Returns:
        dict: ``{source: {proxy: [(error_code, file_stamp), ...]}}`` where
        ``error_code`` is an int and ``file_stamp`` is the ``YYYYMMDD_HH``
        part of the originating file name.
    """
    # Product-type suffixes removed from the source name.  Order matters:
    # longer variants come before the shorter ones they end with
    # (e.g. 'RoundFlight' before 'Flight').
    type_suffixes = ['Car', 'Bus', 'MultiFlight', 'multiFlight', 'RoundFlight',
                     'roundFlight', 'Flight', 'Rail', 'ListHotel', 'listHotel',
                     'Hotel']
    max_records = 20

    stat_dict = dict()
    for file_name in sorted(files, reverse=True):
        # Take the stamp from the base name so it does not depend on the
        # length of the directory prefix.
        file_stamp = os.path.basename(file_name)[5:16]
        try:
            with open(file_name) as f:
                content_list = [line for line in f if 'update_proxy?source' in line]
        except (IOError, OSError):
            # File vanished or is unreadable: treat it as having no entries.
            continue

        for each_content in reversed(content_list):
            matched = log_pat.search(each_content)
            if matched is None:
                continue
            log_content = matched.groups()
            source_name, proxy_string, error_code, _speed = log_content
            print(log_content)

            # A usable proxy looks like "host.with.dots:port".
            if '.' not in proxy_string or ':' not in proxy_string:
                continue

            # Validate the code before touching stat_dict so that a bad line
            # never leaves an empty history behind.
            try:
                error = int(error_code)
            except ValueError:
                continue

            source = source_name
            for typ in type_suffixes:
                if source_name.endswith(typ):
                    source = source_name[:-len(typ)]
                    break

            records = stat_dict.setdefault(source, {}).setdefault(proxy_string, [])
            if len(records) < max_records:
                records.append((error, file_stamp))
    return stat_dict
71
def stat_log():
    """
    Run the complete log statistics flow and print the report.

    Collects the eligible log files with get_filename(), aggregates them with
    generate_statistics(), then prints each source name followed by the
    proxies for which more than half of the recorded error codes are 0.  For
    such a proxy the line shows the proxy, its error codes in reverse of the
    stored order, and the file stamps of the last and first stored entries
    joined by '--'.
    """
    files = get_filename()
    stat_dict = generate_statistics(files)

    for source, dic in stat_dict.items():
        print(source + ':')
        for proxy_ip, error_list in dic.items():
            if not error_list:
                # No usable record for this proxy; avoid dividing by zero.
                continue
            codes = [error[0] for error in error_list]
            success = codes.count(0)
            if success * 1.0 / len(codes) > 0.5:
                span = error_list[-1][1] + '--' + error_list[0][1]
                # Single formatted string keeps the output identical under
                # both the print statement and the print function.
                print('%s %s %s' % (proxy_ip, codes[::-1], span))
89
90
# Run the statistics report only when executed as a script.
if __name__ == '__main__':
    stat_log()


                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: