您的位置:首页 > 运维架构 > Nginx

Python正则表达式,统计分析nginx访问日志

2017-01-16 15:57 736 查看
目标:

  1.正则表达式

  2.OOP编程,统计nginx访问日志中不同IP地址出现的次数,并按出现次数从高到低排序

1.正则表达式

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

# Walk-through of the core `re` entry points.  Every print() call takes a
# single argument so the script produces the same output on Python 2 and 3.

# match: only succeeds when the pattern matches at the start of the string
# Approach 1: compile the pattern once, then call match() on the compiled object
pattern1 = re.compile(r'hello', re.I)

match = pattern1.match('Hello World')

if match:
    print(match.group())

# Approach 2: call the module-level helper without compiling first
m = re.match(r'hello', 'hello world.')

# match() returns None when nothing matches, so guard before using the result
if m:
    print(m.group())

# search: scans the whole string and returns the first hit
pattern1 = re.compile(r'World')

match = pattern1.search('Hello, hello World.')

if match:
    print(match.group())

# split: cut the string at every run of digits
pattern1 = re.compile(r'\d+')
match = pattern1.split('one1two2three3')
print(match)
for i in match:
    print(i)

# findall: list of every non-overlapping match as plain strings
match = pattern1.findall('one1two2three3')
print(match)

# finditer: same matches, but yielded lazily as match objects
match = pattern1.finditer('one1two2three3')
for i in match:
    print(i.group())


•运行代码,测试效果

2.OOP编程,统计nginx访问日志中不同IP地址出现的次数,并按出现次数从高到低排序

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

class CountPatt(object):
    """Tally how often a regular expression matches across the lines of a file.

    Each line contributes at most one hit: the text of the first match that
    ``search`` finds on it.  Totals live in ``self.result`` and keep
    accumulating if :meth:`count_patt` is called more than once.
    """

    def __init__(self, patt):
        """Compile *patt* and start with an empty tally.

        :param patt: regular-expression source passed to ``re.compile``.
        """
        self.patt = re.compile(patt)
        # Maps matched text -> number of lines on which it was found.
        self.result = {}

    def count_patt(self, fname):
        """Scan *fname* line by line and add its matches to the tally.

        :param fname: path of the text file to read.
        :returns: ``self.result`` itself (the live dict, not a copy).
        """
        with open(fname) as fobj:
            for line in fobj:
                match = self.patt.search(line)
                if match:
                    key = match.group()
                    self.result[key] = self.result.get(key, 0) + 1

        return self.result

    def sort(self):
        """Return the tally as ``(text, count)`` pairs, highest count first.

        ``self.result`` is left untouched.  The sort is stable, so entries
        with equal counts keep the order in which the dict yields them.

        :returns: a new list of ``(text, count)`` tuples; empty if nothing
            has been counted yet.
        """
        # sorted() accepts any iterable, so this works whether items() is a
        # list (Python 2) or a view (Python 3).
        return sorted(self.result.items(),
                      key=lambda item: item[1],
                      reverse=True)

# Script entry point: count client IPs in an access log and print the tally,
# first as the raw dict and then as a list ordered by descending count.
if __name__ == "__main__":
    httpd_log = '/tmp/access.log'
    # Four dot-separated digit groups anchored at the start of the line.
    ip_pattern = r'^(\d+\.){3}\d+'
    # Alternative pattern for counting by browser name; not used in the run
    # below -- pass it to CountPatt instead of ip_pattern to try it.
    browser_pattern = r'Chrome|Safari|Firefox'
    a = CountPatt(ip_pattern)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(a.count_patt(httpd_log))
    print(a.sort())


•运行代码,测试效果

handetiandeMacBook-Pro:test xkops$ python test2.py
{'192.168.207.21': 25, '192.168.80.165': 20, '192.168.207.1': 46, '127.0.0.1': 10}
[('192.168.207.1', 46), ('192.168.207.21', 25), ('192.168.80.165', 20), ('127.0.0.1', 10)]
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: