More robust logic to deal with broken lines in HTTP logs

This commit is contained in:
Fabio Manganiello 2021-03-21 10:12:27 +01:00
parent 40dc739d09
commit 782be7794b

View file

@@ -100,18 +100,27 @@ class LogEventHandler(EventHandler):
logger.warning('Could not parse log line from {}: {}'.format(file, line)) logger.warning('Could not parse log line from {}: {}'.format(file, line))
return return
url = None
method = 'GET'
http_version = '1.0'
try: try:
url = m.group(5).split(' ')[1]
method = m.group(5).split(' ')[0]
http_version = m.group(5).split(' ')[2].split('/')[1] http_version = m.group(5).split(' ')[2].split('/')[1]
except: except:
http_version = '1.0' pass
if not url:
return
info = { info = {
'address': m.group(1), 'address': m.group(1),
'user_identifier': m.group(2), 'user_identifier': m.group(2),
'user_id': m.group(3), 'user_id': m.group(3),
'time': datetime.datetime.strptime(m.group(4), '%d/%b/%Y:%H:%M:%S %z'), 'time': datetime.datetime.strptime(m.group(4), '%d/%b/%Y:%H:%M:%S %z'),
'method': m.group(5).split(' ')[0], 'method': method,
'url': m.group(5).split(' ')[1], 'url': url,
'http_version': http_version, 'http_version': http_version,
'status': int(m.group(6)), 'status': int(m.group(6)),
'size': int(m.group(7)), 'size': int(m.group(7)),