More robust logic to deal with broken lines in HTTP logs
This commit is contained in:
parent
40dc739d09
commit
782be7794b
1 changed file with 12 additions and 3 deletions
|
@@ -100,18 +100,27 @@ class LogEventHandler(EventHandler):
|
||||||
logger.warning('Could not parse log line from {}: {}'.format(file, line))
|
logger.warning('Could not parse log line from {}: {}'.format(file, line))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
url = None
|
||||||
|
method = 'GET'
|
||||||
|
http_version = '1.0'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
url = m.group(5).split(' ')[1]
|
||||||
|
method = m.group(5).split(' ')[0]
|
||||||
http_version = m.group(5).split(' ')[2].split('/')[1]
|
http_version = m.group(5).split(' ')[2].split('/')[1]
|
||||||
except:
|
except:
|
||||||
http_version = '1.0'
|
pass
|
||||||
|
|
||||||
|
if not url:
|
||||||
|
return
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'address': m.group(1),
|
'address': m.group(1),
|
||||||
'user_identifier': m.group(2),
|
'user_identifier': m.group(2),
|
||||||
'user_id': m.group(3),
|
'user_id': m.group(3),
|
||||||
'time': datetime.datetime.strptime(m.group(4), '%d/%b/%Y:%H:%M:%S %z'),
|
'time': datetime.datetime.strptime(m.group(4), '%d/%b/%Y:%H:%M:%S %z'),
|
||||||
'method': m.group(5).split(' ')[0],
|
'method': method,
|
||||||
'url': m.group(5).split(' ')[1],
|
'url': url,
|
||||||
'http_version': http_version,
|
'http_version': http_version,
|
||||||
'status': int(m.group(6)),
|
'status': int(m.group(6)),
|
||||||
'size': int(m.group(7)),
|
'size': int(m.group(7)),
|
||||||
|
|
Loading…
Reference in a new issue