stats over logs
This commit is contained in:
commit
a5444a864a
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
temp/**
|
||||
.idea
|
||||
.idea/**
|
||||
|
14
pyproject.toml
Normal file
14
pyproject.toml
Normal file
@ -0,0 +1,14 @@
|
||||
[tool.poetry]
|
||||
name = "java-logs-analyzer"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["vy.boyko <vy.boyko@tinkoff.ru>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.12"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
92
src/app.py
Normal file
92
src/app.py
Normal file
@ -0,0 +1,92 @@
|
||||
import argparse
|
||||
import re
|
||||
from datetime import datetime
|
||||
import os
|
||||
|
||||
# Command-line interface: the path of the log file is the only input.
parser = argparse.ArgumentParser(prog='java-logs-analyzer')
# required=True: without a path, os.path.getsize(None) below would fail with
# an opaque TypeError instead of an argparse usage message.
parser.add_argument('-l', '--logs', required=True, help='Logs file')
args = parser.parse_args()

# Matches the timestamp prefix at the start of a log line, in the form
# "D-MM-YYYY HH:MM:SS." (day may be one or two digits). Lines that do not
# start with such a prefix are ignored by the statistics.
pattern = re.compile(r"""^(?P<day>\d{1,2})[-](?P<month>\d{2})[-](?P<year>\d{4})\s(?P<hour>\d{2})[:](?P<minute>\d{2})[:](?P<second>\d{2})[.]""", re.VERBOSE)

# Number of matched lines per time bucket; keys are timestamp strings
# truncated to the bucket's resolution.
stats_by_second = {}
stats_by_minute = {}
stats_by_hour = {}

# First and last matched lines (stripped); used to compute the covered period.
first_line = None
last_line = None

# Sum of the lengths of all matched (stripped) lines.
total_len = 0

# Size of the log file in bytes; the denominator of the progress indicator.
file_len = os.path.getsize(args.logs)
|
||||
|
||||
|
||||
def _add_stats(stats: dict, key: str, value: int) -> dict:
|
||||
if key in stats:
|
||||
return stats | { key: stats[key] + value }
|
||||
else:
|
||||
return stats | { key: value }
|
||||
|
||||
|
||||
def _calc_stats(stats: dict) -> (int, int, float):
|
||||
avg = 0
|
||||
min_val = 0
|
||||
max_val = 0
|
||||
count = len(stats.keys())
|
||||
for key in stats.keys():
|
||||
avg += stats[key]
|
||||
min_val = min(min_val, stats[key]) if min_val > 0 else stats[key]
|
||||
max_val = max(max_val, stats[key]) if max_val > 0 else stats[key]
|
||||
return (min_val, max_val, avg / count if count > 0 else 0)
|
||||
|
||||
def _print_stats(name: str, stats: dict) -> None:
    """Print one summary line (minimum, maximum, average) for ``stats``.

    Args:
        name: Label printed in front of the figures.
        stats: Mapping of bucket key to count, summarised via ``_calc_stats``.
    """
    lowest, highest, mean = _calc_stats(stats)
    print(f"{name}: MIN={lowest}, MAX={highest}, AVG={mean}")
|
||||
|
||||
def _parse(line: str) -> (bool, str, str, str, str, str, str):
    """Extract the timestamp fields from the start of a log line.

    Args:
        line: A single log line.

    Returns:
        ``(True, year, month, day, hour, minute, second)`` with every field
        as the matched string when the line starts with a timestamp accepted
        by the module-level ``pattern``; otherwise ``False`` followed by six
        ``None`` values.
    """
    found = pattern.match(line)
    if found is None:
        return (False, None, None, None, None, None, None)
    # Match.group with several names returns the groups as a tuple, in the
    # order requested.
    fields = found.group("year", "month", "day", "hour", "minute", "second")
    return (True, *fields)
|
||||
|
||||
|
||||
# Single pass over the log file: show a progress indicator and count every
# line that starts with a recognised timestamp in the per-hour, per-minute
# and per-second buckets.
with open(args.logs, 'r') as file:
    read_len = 0
    last_percent = -1
    percent = 0
    for line in file:
        # Progress: characters read so far relative to the file size. The
        # indicator is only reprinted when the rounded tenth of the
        # percentage changes, to avoid writing to the terminal on every line.
        read_len += len(line)
        percent = round(read_len / file_len * 100, 2)
        cur_percent = round(percent / 10, 0)
        if cur_percent != last_percent:
            print(f"{percent}%", end='\r')
            last_percent = cur_percent

        stripped = line.strip()
        is_parsed, year, month, day, hour, minute, second = _parse(stripped)
        if not is_parsed:
            # Lines without a timestamp prefix do not contribute to any
            # statistic.
            continue

        # Remember the boundaries of the matched range for the final summary.
        if first_line is None:
            first_line = stripped
        last_line = stripped
        total_len += len(stripped)

        # Bucket keys share the date prefix and differ only in resolution.
        date_part = f"{year}-{month}-{day}"
        key_hour = f"{date_part} {hour}"
        key_minute = f"{key_hour}:{minute}"
        key_second = f"{key_minute}:{second}"
        stats_by_hour = _add_stats(stats_by_hour, key_hour, 1)
        stats_by_minute = _add_stats(stats_by_minute, key_minute, 1)
        stats_by_second = _add_stats(stats_by_second, key_second, 1)
|
||||
|
||||
|
||||
# Report the distribution of matched lines per hour, minute and second.
_print_stats("Stats by hour", stats_by_hour)
_print_stats("Stats by minute", stats_by_minute)
_print_stats("Stats by second", stats_by_second)

if first_line is None:
    # No line carried a timestamp, so there is no time span to average over
    # (and _parse(None) would raise a TypeError).
    print("Average bytes per second: n/a (no timestamped lines found)")
else:
    # Re-parse the first and last matched lines to get the covered period.
    (fl_is_parsed, fl_year, fl_month, fl_day, fl_hour, fl_minute, fl_second) = _parse(first_line)
    (ll_is_parsed, ll_year, ll_month, ll_day, ll_hour, ll_minute, ll_second) = _parse(last_line)
    started = datetime(*map(int, [fl_year, fl_month, fl_day, fl_hour, fl_minute, fl_second]))
    finished = datetime(*map(int, [ll_year, ll_month, ll_day, ll_hour, ll_minute, ll_second]))
    delta = finished - started
    # total_seconds() covers the whole span; the .seconds attribute holds
    # only the sub-day remainder and silently drops full days.
    seconds = delta.total_seconds()
    if seconds > 0:
        # total_len is the summed length of the stripped, matched lines.
        print(f"Average bytes per second: {total_len / seconds}")
    else:
        # All matched lines fall into the same second (or the timestamps are
        # not increasing): a rate cannot be computed without dividing by zero.
        print("Average bytes per second: n/a (time span is not positive)")
|
Loading…
Reference in New Issue
Block a user