stats over logs

This commit is contained in:
vy.boyko 2024-10-16 11:39:04 +03:00
commit a5444a864a
3 changed files with 110 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
temp/**
.idea
.idea/**

14
pyproject.toml Normal file
View File

@ -0,0 +1,14 @@
[tool.poetry]
name = "java-logs-analyzer"
version = "0.1.0"
description = ""
authors = ["vy.boyko <vy.boyko@tinkoff.ru>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.12"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

92
src/app.py Normal file
View File

@ -0,0 +1,92 @@
import argparse
import re
from datetime import datetime
import os
# Command-line interface: the only option is the path of the log file.
parser = argparse.ArgumentParser(prog='java-logs-analyzer')
# --logs is mandatory: without it args.logs would be None and the
# os.path.getsize() call below would fail with a bare TypeError instead of
# a proper usage message.
parser.add_argument('-l', '--logs', required=True, help='Logs file')
args = parser.parse_args()

# Timestamp prefix expected at the start of a log line:
# day-month-year, a whitespace character, hour:minute:second, then a dot.
pattern = re.compile(r"""^(?P<day>\d{1,2})[-](?P<month>\d{2})[-](?P<year>\d{4})\s(?P<hour>\d{2})[:](?P<minute>\d{2})[:](?P<second>\d{2})[.]""", re.VERBOSE)

# Number of timestamped lines per second / minute / hour bucket.
stats_by_second = {}
stats_by_minute = {}
stats_by_hour = {}

# First and last (stripped) lines whose timestamp prefix was recognised.
first_line = None
last_line = None

# Sum of the stripped lengths of all recognised lines.
total_len = 0

# Size of the log file in bytes; used to report reading progress.
file_len = os.path.getsize(args.logs)
def _add_stats(stats: dict, key: str, value: int) -> dict:
if key in stats:
return stats | { key: stats[key] + value }
else:
return stats | { key: value }
def _calc_stats(stats: dict) -> (int, int, float):
avg = 0
min_val = 0
max_val = 0
count = len(stats.keys())
for key in stats.keys():
avg += stats[key]
min_val = min(min_val, stats[key]) if min_val > 0 else stats[key]
max_val = max(max_val, stats[key]) if max_val > 0 else stats[key]
return (min_val, max_val, avg / count if count > 0 else 0)
def _print_stats(name: str, stats: dict) -> None:
    """Print one summary line for *stats*, labelled with *name*.

    The line shows the minimum, maximum and average as computed by
    ``_calc_stats``.
    """
    min_val, max_val, avg = _calc_stats(stats)
    summary = f"{name}: MIN={min_val}, MAX={max_val}, AVG={avg}"
    print(summary)
def _parse(line: str) -> (bool, str, str, str, str, str, str):
    """Extract the timestamp fields from the start of *line*.

    Returns a 7-tuple ``(parsed, year, month, day, hour, minute, second)``.
    When the module-level ``pattern`` matches, ``parsed`` is ``True`` and the
    remaining items are the matched digit strings; otherwise ``parsed`` is
    ``False`` and the remaining items are all ``None``.
    """
    found = pattern.match(line)
    if found is None:
        return (False, None, None, None, None, None, None)
    # group() with several names returns the fields as one tuple, in order.
    fields = found.group("year", "month", "day", "hour", "minute", "second")
    return (True, *fields)
# Single pass over the log file: count timestamped lines per hour, minute
# and second, and remember the first and last timestamped lines.
with open(args.logs, 'r') as file:
    read_len = 0
    last_percent = -1
    for line in file:
        stripped = line.strip()
        # NOTE: len(line) counts decoded characters while file_len is a byte
        # count, so the progress figure is approximate for multi-byte text.
        read_len += len(line)
        # Guard against a reported size of 0 for a file that still yields lines.
        percent = round(read_len / file_len * 100, 2) if file_len else 0
        # Refresh the progress display only when the rounded tenth changes.
        cur_percent = round(percent / 10, 0)
        if last_percent != cur_percent:
            print(f"{percent}%", end='\r')
            last_percent = cur_percent
        (is_parsed, year, month, day, hour, minute, second) = _parse(stripped)
        if not is_parsed:
            # Lines without a timestamp prefix are not counted.
            continue
        if first_line is None:
            first_line = stripped
        last_line = stripped
        total_len += len(stripped)
        key_hour = f"{year}-{month}-{day} {hour}"
        key_minute = f"{year}-{month}-{day} {hour}:{minute}"
        key_second = f"{year}-{month}-{day} {hour}:{minute}:{second}"
        stats_by_hour = _add_stats(stats_by_hour, key_hour, 1)
        stats_by_minute = _add_stats(stats_by_minute, key_minute, 1)
        stats_by_second = _add_stats(stats_by_second, key_second, 1)

_print_stats("Stats by hour", stats_by_hour)
_print_stats("Stats by minute", stats_by_minute)
_print_stats("Stats by second", stats_by_second)

if first_line is None:
    # No line carried a timestamp, so there is no time span to average over.
    print("Average bytes per second: n/a (no timestamped lines found)")
else:
    # _parse returns (parsed, year, month, day, hour, minute, second); the
    # tail is already in the argument order datetime() expects.
    started = datetime(*map(int, _parse(first_line)[1:]))
    finished = datetime(*map(int, _parse(last_line)[1:]))
    delta = finished - started
    # total_seconds() includes whole days; timedelta.seconds alone would
    # silently drop them for logs spanning more than 24 hours.
    seconds = delta.total_seconds()
    if seconds > 0:
        print(f"Average bytes per second: {total_len / seconds}")
    else:
        # First and last timestamps coincide (or are out of order): avoid a
        # division by zero and a meaningless negative rate.
        print("Average bytes per second: n/a (non-positive time span)")