From a5444a864a8d4b848630e425d3bec1f9f0a68add Mon Sep 17 00:00:00 2001 From: "vy.boyko" Date: Wed, 16 Oct 2024 11:39:04 +0300 Subject: [PATCH] stats over logs --- .gitignore | 4 +++ pyproject.toml | 14 ++++++++ src/app.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 .gitignore create mode 100644 pyproject.toml create mode 100644 src/app.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4595a6c --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +temp/** +.idea +.idea/** + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c0fdb13 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "java-logs-analyzer" +version = "0.1.0" +description = "" +authors = ["vy.boyko "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.12" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..821c4a3 --- /dev/null +++ b/src/app.py @@ -0,0 +1,92 @@ +import argparse +import re +from datetime import datetime +import os + +parser = argparse.ArgumentParser(prog='java-logs-analyzer') +parser.add_argument('-l', '--logs', help='Logs file') +args = parser.parse_args() + +pattern = re.compile(r"""^(?P\d{1,2})[-](?P\d{2})[-](?P\d{4})\s(?P\d{2})[:](?P\d{2})[:](?P\d{2})[.]""", re.VERBOSE) +stats_by_second = {} +stats_by_minute = {} +stats_by_hour = {} +first_line = None +last_line = None +total_len = 0 +file_len = os.path.getsize(args.logs) + + +def _add_stats(stats: dict, key: str, value: int) -> dict: + if key in stats: + return stats | { key: stats[key] + value } + else: + return stats | { key: value } + + +def _calc_stats(stats: dict) -> (int, int, float): + avg = 0 + min_val = 0 + max_val = 0 + count = len(stats.keys()) + for key in stats.keys(): + avg += stats[key] + min_val = min(min_val, stats[key]) if min_val > 0 else stats[key] + max_val = max(max_val, stats[key]) if max_val > 0 else stats[key] + return (min_val, max_val, avg / count if count > 0 else 0) + +def _print_stats(name: str, stats: dict) -> None: + (min_val, max_val, avg) = _calc_stats(stats) + print(f"{name}: MIN={min_val}, MAX={max_val}, AVG={avg}") + +def _parse(line: str) -> (bool, str, str, str, str, str, str): + match = pattern.match(line) + if match is not None: + day = match.group("day") + month = match.group("month") + year = match.group("year") + hour = match.group("hour") + minute = match.group("minute") + second = match.group("second") + return (True, year, month, day, hour, minute, second) + else: + return (False, None, None, None, None, None, None) + + +with open(args.logs, 'r') as file: + read_len = 0 + last_percent = -1 + percent = 0 + for line in file: + l = line.strip() + read_len += len(line) + percent = round(read_len / file_len * 100, 2) + cur_percent = round(percent / 10, 0) + if last_percent != cur_percent: + print(f"{percent}%", end='\r') + last_percent = cur_percent + (is_parsed, year, month, day, hour, minute, second) = _parse(l) + if is_parsed: + if first_line is None: + first_line = l + last_line = l + total_len += len(l) + key_hour = f"{year}-{month}-{day} {hour}" + key_minute = f"{year}-{month}-{day} {hour}:{minute}" + key_second = f"{year}-{month}-{day} {hour}:{minute}:{second}" + stats_by_hour = _add_stats(stats_by_hour, key_hour, 1) + stats_by_minute = _add_stats(stats_by_minute, key_minute, 1) + stats_by_second = _add_stats(stats_by_second, key_second, 1) + + +_print_stats("Stats by hour", stats_by_hour) +_print_stats("Stats by minute", stats_by_minute) +_print_stats("Stats by second", stats_by_second) + +(fl_is_parsed, fl_year, fl_month, fl_day, fl_hour, fl_minute, fl_second) = _parse(first_line) +(ll_is_parsed, ll_year, ll_month, ll_day, ll_hour, ll_minute, ll_second) = _parse(last_line) +started = datetime(*map(int, [fl_year, fl_month, fl_day, fl_hour, fl_minute, fl_second])) +finished = datetime(*map(int, [ll_year, ll_month, ll_day, ll_hour, ll_minute, ll_second])) +delta = finished - started +seconds = delta.seconds +print(f"Average bytes per second: {total_len / seconds}") \ No newline at end of file