From 225dc70cc8b0289a92a83a85865449f803e2e98a Mon Sep 17 00:00:00 2001 From: Midgard Date: Thu, 14 Mar 2024 10:15:53 +0100 Subject: [PATCH] Harmonize all reports based on parse_numberdealers.py --- .gitignore | 5 + numberdealers/__init__.py | 0 numberdealers/numbers_per_user.py | 38 +++++++ .../parse_numberdealers.py | 11 +- .../report_errors.py | 18 ++-- numberdealers/times.py | 101 ++++++++++++++++++ numberdealers/users.py | 17 +++ report_numberdealers.py | 35 ++++++ 8 files changed, 213 insertions(+), 12 deletions(-) create mode 100644 .gitignore create mode 100644 numberdealers/__init__.py create mode 100755 numberdealers/numbers_per_user.py rename parse_numberdealers.py => numberdealers/parse_numberdealers.py (97%) rename check_numberdealers.py => numberdealers/report_errors.py (89%) create mode 100755 numberdealers/times.py create mode 100644 numberdealers/users.py create mode 100755 report_numberdealers.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5e859b8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.tsv +legacy/ +__pycache__/ +*.pyc +*.pyo diff --git a/numberdealers/__init__.py b/numberdealers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/numberdealers/numbers_per_user.py b/numberdealers/numbers_per_user.py new file mode 100755 index 0000000..1113326 --- /dev/null +++ b/numberdealers/numbers_per_user.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 + +from collections import defaultdict +from typing import Dict, Optional, Iterable +from . import parse_numberdealers + + +def analyze_users(numbers: Iterable[parse_numberdealers.Message]) -> Dict[Optional[str], int]: + stats = defaultdict(lambda: 0) + + for msg in numbers: + stats[msg.username] += 1 + + return dict(stats) + + +def report_users(numbers): + stats = analyze_users(numbers) + + for username, count in sorted(stats.items(), key=lambda x: x[1]): + print(f"{count:5d} {username}") + + +def main(): + import sys + numbers, errors = parse_numberdealers.parse(sys.stdin) + + if numbers == [] and errors == []: + print("No input data") + else: + if numbers == []: + print("No valid number messages!") + else: + print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}") + report_users(numbers) + +if __name__ == '__main__': + main() diff --git a/parse_numberdealers.py b/numberdealers/parse_numberdealers.py similarity index 97% rename from parse_numberdealers.py rename to numberdealers/parse_numberdealers.py index 141e792..97579e4 100755 --- a/parse_numberdealers.py +++ b/numberdealers/parse_numberdealers.py @@ -4,6 +4,11 @@ import re import json from dataclasses import dataclass from typing import Optional, List +from .users import USERS + + +if USERS is None: + print("Warning: could not read Mattermost users; username resolution will not work") @dataclass @@ -29,12 +34,6 @@ class Stray(NumberdealersError): pass class Skipped(NumberdealersError): pass class Jump(NumberdealersError): pass - -try: - USERS = __import__("users").users() -except ImportError: - USERS = None - NUMBER_EMOJI = { "zero": "0", "one": "1", diff --git a/check_numberdealers.py b/numberdealers/report_errors.py similarity index 89% rename from check_numberdealers.py rename to numberdealers/report_errors.py index 0ba0168..baf5ba7 100755 --- a/check_numberdealers.py +++ b/numberdealers/report_errors.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import parse_numberdealers +from . import parse_numberdealers def link(link_text: str, message_obj: parse_numberdealers.Message): @@ -33,9 +33,19 @@ def str_from_error(err): return msg + mention(err.message) + +def report_errors(errors): + if errors: + print("🚨 Errors: 🚨") + print("\n".join(map(str_from_error, errors))) + else: + print("No errors! 🎉") + + def main(): import sys numbers, errors = parse_numberdealers.parse(sys.stdin) + if numbers == [] and errors == []: print("No input data") else: @@ -43,11 +53,7 @@ def main(): print("No valid number messages!") else: print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}") - if errors: - print("🚨 Errors: 🚨") - print("\n".join(map(str_from_error, errors))) - else: - print("No errors! 🎉") + report_errors(errors) if __name__ == "__main__": diff --git a/numberdealers/times.py b/numberdealers/times.py new file mode 100755 index 0000000..ef16ca2 --- /dev/null +++ b/numberdealers/times.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 + +from dataclasses import dataclass +from typing import Iterable +import json +import numpy +from . import parse_numberdealers + + +@dataclass +class TimeAnalysis: + avg: float + stdev: float + min: float + perc5 : float + med: float + perc95: float + max: float + midrange: float + + +def analyze_times(numbers: Iterable[parse_numberdealers.Message]) -> TimeAnalysis: + times = [] + prev_time = None + + for msg in numbers: + if prev_time is not None: + times.append((msg.create_at - prev_time) / 1000) + prev_time = msg.create_at + + times.sort() + + min_ = min(times) + max_ = max(times) + return TimeAnalysis( + avg=numpy.mean(times), + stdev=numpy.std(times), + min=min_, + perc5 =numpy.percentile(times, 5), + med=numpy.median(times), + perc95=numpy.percentile(times, 95), + max=max_, + midrange=(min_ + max_) / 2 + ) + + +def format_time(total_seconds: float): + if total_seconds < 1: + return f"{total_seconds:.3f} s" + if total_seconds < 10: + return f"{total_seconds:.2f} s" + if total_seconds < 60: + return f"{total_seconds:.1f} s" + + formatted = None + seconds_str = f"{total_seconds:.0f} s" + + total_minutes, seconds = divmod(total_seconds, 60) + total_hours, minutes, = divmod(total_minutes, 60) + total_days, hours, = divmod(total_hours, 24) + total_years, days, = divmod(total_days, 365) + if total_minutes < 60: + formatted = f"{total_minutes:.0f} min {seconds:.0f} sec" + elif total_hours < 24: + formatted = f"{total_hours:.0f} hr {minutes:.0f} min" + elif total_days < 365: + formatted = f"{total_days:.0f} days {hours:.0f} hr" + else: + formatted = f"{total_years:.0f} yr {days:.0f} days" + + assert formatted + return f"{seconds_str} ({formatted})" + + +def report_times(numbers): + a = analyze_times(numbers) + print(f""" μ = {format_time(a.avg)} + σ = {format_time(a.stdev)} + + min = {format_time(a.min)} + P5 = {format_time(a.perc5)} +median = {format_time(a.med)} + P95 = {format_time(a.perc95)} + max = {format_time(a.max)}""") + + +def main(): + import sys + numbers, errors = parse_numberdealers.parse(sys.stdin) + + if numbers == [] and errors == []: + print("No input data") + else: + if numbers == []: + print("No valid number messages!") + else: + print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}") + report_times(numbers) + +if __name__ == '__main__': + main() diff --git a/numberdealers/users.py b/numberdealers/users.py new file mode 100644 index 0000000..7e7039c --- /dev/null +++ b/numberdealers/users.py @@ -0,0 +1,17 @@ +from os.path import expanduser +import json + + +def _users(): + result = {} + try: + with open(expanduser("~/dev/mm-archiver/data/users.json"), encoding="utf-8") as fh: + for line in fh: + user = json.loads(line) + result[user["id"]] = user + return result + except FileNotFoundError: + return None + + +USERS = _users() diff --git a/report_numberdealers.py b/report_numberdealers.py new file mode 100755 index 0000000..20ded32 --- /dev/null +++ b/report_numberdealers.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +import sys +from numberdealers import parse_numberdealers, times, report_errors, numbers_per_user + + +def main(): + numbers, errors = parse_numberdealers.parse(sys.stdin) + + if numbers == [] and errors == []: + print("No input data") + return + elif numbers == []: + print("No valid number messages!") + return + + print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}") + print() + report_errors.report_errors(errors) + print() + print("---") + print() + print("```") + times.report_times(numbers) + print("```") + print() + print("---") + print() + print("```") + numbers_per_user.report_users(numbers) + print("```") + + +if __name__ == "__main__": + main()