Harmonize all reports based on parse_numberdealers.py

This commit is contained in:
Midgard 2024-03-14 10:15:53 +01:00
parent a6df150b6f
commit 225dc70cc8
Signed by: midgard
GPG key ID: 511C112F1331BBB4
8 changed files with 213 additions and 12 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
*.tsv
legacy/
__pycache__/
*.pyc
*.pyo

View file

View file

@ -0,0 +1,38 @@
#!/usr/bin/env python3
from collections import defaultdict
from typing import Dict, Optional, Iterable
from . import parse_numberdealers
def analyze_users(numbers: Iterable[parse_numberdealers.Message]) -> Dict[Optional[str], int]:
    """Count how many of the given messages each username posted.

    Returns a plain dict mapping ``msg.username`` to the number of messages
    carrying that username, in order of first appearance.
    """
    counts: Dict[Optional[str], int] = {}
    for message in numbers:
        author = message.username
        counts[author] = counts.get(author, 0) + 1
    return counts
def report_users(numbers):
    """Print one line per user with their message count, lowest count first."""
    per_user = analyze_users(numbers)
    # Sort on the count (second item of each pair); ties keep dict order.
    ranking = sorted(per_user.items(), key=lambda pair: pair[1])
    for username, count in ranking:
        print(f"{count:5d} {username}")
def main():
    """Parse messages from standard input and print the per-user counts."""
    import sys

    numbers, errors = parse_numberdealers.parse(sys.stdin)

    if numbers == []:
        # Nothing to count: distinguish "no input at all" from "only errors".
        print("No input data" if errors == [] else "No valid number messages!")
        return

    first, last = numbers[0], numbers[-1]
    print(f"Checked from {first.recognized_number} up to {last.recognized_number}")
    report_users(numbers)


if __name__ == '__main__':
    main()

View file

@ -4,6 +4,11 @@ import re
import json
from dataclasses import dataclass
from typing import Optional, List
from .users import USERS
# USERS is None when the users module could not find its data file. Warn
# instead of failing; the message itself states that username resolution
# will not work in that case.
if USERS is None:
    print("Warning: could not read Mattermost users; username resolution will not work")
@dataclass
@ -29,12 +34,6 @@ class Stray(NumberdealersError): pass
class Skipped(NumberdealersError): pass
class Jump(NumberdealersError): pass
try:
USERS = __import__("users").users()
except ImportError:
USERS = None
NUMBER_EMOJI = {
"zero": "0",
"one": "1",

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3
import parse_numberdealers
from . import parse_numberdealers
def link(link_text: str, message_obj: parse_numberdealers.Message):
@ -33,9 +33,19 @@ def str_from_error(err):
return msg + mention(err.message)
def report_errors(errors):
    """Print the given errors, one per line, or a success line if there are none."""
    if not errors:
        print("No errors! 🎉")
        return

    print("🚨 Errors: 🚨")
    # Format every error before printing, so the list is emitted in one go.
    lines = [str_from_error(err) for err in errors]
    print("\n".join(lines))
def main():
import sys
numbers, errors = parse_numberdealers.parse(sys.stdin)
if numbers == [] and errors == []:
print("No input data")
else:
@ -43,11 +53,7 @@ def main():
print("No valid number messages!")
else:
print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}")
if errors:
print("🚨 Errors: 🚨")
print("\n".join(map(str_from_error, errors)))
else:
print("No errors! 🎉")
report_errors(errors)
if __name__ == "__main__":

101
numberdealers/times.py Executable file
View file

@ -0,0 +1,101 @@
#!/usr/bin/env python3
from dataclasses import dataclass
from typing import Iterable
import json
import numpy
from . import parse_numberdealers
@dataclass
class TimeAnalysis:
    """Summary statistics over the time gaps between consecutive messages.

    Filled in by ``analyze_times``; ``report_times`` passes the values to
    ``format_time`` as seconds.
    """

    # Arithmetic mean and standard deviation of the gaps.
    avg: float
    stdev: float
    # Smallest gap, 5th percentile, median, 95th percentile and largest gap.
    min: float
    perc5: float
    med: float
    perc95: float
    max: float
    # Halfway point between the smallest and the largest gap.
    midrange: float
def analyze_times(numbers: Iterable[parse_numberdealers.Message]) -> TimeAnalysis:
    """Compute summary statistics of the time gaps between consecutive messages.

    A gap is the difference between the ``create_at`` values of two
    neighbouring messages, divided by 1000 (presumably converting
    milliseconds to seconds -- confirm against the Message definition).

    Args:
        numbers: The messages, in the order in which gaps should be measured.

    Returns:
        A ``TimeAnalysis`` with mean, standard deviation, minimum, 5th
        percentile, median, 95th percentile, maximum and midrange of the gaps.

    Raises:
        ValueError: If ``numbers`` yields fewer than two messages, because
            there is then no gap to analyze.
    """
    times = []
    prev_time = None
    for msg in numbers:
        if prev_time is not None:
            times.append((msg.create_at - prev_time) / 1000)
        prev_time = msg.create_at

    if not times:
        # Fail with a clear message instead of the opaque error from min([])
        # (and instead of NaN results with warnings from numpy).
        raise ValueError("at least two messages are needed to analyze time differences")

    times.sort()
    min_ = min(times)
    max_ = max(times)
    return TimeAnalysis(
        avg=numpy.mean(times),
        stdev=numpy.std(times),
        min=min_,
        perc5=numpy.percentile(times, 5),
        med=numpy.median(times),
        perc95=numpy.percentile(times, 95),
        max=max_,
        midrange=(min_ + max_) / 2,
    )
def format_time(total_seconds: float):
    """Format a duration given in seconds as a human-readable string.

    Durations below one minute are shown in seconds only, with more decimals
    the shorter they are. Longer durations are shown as whole seconds followed
    by a breakdown into the two largest fitting units, for example
    ``"90 s (1 min 30 sec)"``. A year is counted as 365 days.

    Args:
        total_seconds: The duration in seconds.

    Returns:
        The formatted duration.
    """
    import math

    if total_seconds < 1:
        return f"{total_seconds:.3f} s"
    if total_seconds < 10:
        return f"{total_seconds:.2f} s"
    if total_seconds < 60:
        return f"{total_seconds:.1f} s"

    seconds_str = f"{total_seconds:.0f} s"
    if not math.isfinite(total_seconds):
        # NaN and infinity cannot be broken down into units.
        return seconds_str

    # Round once, up front, and split the resulting integer. Rounding each
    # component separately produced output such as "1 min 60 sec" for 119.7.
    whole_seconds = int(round(float(total_seconds)))
    total_minutes, seconds = divmod(whole_seconds, 60)
    total_hours, minutes = divmod(total_minutes, 60)
    total_days, hours = divmod(total_hours, 24)
    total_years, days = divmod(total_days, 365)

    if total_minutes < 60:
        formatted = f"{total_minutes} min {seconds} sec"
    elif total_hours < 24:
        formatted = f"{total_hours} hr {minutes} min"
    elif total_days < 365:
        formatted = f"{total_days} days {hours} hr"
    else:
        formatted = f"{total_years} yr {days} days"

    return f"{seconds_str} ({formatted})"
def report_times(numbers) -> None:
    """Print the timing statistics of ``numbers`` to standard output.

    The statistics come from ``analyze_times``; each value is rendered with
    ``format_time``.
    """
    a = analyze_times(numbers)
    # ``midrange`` is part of the analysis but is not included in this report.
    print(f""" μ = {format_time(a.avg)}
σ = {format_time(a.stdev)}
min = {format_time(a.min)}
P5 = {format_time(a.perc5)}
median = {format_time(a.med)}
P95 = {format_time(a.perc95)}
max = {format_time(a.max)}""")
def main():
    """Parse messages from standard input and print their timing statistics."""
    import sys

    numbers, errors = parse_numberdealers.parse(sys.stdin)

    if numbers != []:
        print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}")
        report_times(numbers)
    elif errors == []:
        # Neither messages nor errors: there was nothing to parse.
        print("No input data")
    else:
        print("No valid number messages!")


if __name__ == '__main__':
    main()

17
numberdealers/users.py Normal file
View file

@ -0,0 +1,17 @@
from os.path import expanduser
import json
def _users(path="~/dev/mm-archiver/data/users.json"):
    """Load users from a JSON-lines file, keyed by their ``id``.

    Args:
        path: Location of the file holding one JSON object per line; a
            leading ``~`` is expanded to the home directory.

    Returns:
        A dict mapping each user's ``"id"`` to the decoded user object, or
        ``None`` if the file does not exist.
    """
    try:
        fh = open(expanduser(path), encoding="utf-8")
    except FileNotFoundError:
        return None

    result = {}
    with fh:
        for line in fh:
            if not line.strip():
                # Tolerate blank lines instead of failing to decode them.
                continue
            user = json.loads(line)
            result[user["id"]] = user
    return result


USERS = _users()

35
report_numberdealers.py Executable file
View file

@ -0,0 +1,35 @@
#!/usr/bin/env python3
import sys
from numberdealers import parse_numberdealers, times, report_errors, numbers_per_user
def main():
    """Parse messages from standard input and print the combined report.

    The report consists of the error list, followed by the timing statistics
    and the per-user counts, each of the latter two in a fenced block after a
    ``---`` separator.
    """
    numbers, errors = parse_numberdealers.parse(sys.stdin)

    if numbers == []:
        # Without numbers there is nothing to report on.
        print("No input data" if errors == [] else "No valid number messages!")
        return

    print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}")
    print()
    report_errors.report_errors(errors)

    for report in (times.report_times, numbers_per_user.report_users):
        print()
        print("---")
        print()
        print("```")
        report(numbers)
        print("```")


if __name__ == "__main__":
    main()