Harmonize all reports based on parse_numberdealers.py

This commit is contained in:
Midgard 2024-03-14 10:15:53 +01:00
parent a6df150b6f
commit 225dc70cc8
Signed by: midgard
GPG key ID: 511C112F1331BBB4
8 changed files with 213 additions and 12 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
*.tsv
legacy/
__pycache__/
*.pyc
*.pyo

View file

View file

@ -0,0 +1,38 @@
#!/usr/bin/env python3
from collections import defaultdict
from typing import Dict, Optional, Iterable
from . import parse_numberdealers
def analyze_users(numbers: Iterable[parse_numberdealers.Message]) -> Dict[Optional[str], int]:
    """Count how many messages each username posted.

    Args:
        numbers: Messages to tally; only each message's ``username``
            attribute is read.

    Returns:
        A plain dict mapping each username to its number of messages, with
        keys in order of first appearance.
    """
    tally: Dict[Optional[str], int] = {}
    for message in numbers:
        author = message.username
        tally[author] = tally.get(author, 0) + 1
    return tally
def report_users(numbers):
    """Print one line per user with their message count, lowest count first.

    Counts come from ``analyze_users``. Each line shows the count
    right-aligned in a five-character column, a space, and the username.
    """
    counts = analyze_users(numbers)
    # sorted() is stable, so users with equal counts keep their tally order.
    for name in sorted(counts, key=counts.get):
        print(f"{counts[name]:5d} {name}")
def main():
    """Read messages from stdin and print the per-user message counts.

    Prints a short notice instead when the parser yields no number
    messages; otherwise prints the checked number range followed by the
    per-user report.
    """
    import sys

    numbers, errors = parse_numberdealers.parse(sys.stdin)
    if numbers == []:
        # Nothing to tally; the user report would be empty anyway.
        print("No input data" if errors == [] else "No valid number messages!")
        return

    first, last = numbers[0], numbers[-1]
    print(f"Checked from {first.recognized_number} up to {last.recognized_number}")
    report_users(numbers)


if __name__ == "__main__":
    main()

View file

@ -4,6 +4,11 @@ import re
import json import json
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, List from typing import Optional, List
from .users import USERS
if USERS is None:
    # Send the diagnostic to stderr so it never ends up inside the report
    # that the scripts print on stdout.
    import sys
    print("Warning: could not read Mattermost users; username resolution will not work", file=sys.stderr)
@dataclass @dataclass
@ -29,12 +34,6 @@ class Stray(NumberdealersError): pass
class Skipped(NumberdealersError): pass class Skipped(NumberdealersError): pass
class Jump(NumberdealersError): pass class Jump(NumberdealersError): pass
try:
USERS = __import__("users").users()
except ImportError:
USERS = None
NUMBER_EMOJI = { NUMBER_EMOJI = {
"zero": "0", "zero": "0",
"one": "1", "one": "1",

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import parse_numberdealers from . import parse_numberdealers
def link(link_text: str, message_obj: parse_numberdealers.Message): def link(link_text: str, message_obj: parse_numberdealers.Message):
@ -33,16 +33,8 @@ def str_from_error(err):
return msg + mention(err.message) return msg + mention(err.message)
def main():
import sys def report_errors(errors):
numbers, errors = parse_numberdealers.parse(sys.stdin)
if numbers == [] and errors == []:
print("No input data")
else:
if numbers == []:
print("No valid number messages!")
else:
print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}")
if errors: if errors:
print("🚨 Errors: 🚨") print("🚨 Errors: 🚨")
print("\n".join(map(str_from_error, errors))) print("\n".join(map(str_from_error, errors)))
@ -50,5 +42,19 @@ def main():
print("No errors! 🎉") print("No errors! 🎉")
# Script entry point: parses messages from stdin, prints either a notice about
# missing input or the checked number range, and prints the error report via
# report_errors().
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether report_errors(errors) also runs when there are no valid number
# messages -- confirm against the actual file.
def main():
import sys
numbers, errors = parse_numberdealers.parse(sys.stdin)
if numbers == [] and errors == []:
print("No input data")
else:
if numbers == []:
print("No valid number messages!")
else:
print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}")
report_errors(errors)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

101
numberdealers/times.py Executable file
View file

@ -0,0 +1,101 @@
#!/usr/bin/env python3
from dataclasses import dataclass
from typing import Iterable
import json
import numpy
from . import parse_numberdealers
@dataclass
class TimeAnalysis:
    """Summary statistics over a series of time intervals.

    ``analyze_times`` fills every field from the gaps between consecutive
    messages.
    """

    # Arithmetic mean of the intervals.
    avg: float
    # Standard deviation of the intervals.
    stdev: float
    # Shortest interval.
    min: float
    # 5th percentile.
    perc5: float
    # Median interval.
    med: float
    # 95th percentile.
    perc95: float
    # Longest interval.
    max: float
    # Midpoint between the shortest and the longest interval.
    midrange: float
def analyze_times(numbers: Iterable[parse_numberdealers.Message]) -> TimeAnalysis:
    """Compute statistics over the gaps between consecutive messages.

    Each gap is the difference between two successive ``create_at`` values
    divided by 1000 (presumably milliseconds converted to seconds --
    confirm against the parser).

    Args:
        numbers: Messages in the order in which they should be compared.
            At least two are needed to form one gap.

    Returns:
        A ``TimeAnalysis`` describing the gaps.

    Raises:
        ValueError: If fewer than two messages are supplied, because no
            gap can be computed then.
    """
    gaps = []
    prev_time = None
    for msg in numbers:
        if prev_time is not None:
            gaps.append((msg.create_at - prev_time) / 1000)
        prev_time = msg.create_at

    if not gaps:
        # Fail with an explicit message instead of the opaque error that
        # min() raises on an empty list.
        raise ValueError("need at least two messages to analyze the time between them")

    gaps.sort()
    # After sorting, the extremes sit at both ends of the list.
    shortest, longest = gaps[0], gaps[-1]
    return TimeAnalysis(
        avg=numpy.mean(gaps),
        stdev=numpy.std(gaps),
        min=shortest,
        perc5=numpy.percentile(gaps, 5),
        med=numpy.median(gaps),
        perc95=numpy.percentile(gaps, 95),
        max=longest,
        midrange=(shortest + longest) / 2,
    )
def format_time(total_seconds: float):
    """Render a duration given in seconds as human-readable text.

    Durations under one minute are shown in seconds only, with more
    decimals the shorter the duration. Longer durations show the rounded
    number of seconds followed by a breakdown into two units, for example
    ``"90 s (1 min 30 sec)"``.

    The breakdown is derived from the *rounded* number of seconds, so it
    always agrees with the seconds figure and never shows a value such as
    "1 min 60 sec".

    Args:
        total_seconds: Duration in seconds.

    Returns:
        The formatted duration.
    """
    if total_seconds < 1:
        return f"{total_seconds:.3f} s"
    if total_seconds < 10:
        return f"{total_seconds:.2f} s"
    if total_seconds < 60:
        return f"{total_seconds:.1f} s"

    seconds_str = f"{total_seconds:.0f} s"
    try:
        # round() uses the same rounding as the ".0f" format above.
        whole_seconds = int(round(float(total_seconds)))
    except (ValueError, OverflowError):
        # NaN and infinity cannot be split into units; show them as they are.
        return seconds_str

    total_minutes, seconds = divmod(whole_seconds, 60)
    total_hours, minutes = divmod(total_minutes, 60)
    total_days, hours = divmod(total_hours, 24)
    total_years, days = divmod(total_days, 365)

    if total_minutes < 60:
        formatted = f"{total_minutes} min {seconds} sec"
    elif total_hours < 24:
        formatted = f"{total_hours} hr {minutes} min"
    elif total_days < 365:
        formatted = f"{total_days} days {hours} hr"
    else:
        formatted = f"{total_years} yr {days} days"

    return f"{seconds_str} ({formatted})"
def report_times(numbers):
    """Print statistics about the time between consecutive messages.

    Prints the mean, standard deviation, minimum, 5th percentile, median,
    95th percentile and maximum as computed by ``analyze_times`` and
    formatted by ``format_time``. When fewer than two messages are given
    there is no interval to analyze, so a short notice is printed instead.

    Args:
        numbers: Messages to analyze, in order.
    """
    numbers = list(numbers)
    if len(numbers) < 2:
        # analyze_times() needs at least one gap; avoid crashing mid-report.
        print("Not enough messages to compute time statistics")
        return

    a = analyze_times(numbers)
    # The literal below is kept exactly as it is: its layout is the output.
    print(f""" μ = {format_time(a.avg)}
σ = {format_time(a.stdev)}
min = {format_time(a.min)}
P5 = {format_time(a.perc5)}
median = {format_time(a.med)}
P95 = {format_time(a.perc95)}
max = {format_time(a.max)}""")
def main():
    """Read messages from stdin and print the time statistics report.

    Prints a short notice and stops when the parser yields no number
    messages, so the time analysis is never attempted on empty input.
    Otherwise prints the checked number range followed by the report.
    """
    import sys

    numbers, errors = parse_numberdealers.parse(sys.stdin)
    if not numbers:
        print("No valid number messages!" if errors else "No input data")
        return

    print(f"Checked from {numbers[0].recognized_number} up to {numbers[-1].recognized_number}")
    report_times(numbers)


if __name__ == "__main__":
    main()

17
numberdealers/users.py Normal file
View file

@ -0,0 +1,17 @@
from os.path import expanduser
import json
def _users(path=None):
    """Load the users file into a dict keyed by user id.

    The file is read as one JSON object per line; each object must have an
    ``"id"`` key. Blank lines are skipped.

    Args:
        path: File to read. Defaults to
            ``~/dev/mm-archiver/data/users.json``.

    Returns:
        A dict mapping each user's ``"id"`` to the decoded user object, or
        ``None`` when the file does not exist.
    """
    if path is None:
        path = expanduser("~/dev/mm-archiver/data/users.json")

    result = {}
    try:
        with open(path, encoding="utf-8") as fh:
            for line in fh:
                # A blank line (e.g. at the end of the file) is not a record.
                if not line.strip():
                    continue
                user = json.loads(line)
                result[user["id"]] = user
    except FileNotFoundError:
        return None
    return result
# Loaded once at import time; None when the users file does not exist.
USERS = _users()

35
report_numberdealers.py Executable file
View file

@ -0,0 +1,35 @@
#!/usr/bin/env python3
import sys
from numberdealers import parse_numberdealers, times, report_errors, numbers_per_user
def main():
    """Read messages from stdin and print the combined report.

    The report consists of the checked number range, the error report, and
    then the time statistics and the per-user counts, each introduced by a
    ``---`` separator and wrapped in a ``` fence. When the parser yields no
    number messages, only a short notice is printed.
    """
    numbers, errors = parse_numberdealers.parse(sys.stdin)
    if numbers == []:
        print("No input data" if errors == [] else "No valid number messages!")
        return

    first, last = numbers[0], numbers[-1]
    print(f"Checked from {first.recognized_number} up to {last.recognized_number}")
    print()
    report_errors.report_errors(errors)

    for fenced_report in (times.report_times, numbers_per_user.report_users):
        # Blank line, separator, blank line, then the opening fence.
        print("\n---\n\n```")
        fenced_report(numbers)
        print("```")


if __name__ == "__main__":
    main()