#!/usr/bin/env python3
"""Extract the numbers posted in a counting channel and flag counting mistakes.

Input is a stream of JSON-encoded posts, one per line.  Every post whose text
(or, failing that, whose first attachment's file name) is a number is
collected, and every deviation from "previous number + 1" is reported as a
``NumberdealersError`` subclass.
"""

import re
import json
from dataclasses import dataclass
from typing import Iterable, Optional, List, Tuple


@dataclass
class Message:
    """The fields of one post that the parser keeps."""

    id: str
    username: Optional[str]
    # Raw, uncleaned text of the post (may be missing or empty).
    message: Optional[str]
    # Name of the first attached file, if the post has one.
    first_filename: Optional[str]
    # Divided by 1000 before being used as a POSIX timestamp in main().
    create_at: int
    # Filled in by parse() once the post has been recognised as a number.
    recognized_number: Optional[int]


@dataclass
class NumberdealersError:
    """Base class for everything parse() reports about a post."""

    # The post the error is about.
    message: Message
    # The numbered post that preceded it (None when there was none yet).
    previous_message: Message
    # The number that was expected instead; None when no number was seen yet.
    expected_number: Optional[int]


class UnrecognizedNumber(NumberdealersError):
    """The post has neither text nor an attachment to read a number from."""


class EditedMessage(NumberdealersError):
    """The post carries an ``edit_at`` value."""


class NonNumberMessage(NumberdealersError):
    """The cleaned-up text of the post is not an integer."""


class ShouldHaveBeen(NumberdealersError):
    """The previous number was wrong, but the current one continues as if it
    had been correct (e.g. 1, 5, 3: the 5 should have been 2)."""


class Duplicate(NumberdealersError):
    """The same number was posted twice in a row."""


class Stray(NumberdealersError):
    """The previous number was out of place and the current one continues
    from the number before it (e.g. 1, 7, 2)."""


class Skipped(NumberdealersError):
    """Exactly one number was skipped after an in-sequence number
    (e.g. 1, 2, 4)."""


class Jump(NumberdealersError):
    """Any other break in the sequence."""


# Optional lookup table provided by a sibling ``users`` module; indexed by
# user id below, each entry offering ``.get("username")``.
try:
    USERS = __import__("users").users()
except ImportError:
    USERS = None

# Emoji short names (written between colons in a post) and their digit.
NUMBER_EMOJI = {
    "zero": "0",
    "one": "1",
    "two": "2",
    "three": "3",
    "four": "4",
    "five": "5",
    "six": "6",
    "seven": "7",
    "eight": "8",
    "nine": "9",
}

# Not referenced in this file.
# NOTE(review): presumably a permalink prefix to which a post id is appended;
# confirm against the code that imports it.
URL_PREFIX = "https://mattermost.zeus.gent/zeus/pl/"


def _resolve_username(post: dict) -> Optional[str]:
    """Return the username from USERS when known, else the post's own."""
    username = None
    if "user_id" in post and USERS is not None and post["user_id"] in USERS:
        username = USERS[post["user_id"]].get("username")
    if username is None:
        username = post.get("username")
    return username


def _first_filename(post: dict) -> Optional[str]:
    """Return the name of the post's first attached file, or None."""
    try:
        return post["metadata"]["files"][0]["name"]
    except (KeyError, IndexError, TypeError):
        # TypeError covers a JSON null for "metadata" or "files".
        return None


def _clean_message(text: str) -> str:
    """Strip markup and turn number emoji into digits."""
    # Drop a leading run of '#'/'>' (plus one space) and every '*', '_', '`'.
    text = re.sub(r"^[#>]* ?|[*_`]*", "", text)
    for emoji, numb in NUMBER_EMOJI.items():
        text = re.sub(f" *:{emoji}: *", numb, text)
    # ":num12:" / ":greennum12:" -> "12"
    text = re.sub(" ?:(?:green)?num([0-9]+): ?", lambda m: m.group(1), text)
    # Variation selector-16, combining keycap and zero-width space: what is
    # left around the digit of a Unicode keycap emoji.
    text = text.replace("\ufe0f", "").replace("\u20e3", "").replace("\u200b", "")
    return text.strip()


def _discard_error(errors: List[NumberdealersError],
                   error: Optional[NumberdealersError]) -> None:
    """Remove exactly the object *error* from *errors*, if it is in there."""
    if error is None:
        return
    # Compare by identity and search from the end: the wanted error is one of
    # the most recent entries, and dataclass equality could match another one.
    for index in range(len(errors) - 1, -1, -1):
        if errors[index] is error:
            del errors[index]
            return


def parse(
    message_json_lines: Iterable[str],
) -> Tuple[List[Message], List[NumberdealersError]]:
    """Parse JSON-encoded posts and check that they count up by one.

    Args:
        message_json_lines: iterable of strings, each holding one JSON object
            describing a post.  Blank lines are skipped.  Posts whose "type"
            is not None/absent are ignored.

    Returns:
        A ``(numbers, errors)`` tuple: ``numbers`` holds a ``Message`` for
        every post recognised as a number (in input order) and ``errors``
        holds the problems found.

    Raises:
        json.JSONDecodeError: a non-blank line is not valid JSON.
        KeyError: a post lacks "id" or "create_at".
    """
    second_last_number = None
    second_last_message = None
    last_number = None
    last_message = None
    # Sequence error recorded while handling the previous numbered post, or
    # None if that post was in sequence.  ShouldHaveBeen/Stray replace it.
    pending_error = None
    numbers = []
    errors = []

    for raw_line in message_json_lines:
        if not raw_line.strip():
            continue
        post = json.loads(raw_line)

        # Ignore non-message posts (e.g. join/leave)
        if post.get("type") is not None:
            continue

        first_filename = _first_filename(post)
        message_obj = Message(
            id=post["id"],
            username=_resolve_username(post),
            message=post.get("message"),
            first_filename=first_filename,
            create_at=post["create_at"],
            recognized_number=None,
        )
        expected = last_number + 1 if last_number is not None else None

        raw_text = post.get("message")
        if raw_text:
            candidate = _clean_message(raw_text)
        elif first_filename is not None:
            # No text: fall back to the attachment name up to its first dot.
            candidate = first_filename.split(".")[0]
        else:
            errors.append(UnrecognizedNumber(message_obj, last_message, expected))
            continue

        if post.get("edit_at") is not None:
            errors.append(EditedMessage(message_obj, last_message, expected))

        # Integers without leading zeros, optionally negative.
        match = re.fullmatch(r"-?[1-9][0-9]*|0", candidate)
        if not match:
            errors.append(NonNumberMessage(message_obj, last_message, expected))
            continue

        number = int(match.group(0))
        message_obj.recognized_number = number

        if last_number is None:
            # First number seen: pretend the two before it were in sequence.
            last_number = number - 1
            second_last_number = number - 2

        numbers.append(message_obj)

        sequence_error = None
        if number != last_number + 1:
            previous_was_off = last_number != second_last_number + 1
            if number == second_last_number + 2 and previous_was_off:
                # The previous number was the mistake, not this one.
                _discard_error(errors, pending_error)
                sequence_error = ShouldHaveBeen(
                    last_message, second_last_message, number - 1
                )
            elif number == last_number:
                sequence_error = Duplicate(message_obj, last_message, last_number + 1)
            elif number == second_last_number + 1 and previous_was_off:
                _discard_error(errors, pending_error)
                sequence_error = Stray(
                    last_message, second_last_message, last_number + 1
                )
            elif not previous_was_off and number == last_number + 2:
                # The previous number was fine, so there is no earlier error
                # to retract here.
                sequence_error = Skipped(
                    last_message, second_last_message, number - 1
                )
            else:
                sequence_error = Jump(message_obj, last_message, last_number + 1)
            errors.append(sequence_error)
        pending_error = sequence_error

        second_last_number = last_number
        second_last_message = last_message
        last_number = number
        last_message = message_obj

    return numbers, errors


def main() -> None:
    """Read posts from stdin and print one line per recognised number.

    Each output line holds the UTC creation time, the username and the
    number, separated by tabs.  The errors found by parse() are not printed.
    """
    import sys
    from datetime import datetime, timezone

    numbers, _errors = parse(sys.stdin)
    for number in numbers:
        # create_at is divided by 1000 to get seconds for fromtimestamp().
        moment = datetime.fromtimestamp(number.create_at / 1000, timezone.utc)
        moment_str = str(moment).replace("+00:00", "")
        print(f"{moment_str}\t{number.username}\t{number.recognized_number}")


if __name__ == "__main__":
    main()