check_numberdealers/numberdealers/parse_numberdealers.py

169 lines
4.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import json
import re
import sys
from dataclasses import dataclass
from typing import Optional, List

from .users import USERS
# Without the user table, parse() can only fall back to the "username" key of
# each post. The warning goes to stderr so it cannot end up inside the
# tab-separated data that main() writes to stdout.
if USERS is None:
    print(
        "Warning: could not read Mattermost users; username resolution will not work",
        file=sys.stderr,
    )
@dataclass
class Message:
    """One chat post, reduced to the fields the number check works with.

    Attributes:
        id: Identifier of the post (its ``id`` key).
        username: Name of the poster, or ``None`` when none could be found.
        message: Text of the post as received, or ``None`` when absent.
        first_filename: Name of the first attached file, or ``None`` when
            the post carries no file.
        create_at: Creation time of the post; ``main`` divides it by 1000
            before converting it, i.e. treats it as milliseconds since the
            Unix epoch.
        recognized_number: Integer recognized in the post. ``parse`` creates
            every message with ``None`` here and fills it in once a number
            has been recognized.
    """

    id: str
    username: Optional[str]
    message: Optional[str]
    first_filename: Optional[str]
    create_at: int
    recognized_number: Optional[int]
@dataclass
class NumberdealersError:
    """Base record for one problem found while checking the number sequence.

    The concrete kind of problem is expressed by the subclass that is
    instantiated; all subclasses share these fields.

    Attributes:
        message: The post the problem is reported on.
        previous_message: The post that preceded ``message`` in the sequence
            of recognized numbers, or ``None`` when ``parse`` had not seen
            such a post yet.
        expected_number: The number that was expected, or ``None`` when no
            number had been recognized before and nothing could be expected.
    """

    message: Message
    # parse() passes None here until a number has been recognized, so the
    # annotation has to allow it.
    previous_message: Optional[Message]
    expected_number: Optional[int]
class UnrecognizedNumber(NumberdealersError):
    """The post has neither text nor an attached file to read a number from."""


class EditedMessage(NumberdealersError):
    """The post carries an ``edit_at`` value, i.e. it was edited."""


class NonNumberMessage(NumberdealersError):
    """The post's normalized text is not an integer."""


class ShouldHaveBeen(NumberdealersError):
    """The previous number was off, and the next one continued as if it had been right."""


class Duplicate(NumberdealersError):
    """The number repeats the previous number."""


class Stray(NumberdealersError):
    """The previous number was off, and the next one continued from the number before it."""


class Skipped(NumberdealersError):
    """The previous number was in sequence and the next one is exactly two higher."""


class Jump(NumberdealersError):
    """The number breaks the sequence in a way none of the other kinds describe."""
# Emoji short names for the ten digits, mapped to the digit character that
# parse() substitutes for ":<name>:" before it looks for a number.
NUMBER_EMOJI = {
    name: str(digit)
    for digit, name in enumerate(
        (
            "zero",
            "one",
            "two",
            "three",
            "four",
            "five",
            "six",
            "seven",
            "eight",
            "nine",
        )
    )
}

# Not referenced in this module. Presumably a post id is appended to link to a
# single post -- confirm against the code that uses it.
URL_PREFIX = "https://mattermost.zeus.gent/zeus/pl/"
def _record_sequence_error(errors, new_error, superseded=None):
    """Store *new_error*, taking the place of *superseded* when one is given.

    The superseded error is searched by identity, newest first, and
    overwritten in place so the list keeps its order. When there is nothing
    to supersede, or it is not in the list, *new_error* is appended.
    """
    if superseded is not None:
        for index in range(len(errors) - 1, -1, -1):
            if errors[index] is superseded:
                errors[index] = new_error
                return
    errors.append(new_error)


def parse(message_json_lines):
    """Check a stream of posts for an unbroken "previous number + 1" sequence.

    For every post the text is normalized: leading ``#``/``>`` markers and
    ``*``, ``_`` and backtick characters are removed, digit emoji codes and
    ``:numN:`` / ``:greennumN:`` codes are replaced by their digits, and
    U+FE0F, U+20E3 and U+200B are dropped. A post without text is represented
    by the name of its first attached file up to the first dot. The result
    counts as a number when it is an integer without leading zeros, with an
    optional minus sign.

    The first recognized number is accepted as the start of the sequence.
    Posts are processed in the order given; nothing here sorts them.

    Args:
        message_json_lines: Iterable of strings, each one JSON object
            describing a post. ``id`` and ``create_at`` are required;
            ``type``, ``user_id``, ``username``, ``message``, ``metadata``
            and ``edit_at`` are read when present.

    Returns:
        A ``(numbers, errors)`` tuple. ``numbers`` lists, in input order,
        the ``Message`` objects in which a number was recognized. ``errors``
        lists the ``NumberdealersError`` instances describing what was wrong.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON.
        KeyError: If a post that is not skipped lacks ``id`` or ``create_at``.
    """
    second_last_number = None
    second_last_message = None
    last_number = None
    last_message = None
    # Error that the sequence check recorded for last_message's number, or
    # None when that number was in sequence.
    last_sequence_error = None
    numbers = []
    errors = []

    for line in message_json_lines:
        line = json.loads(line)

        # Ignore non-message posts (e.g. join/leave)
        if line.get("type") is not None:
            continue

        # Prefer the user table; fall back to a name stored on the post.
        username = None
        if "user_id" in line and USERS is not None and line["user_id"] in USERS:
            username = USERS[line["user_id"]].get("username")
        if username is None:
            username = line.get("username")

        try:
            first_filename = line["metadata"]["files"][0]["name"]
        except (KeyError, IndexError):
            first_filename = None

        message_obj = Message(
            id=line["id"],
            username=username,
            message=line.get("message"),
            first_filename=first_filename,
            create_at=line["create_at"],
            recognized_number=None,
        )

        # What this post should have said, if anything is known yet.
        expected_number = last_number + 1 if last_number is not None else None

        if "message" in line and line["message"] != "":
            message = line["message"]
            message = re.sub(r"^[#>]* ?|[*_`]*", "", message)
            for emoji, numb in NUMBER_EMOJI.items():
                message = re.sub(f" *:{emoji}: *", numb, message)
            message = re.sub(" ?:(?:green)?num([0-9]+): ?", lambda m: m.group(1), message)
            message = message.replace("\ufe0f", "").replace("\u20e3", "").replace("\u200b", "")
            message = message.strip()
        elif first_filename is not None:
            message = first_filename.split(".")[0]
        else:
            errors.append(UnrecognizedNumber(message_obj, last_message, expected_number))
            continue

        # An edit is reported, but the post still takes part in the check.
        if line.get("edit_at") is not None:
            errors.append(EditedMessage(message_obj, last_message, expected_number))

        m = re.fullmatch(r"-?[1-9][0-9]*|0", message)
        if not m:
            errors.append(NonNumberMessage(message_obj, last_message, expected_number))
            continue

        number = int(m.group(0))
        message_obj.recognized_number = number

        if last_number is None:
            # First number: pretend its two predecessors existed so that it
            # is accepted as the start of the sequence.
            last_number = number - 1
            second_last_number = number - 2

        numbers.append(message_obj)

        sequence_error = None
        if number != last_number + 1:
            # True exactly when the previous number got a sequence error.
            previous_was_off = last_number != second_last_number + 1
            superseded = None
            if number == second_last_number + 2 and previous_was_off:
                # The sequence went on as if the previous number had been
                # right: re-label the previous number's error.
                sequence_error = ShouldHaveBeen(last_message, second_last_message, number - 1)
                superseded = last_sequence_error
            elif number == last_number:
                sequence_error = Duplicate(message_obj, last_message, last_number + 1)
            elif number == second_last_number + 1 and previous_was_off:
                # The sequence went on from the number before the previous
                # one: re-label the previous number's error.
                sequence_error = Stray(last_message, second_last_message, last_number + 1)
                superseded = last_sequence_error
            elif not previous_was_off and number == last_number + 2:
                # The previous number was fine, so there is no earlier error
                # to replace. Popping one here used to crash on an empty list
                # or drop an unrelated error.
                # NOTE(review): unlike Duplicate and Jump this is reported on
                # the previous message, not the current one -- confirm that
                # this is intended.
                sequence_error = Skipped(last_message, second_last_message, number - 1)
            else:
                sequence_error = Jump(message_obj, last_message, last_number + 1)
            _record_sequence_error(errors, sequence_error, superseded)

        second_last_number = last_number
        second_last_message = last_message
        last_number = number
        last_message = message_obj
        last_sequence_error = sequence_error

    return numbers, errors
def main():
    """Read posts as JSON lines from stdin and list the recognized numbers.

    Writes one line per recognized number to stdout: the UTC creation time,
    the username and the number, separated by tabs. The errors that
    ``parse`` reports are not used here.
    """
    import sys
    from datetime import datetime, timezone

    recognized, _ = parse(sys.stdin)
    for entry in recognized:
        # create_at is divided by 1000 to get seconds for fromtimestamp().
        utc_time = datetime.fromtimestamp(entry.create_at / 1000, timezone.utc)
        # The time is always UTC, so the offset suffix carries no information.
        stamp = str(utc_time).replace("+00:00", "")
        print(stamp, entry.username, entry.recognized_number, sep="\t")


if __name__ == "__main__":
    main()