diff --git a/src/db.py b/src/db.py index 910dfec..ff05961 100644 --- a/src/db.py +++ b/src/db.py @@ -1,6 +1,8 @@ import json from os.path import exists -from typing import List +from typing import Dict, List + +from utils import id_to_url, url_to_id db_filename = "db.json" @@ -24,7 +26,7 @@ def _load_db(): def _save_db(db): with open(db_filename, "w") as db_file: - db_file.write(json.dumps(db)) + db_file.write(json.dumps(db, indent=4)) def get_latest_sync_time() -> int: @@ -38,33 +40,66 @@ def set_latest_sync_time(le_date) -> None: _save_db(db) -def add_discovered_file(file_url) -> List[str]: +def add_discovered_url(file_url, originating_mm_post) -> List[str]: db = _load_db() - discovered_files = set(db.get("discovered_files", [])) - discovered_files.add(file_url) - discovered_files = list(discovered_files) - db["discovered_files"] = discovered_files + files = db.get("files", {}) + file_id = url_to_id(file_url) + if file_id not in files: + files[file_id] = { + "originating_mm_post_id": originating_mm_post["id"], + "originating_mm_post_channel_id": originating_mm_post["channel_id"], + "source_url": file_url, + } + db["files"] = files _save_db(db) - return discovered_files + return files -def get_discovered_files() -> List[str]: +def get_files() -> List[str]: db = _load_db() - discovered_files = set(db.get("discovered_files", [])) - return discovered_files + files = db.get("files", {}) + return files -def add_valid_file(filename, metadata) -> bool: +def set_local_file_path(file_id, local_file_path): + db = _load_db() + file = db["files"][file_id] + file["local_file_path"] = local_file_path + _save_db(db) + return file + + +def mark_file_valid(file_id, metadata) -> (bool, Dict): """ - Returns: Boolean that indicates if the file is newly identified as valid. + Return: Boolean that indicates if the file is newly identified as valid. 
""" db = _load_db() - valid_files = db.get("valid_files", {}) + + file = db["files"][file_id] new_file = False - if filename not in valid_files: + if "valid" not in file or not file["valid"]: new_file = True - valid_files[filename] = metadata - db["valid_files"] = valid_files + file["valid"] = True + file["metadata"] = metadata + _save_db(db) - return new_file + return new_file, file + + +def mark_file_invalid(file_id) -> (bool, Dict): + """ + Return: Boolean that indicates if the state is changed. + True means it was valid but is now invalid or it was unknown and is now invalid + False means it was invalid and is still invalid. + """ + db = _load_db() + file = db["files"][file_id] + changed = False + if "valid" not in file or file["valid"]: + changed = True + file["valid"] = False + + _save_db(db) + + return changed, file diff --git a/src/dir_utils.py b/src/dir_utils.py index ae1b4eb..e6ab2f5 100644 --- a/src/dir_utils.py +++ b/src/dir_utils.py @@ -2,7 +2,7 @@ import os import re # pattern = re.compile(":::spoiler Gitlab sync([^:]*):::") -pattern = re.compile("[^:]*:::") +# pattern = re.compile("[^:]*:::") def find_metadata(filename): diff --git a/src/mattermost.py b/src/mattermost.py index 6dc1b7a..fda10c9 100644 --- a/src/mattermost.py +++ b/src/mattermost.py @@ -12,7 +12,7 @@ from colored import style from mattermostdriver import Driver from tabulate import tabulate -from mattermost_objects import MMUser, MMPost, MMChannelPosts +from mattermost_objects import MMChannelPosts, MMPost, MMUser from utils import humanize_date_difference, timer pp = pp.PrettyPrinter(indent=2) @@ -156,15 +156,33 @@ class MMApi(Driver): class ChannelApi(MMApi): - def __init__(self, channel_name, user=None): + def __init__(self, channel_name=None, channel_id=None, user=None): MMApi.__init__(self, user) - self.channel_id = self.get_channel_id(channel_name) + assert channel_name != None or channel_id != None + + if channel_name is not None: + self.channel_id = 
self.get_channel_id(channel_name) + if channel_id is not None: + self.channel_id = channel_id def create_post(self, message: str, props: Dict = None) -> None: resp = self.posts.create_post( options={"channel_id": self.channel_id, "message": message, "props": props} ) self.log(f'Message successfully created: "{message}"') + + def create_threaded_post( + self, post_id: str, message: str, props: Dict = None + ) -> None: + resp = self.posts.create_post( + options={ + "channel_id": self.channel_id, + "message": message, + "root_id": post_id, + "props": props, + } + ) + self.log(f'Message successfully created: "{message}"') # print_response("Create post", resp) diff --git a/src/mattermost_objects.py b/src/mattermost_objects.py index f1d7c25..8492817 100644 --- a/src/mattermost_objects.py +++ b/src/mattermost_objects.py @@ -1,4 +1,4 @@ -from typing import NamedTuple, List, Dict +from typing import Dict, List, NamedTuple class MMUser(NamedTuple): diff --git a/src/sync.py b/src/sync.py index 4e4cff9..0e52658 100755 --- a/src/sync.py +++ b/src/sync.py @@ -1,11 +1,12 @@ #!/usr/bin/env -S nix shell --impure --expr "(import (builtins.getFlake \"nixpkgs\") {}).python3.withPackages (ps: [ ps.python-gitlab ps.GitPython ])" --command python -import git -import gitlab import json import os import pathlib +import git +import gitlab + TOKEN_NAME = os.environ['GITLAB_ACCESS_TOKEN_NAME'] TOKEN = os.environ['GITLAB_ACCESS_TOKEN'] REPO_FOLDER = 'drive' diff --git a/src/utils.py b/src/utils.py index 10f7cb1..ac4b8c0 100644 --- a/src/utils.py +++ b/src/utils.py @@ -3,6 +3,15 @@ import functools import time +def url_to_id(url: str) -> str: + return url[url.rfind("/") + 1 :] + + +def id_to_url(id): + # filename[5:-3] + return f"https://codimd.zeus.gent/{id}" + + def timer(func): """Print the runtime of the decorated function""" diff --git a/src/web.py b/src/web.py index dd852c6..7810dfc 100644 --- a/src/web.py +++ b/src/web.py @@ -2,14 +2,17 @@ import json import os import re import time 
+from pprint import pprint import hug +import mattermostdriver.exceptions import requests import db import dir_utils -from mattermost import MMApi, ChannelApi import mattermost +from mattermost import ChannelApi, MMApi +from utils import id_to_url, url_to_id def find_codimd_files_on_mattermost(): @@ -31,15 +34,23 @@ def find_codimd_files_on_mattermost(): print(f"Scraping {len(posts)} posts") for post_id, post in posts.items(): - urls = re.findall(r"(https?://[^\s#?]+)", post["message"]) + # old: r"(https?://[^\s#?]+)" + url_regex = r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)" + urls = re.findall(url_regex, post["message"]) for url in urls: idx = url.find("codimd.zeus.gent") if idx == -1: # In this case it's an url but not for codimd continue + # Remove everything after the # or ? + for char in ["#", "?"]: + cut_idx = url.rfind(char) if url.rfind(char) != -1 else len(url) + url = url[:cut_idx] + + # pprint(post) print(url) - db.add_discovered_file(url) + db.add_discovered_url(url, post) # When everything succeeded. Save the current unix time as latest fetched moment db.set_latest_sync_time(current_fetch_time) @@ -50,32 +61,80 @@ def read_note(url): def download_files(): - for url in db.get_discovered_files(): - with open(f'data/note-{url[url.rfind("/") + 1:]}.md', "w") as f: + for file_id, file_info in db.get_files().items(): + local_file_path = f"data/note-{file_id}.md" + url = file_info["source_url"] + with open(local_file_path, "w") as f: print(f"Downloading url {url}") f.write(read_note(url)) + db.set_local_file_path(file_id, local_file_path) -def filename_to_url(filename): - return f"https://codimd.zeus.gent/{filename[5:-3]}" -def report_newly_found_file(filename): - channel = ChannelApi("bestuur-dev", mattermost.users["flynn"]) - - channel.create_post( - f"I found a new report: {filename_to_url(filename)}! 
Making work of putting it on gitlab :)" +def send_message(file_id, file_info, message): + channel_id = file_info["originating_mm_post_channel_id"] + post_id = file_info["originating_mm_post_id"] + + # TODO Comment below line, this is for testing purposes + # channel_id = MMApi().get_channel_id("bestuur-dev") + channel = ChannelApi( + channel_id=channel_id, + user=mattermost.users["flynn"], ) + prefix = "" + # This is bestuur-INTERN where you can only post when you prefix your message with a '!' + if file_info["originating_mm_post_channel_id"] == "hrx6pgfswjbttcj8nim3jrwe7w": + prefix = "! " + + try: + channel.create_threaded_post( + post_id, + f"{prefix}{message}", + ) + except mattermostdriver.exceptions.InvalidOrMissingParameters as e: + # This will occur when we try to react to a file in a channel that is not the same as the originating channel. + unique_post_url = f"https://mattermost.zeus.gent/zeus/pl/{post_id}" + channel.create_post( + f"{unique_post_url}\n\n{message}", + ) + + +def report_newly_found_file(file_id, file_info): + message = f"I found a new CodiMD file in this post! Making work of putting it on gitlab :)\n - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): {file_info['metadata']['sync-to']}" + send_message(file_id, file_info, message) + + +def report_newly_found_but_invalid_file(file_id, file_info): + message = """Hi there! :wave: +I'm your friendly neighbourhood document sync bot. +I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping, but the necessary metadata block is not present. +You can easily add the correct info and I will do the rest of the work for you! + +Just add the following lines to your file, the location in your file is not important but at the top would be my recommendation. 
+ +``` +:::spoiler Gitlab sync +- sync-to: +::: +```""" + send_message(file_id, file_info, message) + def validate_downloaded_files(): path = "data" dir_list = os.listdir(path) - for filename in dir_list: - metadata = dir_utils.find_metadata("data/" + filename) + for file_id, file_info in db.get_files().items(): + file_path = file_info["local_file_path"] + metadata = dir_utils.find_metadata(file_path) if metadata is not None: - is_new_file = db.add_valid_file(filename, metadata) + is_new_file, new_file_info = db.mark_file_valid(file_id, metadata) if is_new_file: - report_newly_found_file(filename) + report_newly_found_file(file_id, new_file_info) + else: + changed, new_file_info = db.mark_file_invalid(file_id) + if changed: + report_newly_found_but_invalid_file(file_id, new_file_info) return db._load_db()