import json
import os
import re
import time
from pprint import pprint

import hug
import mattermostdriver.exceptions
import requests

import db
import dir_utils
import mattermost
from mattermost import ChannelApi, MMApi
from utils import id_to_url, url_to_id

# Channel in which posts are only accepted when the message starts with '!'.
_BESTUUR_INTERN_CHANNEL_ID = "hrx6pgfswjbttcj8nim3jrwe7w"
_BESTUUR_CHANNEL_ID = "uda7ax9poprduq8ob56e1fqk4e"

# Substring that marks a URL as pointing at the CodiMD instance.
_CODIMD_HOST = "codimd.zeus.gent"

# Directory in which downloaded notes are stored.
_DATA_DIR = "data"

# Compiled once instead of on every post.
# old: r"(https?://[^\s#?]+)"
_URL_PATTERN = re.compile(
    r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)"
)


def _strip_query_and_fragment(url):
    """Return *url* without its fragment (``#...``) and query (``?...``).

    The cut happens at the FIRST occurrence of each separator, so nothing
    after a ``#`` or ``?`` survives, even when the separator appears twice.
    """
    for separator in ("#", "?"):
        url = url.split(separator, 1)[0]
    return url


def find_codimd_files_on_mattermost():
    """Scan the configured Mattermost channels for CodiMD URLs.

    Fetches the posts of each channel since the sync time stored in ``db``,
    extracts every URL containing the CodiMD host name from the post
    messages and registers it through ``db.add_discovered_url``. Once all
    channels have been processed, the timestamp taken at the start of the
    run (milliseconds since the epoch) is stored as the new sync time.
    """
    mm_api = MMApi()  # local name chosen so the `mattermost` module is not shadowed
    channels = [
        _BESTUUR_INTERN_CHANNEL_ID,  # bestuur-INTERN
        _BESTUUR_CHANNEL_ID,  # bestuur
    ]

    last_fetch_time = db.get_latest_sync_time()
    # Taken before fetching, so posts created while we scrape are picked up
    # by the next run.
    current_fetch_time = int(time.time() * 1000)
    print(f"Fetching posts since: {last_fetch_time}")

    for channel_id in channels:
        print(f"Fetching posts for channel_id: {channel_id}")

        # TODO Use first statement for all posts
        posts = mm_api.get_posts_for_channel(channel_id, last_fetch_time)
        # posts = mattermost.posts.get_posts_for_channel(channel_id)

        print(f"Scraping {len(posts)} posts")
        for _post_id, post in posts.items():
            for url in _URL_PATTERN.findall(post["message"]):
                if _CODIMD_HOST not in url:
                    # In this case it's an url but not for codimd
                    continue

                # Remove everything after the # or ?
                url = _strip_query_and_fragment(url)

                print(url)
                db.add_discovered_url(url, post)

    # When everything succeeded. Save the current unix time as latest fetched moment
    db.set_latest_sync_time(current_fetch_time)


def read_note(url, timeout=30):
    """Download the contents of the note at *url* and return it as text.

    Args:
        url: Base URL of the note; ``/download`` is appended to it.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so that an unresponsive server cannot block the sync forever.
    """
    return requests.get(f"{url}/download", timeout=timeout).text


def download_files():
    """Download every file known to ``db`` into the local data directory.

    Each note is written to ``data/note-<file_id>.md`` and that path is then
    recorded through ``db.set_local_file_path``.
    """
    os.makedirs(_DATA_DIR, exist_ok=True)

    for file_id, file_info in db.get_files().items():
        local_file_path = f"{_DATA_DIR}/note-{file_id}.md"
        url = file_info["source_url"]

        print(f"Downloading url {url}")
        # Download before opening the file: opening in "w" mode truncates it,
        # so a failed request would otherwise wipe a previously stored note.
        note_text = read_note(url)
        with open(local_file_path, "w") as f:
            f.write(note_text)

        db.set_local_file_path(file_id, local_file_path)


def send_message(file_id, file_info, message):
    """Post *message* as a reply to the Mattermost post a file was found in.

    Args:
        file_id: Identifier of the file (currently unused, kept for callers).
        file_info: File record; its ``originating_mm_post_channel_id`` and
            ``originating_mm_post_id`` entries select the channel and post.
        message: Text to post.
    """
    channel_id = file_info["originating_mm_post_channel_id"]
    post_id = file_info["originating_mm_post_id"]

    # TODO Comment below line, this is for testing purposes
    # channel_id = MMApi().get_channel_id("bestuur-dev")
    channel = ChannelApi(
        channel_id=channel_id,
        user=mattermost.users["flynn"],
    )

    prefix = ""
    # This is bestuur-INTERN where you can only post when you prefix your message with a '!'
    if file_info["originating_mm_post_channel_id"] == _BESTUUR_INTERN_CHANNEL_ID:
        prefix = "! "

    try:
        channel.create_threaded_post(
            post_id,
            f"{prefix}{message}",
        )
    except mattermostdriver.exceptions.InvalidOrMissingParameters:
        # This will occur when we try to react to a file in a channel that is
        # not the same as the originating channel. Fall back to a regular
        # post that links to the original one.
        unique_post_url = f"https://mattermost.zeus.gent/zeus/pl/{post_id}"
        channel.create_post(
            f"{unique_post_url}\n\n{message}",
        )


def report_newly_found_file(file_id, file_info):
    """Announce that a new, valid CodiMD file was found and will be synced."""
    message = f"I found a new CodiMD file in this post! Making work of putting it on gitlab :)\n - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): {file_info['metadata']['sync-to']}"
    send_message(file_id, file_info, message)


def report_newly_found_but_invalid_file(file_id, file_info):
    """Tell the poster that the file lacks the metadata block needed to sync."""
    message = """Hi there! :wave:
I'm your friendly neighbourhood document sync bot.
I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping, but the necessary metadata block is not present.
You can easily add the correct info and I will do the rest of the work for you!

Just add the following lines to your file, the location in your file is not important but at the top would be my recommendation.

```
:::spoiler Gitlab sync
- sync-to:
:::
```"""
    send_message(file_id, file_info, message)


def validate_downloaded_files():
    """Mark every downloaded file as valid or invalid and report changes.

    A file is valid when ``dir_utils.find_metadata`` returns metadata for
    its local copy. A message is only sent when the ``db`` call reports the
    state as new/changed, so files are not announced on every run.

    Returns:
        Whatever ``db._load_db()`` returns after all files were processed.
    """
    for file_id, file_info in db.get_files().items():
        file_path = file_info["local_file_path"]
        metadata = dir_utils.find_metadata(file_path)

        if metadata is not None:
            is_new_file, new_file_info = db.mark_file_valid(file_id, metadata)
            if is_new_file:
                report_newly_found_file(file_id, new_file_info)
        else:
            changed, new_file_info = db.mark_file_invalid(file_id)
            if changed:
                report_newly_found_but_invalid_file(file_id, new_file_info)

    return db._load_db()


@hug.get("/sync-mattermost")
def sync_mattermost():
    """Run the full sync: discover URLs, download notes, validate them.

    Exposed as ``GET /sync-mattermost``; returns the result of
    ``db._load_db()`` after the three steps have run.
    """
    print()
    print("=======================================")
    print("== Finding urls posted on mattermost ==")
    find_codimd_files_on_mattermost()

    print()
    print("=============================")
    print("== Downloading found files ==")
    download_files()

    print()
    print("================================================")
    print("== Finding valid files in the downloaded ones ==")
    validate_downloaded_files()

    print()
    return db._load_db()