import json
import os
import re
import time

import hug
import requests

import db
import dir_utils
from mattermost import MMApi


def find_codimd_files_on_mattermost():
    """Scan recent Mattermost posts for codimd.zeus.gent links and record them.

    Fetches the posts newer than the last sync checkpoint from a fixed set
    of channels, extracts every URL pointing at codimd.zeus.gent, and stores
    each one via ``db.add_discovered_file``. When all channels have been
    processed the current time is persisted as the new checkpoint.
    """
    mattermost = MMApi()
    channels = [
        "hrx6pgfswjbttcj8nim3jrwe7w",  # bestuur-INTERN
        "uda7ax9poprduq8ob56e1fqk4e",  # bestuur
    ]

    last_fetch_time = db.get_latest_sync_time()
    current_fetch_time = int(time.time() * 1000)
    print(f"Fetching posts since: {last_fetch_time}")

    for channel_id in channels:
        print(f"Fetching posts for channel_id: {channel_id}")

        # TODO Use first statement for all posts
        posts = mattermost.get_posts_for_channel(channel_id, last_fetch_time)
        # posts = mattermost.posts.get_posts_for_channel(channel_id)

        print(f"Scraping {len(posts)} posts")
        for post_id, post in posts.items():
            urls = re.findall(r"(https?://[^\s#?]+)", post["message"])
            for url in urls:
                if "codimd.zeus.gent" not in url:
                    # In this case it's an url but not for codimd
                    continue
                print(url)
                db.add_discovered_file(url)

    # When everything succeeded:
    # save the current unix time as latest fetched moment.
    db.set_latest_sync_time(current_fetch_time)


def read_note(url):
    """Return the raw markdown content of the CodiMD note at *url*."""
    return requests.get(f"{url}/download").text


def download_files():
    """Download every discovered CodiMD note into the local ``data/`` folder.

    The note id (last path segment of the URL) becomes part of the filename.
    Files are written as UTF-8 explicitly: note contents are not guaranteed
    to fit the platform-default encoding.
    """
    for url in db.get_discovered_files():
        note_id = url[url.rfind("/") + 1:]
        with open(f"data/note-{note_id}.md", "w", encoding="utf-8") as f:
            print(f"Downloading url {url}")
            f.write(read_note(url))


def validate_downloaded_files():
    """Register every downloaded file that contains valid metadata.

    Walks the ``data`` directory, asks ``dir_utils.find_metadata`` for each
    file, and records the ones that yield metadata via ``db.add_valid_file``.
    Returns the full database state.
    """
    path = "data"
    dir_list = os.listdir(path)
    for filename in dir_list:
        metadata = dir_utils.find_metadata("data/" + filename)
        if metadata is not None:
            db.add_valid_file(filename, metadata)
    return db._load_db()


@hug.get("/sync-mattermost")
def sync_mattermost():
    """HTTP endpoint running the full discover → download → validate pipeline.

    Returns the database state after the sync so the caller can inspect the
    result.
    """
    print()
    print("=======================================")
    print("== Finding urls posted on mattermost ==")
    find_codimd_files_on_mattermost()
    print()
    print("=============================")
    print("== Downloading found files ==")
    download_files()
    print()
    print("================================================")
    print("== Finding valid files in the downloaded ones ==")
    validate_downloaded_files()
    print()
    return db._load_db()