codimd-git-sync/src/web.py

159 lines
5.4 KiB
Python

import json
import os
import re
import time
from pprint import pprint
import hug
import mattermostdriver.exceptions
import requests
import db
import dir_utils
import mattermost
from mattermost import ChannelApi, MMApi
from utils import id_to_url, url_to_id
def _strip_fragment_and_query(url):
    """Return ``url`` cut off at its first ``#`` and its first ``?``."""
    for separator in ("#", "?"):
        # partition() splits on the FIRST occurrence and returns the whole
        # string as the head when the separator is absent, so a URL with
        # several '#'/'?' characters cannot keep part of its fragment/query.
        url = url.partition(separator)[0]
    return url


def find_codimd_files_on_mattermost():
    """Scan the configured Mattermost channels for CodiMD links.

    Fetches the posts of every channel in the hard-coded channel list,
    starting from the sync time stored in ``db``, extracts every URL that
    contains ``codimd.zeus.gent`` from each post's message, strips the
    fragment and query string, and hands the URL together with its post to
    ``db.add_discovered_url``.

    After all channels have been processed, the timestamp taken at the start
    of this call (milliseconds since the epoch) is stored as the new latest
    sync time.
    """
    # Named mm_api so the imported ``mattermost`` module is not shadowed.
    mm_api = MMApi()
    channels = [
        "hrx6pgfswjbttcj8nim3jrwe7w",  # bestuur-INTERN
        "uda7ax9poprduq8ob56e1fqk4e",  # bestuur
    ]
    # Compiled once instead of once per post.
    # old: r"(https?://[^\s#?]+)"
    url_pattern = re.compile(
        r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)"
    )
    last_fetch_time = db.get_latest_sync_time()
    # Taken before fetching, so the stored sync time never lies after the
    # moment the posts were actually requested.
    current_fetch_time = int(time.time() * 1000)
    print(f"Fetching posts since: {last_fetch_time}")
    for channel_id in channels:
        print(f"Fetching posts for channel_id: {channel_id}")
        # TODO Use first statement for all posts
        posts = mm_api.get_posts_for_channel(channel_id, last_fetch_time)
        # posts = mattermost.posts.get_posts_for_channel(channel_id)
        print(f"Scraping {len(posts)} posts")
        for post in posts.values():
            for url in url_pattern.findall(post["message"]):
                if "codimd.zeus.gent" not in url:
                    # In this case it's an url but not for codimd
                    continue
                # Remove everything after the # or ?
                url = _strip_fragment_and_query(url)
                print(url)
                db.add_discovered_url(url, post)
    # When everything succeeded. Save the current unix time as latest fetched moment
    db.set_latest_sync_time(current_fetch_time)
def read_note(url, timeout=30):
    """Download the contents of the note at ``url``.

    Issues a GET request to ``{url}/download`` (the suffix is appended
    verbatim) and returns the response body decoded as text. The HTTP status
    code is not inspected.

    Args:
        url: Base URL of the note.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            whole sync on a stalled server.

    Returns:
        The response body as a string.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    response = requests.get(f"{url}/download", timeout=timeout)
    return response.text
def download_files():
    """Download every file known to ``db`` into the ``data`` directory.

    For each ``(file_id, file_info)`` pair returned by ``db.get_files()``,
    the note at ``file_info["source_url"]`` is fetched with ``read_note`` and
    written to ``data/note-{file_id}.md``; the path is then recorded through
    ``db.set_local_file_path``.
    """
    for file_id, file_info in db.get_files().items():
        local_file_path = f"data/note-{file_id}.md"
        url = file_info["source_url"]
        print(f"Downloading url {url}")
        # Fetch BEFORE opening the file: mode "w" truncates on open, so a
        # failed download would otherwise wipe the previously saved copy.
        note_text = read_note(url)
        with open(local_file_path, "w") as f:
            f.write(note_text)
        db.set_local_file_path(file_id, local_file_path)
def send_message(file_id, file_info, message):
    """Post ``message`` on Mattermost in reply to the post a file came from.

    The channel and post are read from ``file_info`` (keys
    ``originating_mm_post_channel_id`` and ``originating_mm_post_id``). The
    message is first sent as a threaded reply; if Mattermost rejects that
    with ``InvalidOrMissingParameters``, a regular post containing a link to
    the originating post followed by the message is created instead.

    ``file_id`` is accepted for symmetry with the callers but is not used.
    """
    origin_channel_id = file_info["originating_mm_post_channel_id"]
    origin_post_id = file_info["originating_mm_post_id"]
    # TODO Comment below line, this is for testing purposes
    # channel_id = MMApi().get_channel_id("bestuur-dev")
    target_channel = ChannelApi(
        channel_id=origin_channel_id,
        user=mattermost.users["flynn"],
    )
    # This is bestuur-INTERN where you can only post when you prefix your message with a '!'
    in_bestuur_intern = origin_channel_id == "hrx6pgfswjbttcj8nim3jrwe7w"
    prefix = "! " if in_bestuur_intern else ""
    try:
        target_channel.create_threaded_post(origin_post_id, f"{prefix}{message}")
    except mattermostdriver.exceptions.InvalidOrMissingParameters:
        # This will occur when we try to react to a file in a channel that is
        # not the same as the originating channel.
        unique_post_url = f"https://mattermost.zeus.gent/zeus/pl/{origin_post_id}"
        target_channel.create_post(f"{unique_post_url}\n\n{message}")
def report_newly_found_file(file_id, file_info):
    """Announce on Mattermost that a new CodiMD file with valid metadata was found.

    The announcement mentions the requested drive location, read from
    ``file_info["metadata"]["sync-to"]``, and is delivered via
    ``send_message``.
    """
    requested_location = file_info["metadata"]["sync-to"]
    announcement = (
        "I found a new CodiMD file in this post! Making work of putting it on gitlab :)\n"
        f" - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): {requested_location}"
    )
    send_message(file_id, file_info, announcement)
def report_newly_found_but_invalid_file(file_id, file_info):
    """Tell the poster that their CodiMD file lacks the sync metadata block.

    Sends a fixed help text via ``send_message`` explaining which lines have
    to be added to the file so that it can be synchronized.
    """
    help_lines = (
        "Hi there! :wave:",
        "I'm your friendly neighbourhood document sync bot.",
        "I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping, but the necessary metadata block is not present.",
        "You can easily add the correct info and I will do the rest of the work for you!",
        "Just add the following lines to your file, the location in your file is not important but at the top would be my recommendation.",
        "```",
        ":::spoiler Gitlab sync",
        "- sync-to: <a valid path on the DRIVE, for ex.: verslagen/21-22/2022-05-13.md>",
        ":::",
        "```",
    )
    # Joined without a trailing newline, matching the original literal.
    send_message(file_id, file_info, "\n".join(help_lines))
def validate_downloaded_files():
    """Check every downloaded file for a metadata block and report changes.

    For each file in ``db.get_files()`` the metadata is looked up with
    ``dir_utils.find_metadata`` on the file's ``local_file_path``:

    * metadata found: the file is marked valid; if ``db.mark_file_valid``
      reports it as new, ``report_newly_found_file`` is called.
    * no metadata (``None``): the file is marked invalid; if
      ``db.mark_file_invalid`` reports a change,
      ``report_newly_found_but_invalid_file`` is called.

    Returns:
        The result of ``db._load_db()`` after all files were processed.
    """
    # The former ``os.listdir("data")`` call was removed: its result was
    # never used, so it only added a directory scan per run.
    for file_id, file_info in db.get_files().items():
        file_path = file_info["local_file_path"]
        metadata = dir_utils.find_metadata(file_path)
        if metadata is not None:
            is_new_file, new_file_info = db.mark_file_valid(file_id, metadata)
            if is_new_file:
                report_newly_found_file(file_id, new_file_info)
        else:
            changed, new_file_info = db.mark_file_invalid(file_id)
            if changed:
                report_newly_found_but_invalid_file(file_id, new_file_info)
    return db._load_db()
@hug.get("/sync-mattermost")
def sync_mattermost():
    """Run the full sync pipeline for the ``/sync-mattermost`` endpoint.

    Runs, in order: URL discovery on Mattermost, download of the discovered
    files, and validation of the downloaded files. A banner is printed
    before each stage.

    Returns:
        The result of ``db._load_db()`` after all stages have run.
    """
    # (banner rule, banner title, stage to run), in execution order.
    stages = (
        (
            "=======================================",
            "== Finding urls posted on mattermost ==",
            find_codimd_files_on_mattermost,
        ),
        (
            "=============================",
            "== Downloading found files ==",
            download_files,
        ),
        (
            "================================================",
            "== Finding valid files in the downloaded ones ==",
            validate_downloaded_files,
        ),
    )
    for rule, title, run_stage in stages:
        print()
        print(rule)
        print(title)
        run_stage()
    print()
    return db._load_db()