158 lines
5.4 KiB
Python
158 lines
5.4 KiB
Python
import json
|
|
import os
|
|
import re
|
|
import time
|
|
from pprint import pprint
|
|
|
|
import hug
|
|
import mattermostdriver.exceptions
|
|
import requests
|
|
|
|
import db
|
|
import dir_utils
|
|
import mattermost
|
|
from mattermost import ChannelApi, MMApi
|
|
from utils import id_to_url, url_to_id
|
|
|
|
|
|
def find_codimd_files_on_mattermost():
    """Scan the tracked Mattermost channels for CodiMD links and record them.

    For every hard-coded channel, the posts returned by
    ``MMApi.get_posts_for_channel`` (called with the last stored sync time)
    are searched for URLs. Each URL mentioning ``codimd.zeus.gent`` is
    stripped of its fragment and query string and handed to
    ``db.add_discovered_url`` together with the post it came from.

    The stored sync time is only advanced at the very end, so an exception
    anywhere in the scan leaves it untouched and the same window is scanned
    again on the next run.
    """
    # Named mm_api (not `mattermost`) so the imported `mattermost` module is
    # not shadowed inside this function.
    mm_api = MMApi()
    channels = [
        "hrx6pgfswjbttcj8nim3jrwe7w",  # bestuur-INTERN
        "uda7ax9poprduq8ob56e1fqk4e",  # bestuur
    ]

    # Compiled once instead of once per post.
    # old: r"(https?://[^\s#?]+)"
    url_pattern = re.compile(
        r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)"
    )

    last_fetch_time = db.get_latest_sync_time()
    # Taken before fetching, in milliseconds, so posts created while the scan
    # is running fall into the next sync window.
    current_fetch_time = int(time.time() * 1000)
    print(f"Fetching posts since: {last_fetch_time}")
    for channel_id in channels:
        print(f"Fetching posts for channel_id: {channel_id}")

        # TODO To scrape all posts instead of only the recent ones, call
        # get_posts_for_channel(channel_id) without the timestamp.
        posts = mm_api.get_posts_for_channel(channel_id, last_fetch_time)

        print(f"Scraping {len(posts)} posts")
        for post in posts.values():
            for url in url_pattern.findall(post["message"]):
                if "codimd.zeus.gent" not in url:
                    # In this case it's an url but not for codimd
                    continue

                # Remove everything from the FIRST '#' or '?' onwards, so
                # URLs with repeated markers (e.g. "...?a?b") are fully
                # reduced to the bare note URL.
                for char in ("#", "?"):
                    url = url.split(char, 1)[0]

                print(url)
                db.add_discovered_url(url, post)

    # When everything succeeded. Save the current unix time as latest fetched moment
    db.set_latest_sync_time(current_fetch_time)
|
|
|
|
|
|
def read_note(url, timeout=30):
    """Fetch the text served at a note's ``/download`` endpoint.

    Args:
        url: Base URL of the note; ``/download`` is appended to it.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would block the
            whole sync on a single unresponsive server.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    return requests.get(f"{url}/download", timeout=timeout).text
|
|
|
|
|
|
def download_files():
    """Download every file known to the database into the ``data`` directory.

    Each entry of ``db.get_files()`` is fetched from its ``source_url`` with
    ``read_note`` and written to ``data/note-<file_id>.md``; the resulting
    path is then stored through ``db.set_local_file_path``.
    """
    # Make sure the target directory exists so the first run does not fail.
    os.makedirs("data", exist_ok=True)

    for file_id, file_info in db.get_files().items():
        local_file_path = f"data/note-{file_id}.md"
        url = file_info["source_url"]

        print(f"Downloading url {url}")
        # Download BEFORE opening the file: mode "w" truncates on open, so a
        # failed request would otherwise wipe the previously saved copy.
        content = read_note(url)
        with open(local_file_path, "w") as f:
            f.write(content)

        db.set_local_file_path(file_id, local_file_path)
|
|
|
|
|
|
def send_message(file_id, file_info, message):
    """Reply to the Mattermost post in which a file was discovered.

    The message is posted as the ``flynn`` user in the thread of the
    originating post. When Mattermost rejects the threaded post with
    ``InvalidOrMissingParameters``, a regular post that links back to the
    original post is created in the channel instead.

    Args:
        file_id: Identifier of the file (not used by this function).
        file_info: Mapping providing ``originating_mm_post_channel_id`` and
            ``originating_mm_post_id``.
        message: Text to send.
    """
    channel_id = file_info["originating_mm_post_channel_id"]
    root_post_id = file_info["originating_mm_post_id"]

    # TODO Comment below line, this is for testing purposes
    # channel_id = MMApi().get_channel_id("bestuur-dev")
    target_channel = ChannelApi(channel_id=channel_id, user=mattermost.users["flynn"])

    # This is bestuur-INTERN where you can only post when you prefix your
    # message with a '!'. The check deliberately looks at the originating
    # channel, not at a possibly overridden channel_id.
    posted_in_intern = (
        file_info["originating_mm_post_channel_id"] == "hrx6pgfswjbttcj8nim3jrwe7w"
    )
    prefix = "! " if posted_in_intern else ""

    try:
        target_channel.create_threaded_post(root_post_id, f"{prefix}{message}")
    except mattermostdriver.exceptions.InvalidOrMissingParameters:
        # This will occur when we try to react to a file in a channel that is
        # not the same as the originating channel.
        permalink = f"https://mattermost.zeus.gent/zeus/pl/{root_post_id}"
        target_channel.create_post(f"{permalink}\n\n{message}")
|
|
|
|
|
|
def report_newly_found_file(file_id, file_info):
    """Announce that a newly found CodiMD file will be synced.

    The announcement is sent with ``send_message`` and mentions the location
    taken from ``file_info["metadata"]["sync-to"]``.
    """
    requested_location = file_info["metadata"]["sync-to"]
    announcement = (
        "I found a new CodiMD file in this post! Making work of putting it on gitlab :)\n"
        " - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): "
        f"{requested_location}"
    )
    send_message(file_id, file_info, announcement)
|
|
|
|
|
|
def report_newly_found_but_invalid_file(file_id, file_info):
    """Explain how to add the sync metadata block to a CodiMD file.

    Sends a fixed help text with ``send_message``; the text contains an
    example of the ``:::spoiler Gitlab sync`` block with a ``sync-to`` entry.
    """
    # One entry per line of the message; empty entries are blank lines.
    help_lines = [
        "Hi there! :wave:",
        "I'm your friendly neighbourhood document sync bot.",
        "I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping, but the necessary metadata block is not present.",
        "You can easily add the correct info and I will do the rest of the work for you!",
        "",
        "Just add the following lines to your file, the location in your file is not important but at the top would be my recommendation.",
        "",
        "```",
        ":::spoiler Gitlab sync",
        "- sync-to: <a valid path on the DRIVE, for ex.: verslagen/21-22/2022-05-13.md>",
        ":::",
        "```",
    ]
    send_message(file_id, file_info, "\n".join(help_lines))
|
|
|
|
|
|
def validate_downloaded_files():
    """Classify every downloaded file as valid or invalid and report changes.

    For each entry of ``db.get_files()`` the metadata is looked up with
    ``dir_utils.find_metadata`` on the entry's ``local_file_path``:

    * metadata found: the file is marked valid, and when the database
      reports it as new a confirmation is posted;
    * no metadata: the file is marked invalid, and when the database reports
      a change the help message is posted.

    Returns:
        The result of ``db._load_db()`` after all files were processed.
    """
    # The previous unused ``os.listdir("data")`` call was removed: its result
    # was never read and it raised when the directory did not exist yet.
    for file_id, file_info in db.get_files().items():
        file_path = file_info["local_file_path"]
        metadata = dir_utils.find_metadata(file_path)
        if metadata is not None:
            is_new_file, new_file_info = db.mark_file_valid(file_id, metadata)
            if is_new_file:
                report_newly_found_file(file_id, new_file_info)
        else:
            changed, new_file_info = db.mark_file_invalid(file_id)
            if changed:
                report_newly_found_but_invalid_file(file_id, new_file_info)

    return db._load_db()
|
|
|
|
|
|
@hug.get("/sync-mattermost")
def sync_mattermost():
    # HTTP entry point: runs the three sync stages in order, printing a
    # banner before each one, and returns the result of db._load_db().
    # Documented with comments rather than a docstring so the text hug
    # generates for this endpoint is unchanged.
    stages = (
        (
            "=======================================",
            "== Finding urls posted on mattermost ==",
            find_codimd_files_on_mattermost,
        ),
        (
            "=============================",
            "== Downloading found files ==",
            download_files,
        ),
        (
            "================================================",
            "== Finding valid files in the downloaded ones ==",
            validate_downloaded_files,
        ),
    )
    for rule, title, run_stage in stages:
        print()
        print(rule)
        print(title)
        run_stage()
    print()

    return db._load_db()
|