Add mattermost posts for new valid or invalid files

This commit is contained in:
mcbloch 2022-10-07 13:22:07 +02:00
parent edb25acdd9
commit 3aec9ffe3a
7 changed files with 163 additions and 41 deletions

View file

@ -1,6 +1,8 @@
import json
from os.path import exists
from typing import List
from typing import Dict, List
from utils import id_to_url, url_to_id
db_filename = "db.json"
@ -24,7 +26,7 @@ def _load_db():
def _save_db(db):
with open(db_filename, "w") as db_file:
db_file.write(json.dumps(db))
db_file.write(json.dumps(db, indent=4))
def get_latest_sync_time() -> int:
@ -38,33 +40,66 @@ def set_latest_sync_time(le_date) -> None:
_save_db(db)
def add_discovered_file(file_url) -> List[str]:
def add_discovered_url(file_url, originating_mm_post) -> List[str]:
db = _load_db()
discovered_files = set(db.get("discovered_files", []))
discovered_files.add(file_url)
discovered_files = list(discovered_files)
db["discovered_files"] = discovered_files
files = db.get("files", {})
file_id = url_to_id(file_url)
if file_id not in files:
files[file_id] = {
"originating_mm_post_id": originating_mm_post["id"],
"originating_mm_post_channel_id": originating_mm_post["channel_id"],
"source_url": file_url,
}
db["files"] = files
_save_db(db)
return discovered_files
return files
def get_discovered_files() -> List[str]:
def get_files() -> List[str]:
db = _load_db()
discovered_files = set(db.get("discovered_files", []))
return discovered_files
files = db.get("files", {})
return files
def add_valid_file(filename, metadata) -> bool:
def set_local_file_path(file_id, local_file_path):
    """Store the on-disk location of a known file and return its entry.

    Loads the database, writes ``local_file_path`` onto the entry stored
    under ``db["files"][file_id]``, saves the database again and returns
    that entry. A ``file_id`` that has no entry raises ``KeyError``.
    """
    database = _load_db()
    entry = database["files"][file_id]
    entry.update({"local_file_path": local_file_path})
    _save_db(database)
    return entry
def mark_file_valid(file_id, metadata) -> (bool, Dict):
"""
Returns: Boolean that indicates if the file is newly identified as valid.
Return: Boolean that indicates if the file is newly identified as valid.
"""
db = _load_db()
valid_files = db.get("valid_files", {})
file = db["files"][file_id]
new_file = False
if filename not in valid_files:
if "valid" not in file or not file["valid"]:
new_file = True
valid_files[filename] = metadata
db["valid_files"] = valid_files
file["valid"] = True
file["metadata"] = metadata
_save_db(db)
return new_file
return new_file, file
def mark_file_invalid(file_id) -> (bool, Dict):
    """Flag the stored entry for ``file_id`` as invalid and save the database.

    Returns a ``(changed, entry)`` pair. ``changed`` is True when the entry
    was valid, or had no validity recorded yet, before this call; it is
    False when the entry was already marked invalid.
    """
    database = _load_db()
    entry = database["files"][file_id]
    # A missing "valid" key counts as a state change, just like a truthy one.
    changed = bool(entry.get("valid", True))
    entry["valid"] = False
    _save_db(database)
    return changed, entry

View file

@ -2,7 +2,7 @@ import os
import re
# pattern = re.compile(":::spoiler Gitlab sync([^:]*):::")
pattern = re.compile("[^:]*:::")
# pattern = re.compile("[^:]*:::")
def find_metadata(filename):

View file

@ -12,7 +12,7 @@ from colored import style
from mattermostdriver import Driver
from tabulate import tabulate
from mattermost_objects import MMUser, MMPost, MMChannelPosts
from mattermost_objects import MMChannelPosts, MMPost, MMUser
from utils import humanize_date_difference, timer
pp = pp.PrettyPrinter(indent=2)
@ -156,15 +156,33 @@ class MMApi(Driver):
class ChannelApi(MMApi):
def __init__(self, channel_name, user=None):
def __init__(self, channel_name=None, channel_id=None, user=None):
    """Bind this API wrapper to one channel.

    ``channel_id`` wins when it is given; otherwise ``channel_name`` is
    resolved through ``get_channel_id``. ``user`` is handed to
    ``MMApi.__init__`` unchanged.

    Raises:
        ValueError: when neither ``channel_name`` nor ``channel_id`` is given.
    """
    MMApi.__init__(self, user)
    # Explicit raise instead of assert: asserts vanish under ``python -O``,
    # which would leave ``self.channel_id`` unset.
    if channel_name is None and channel_id is None:
        raise ValueError("either channel_name or channel_id is required")
    if channel_id is not None:
        # An explicit id always took precedence, so skip the name lookup.
        self.channel_id = channel_id
    else:
        self.channel_id = self.get_channel_id(channel_name)
def create_post(self, message: str, props: Dict = None) -> None:
    """Create a post with ``message`` and ``props`` in this instance's channel, then log it."""
    payload = dict(
        channel_id=self.channel_id,
        message=message,
        props=props,
    )
    resp = self.posts.create_post(options=payload)
    self.log(f'Message successfully created: "{message}"')
def create_threaded_post(
    self, post_id: str, message: str, props: Dict = None
) -> None:
    """Create a post in this instance's channel with ``post_id`` as its ``root_id``, then log it.

    NOTE(review): ``root_id`` presumably makes the server thread the new
    post under ``post_id`` - confirm against the Mattermost API.
    """
    payload = dict(
        channel_id=self.channel_id,
        message=message,
        root_id=post_id,
        props=props,
    )
    resp = self.posts.create_post(options=payload)
    self.log(f'Message successfully created: "{message}"')
# print_response("Create post", resp)

View file

@ -1,4 +1,4 @@
from typing import NamedTuple, List, Dict
from typing import Dict, List, NamedTuple
class MMUser(NamedTuple):

View file

@ -1,11 +1,12 @@
#!/usr/bin/env -S nix shell --impure --expr "(import (builtins.getFlake \"nixpkgs\") {}).python3.withPackages (ps: [ ps.python-gitlab ps.GitPython ])" --command python
import git
import gitlab
import json
import os
import pathlib
import git
import gitlab
TOKEN_NAME = os.environ['GITLAB_ACCESS_TOKEN_NAME']
TOKEN = os.environ['GITLAB_ACCESS_TOKEN']
REPO_FOLDER = 'drive'

View file

@ -3,6 +3,15 @@ import functools
import time
def url_to_id(url: str) -> str:
    """Return the part of ``url`` after its last "/" (the whole string when it has none)."""
    _, _, tail = url.rpartition("/")
    return tail
def id_to_url(id):
    """Return the codimd.zeus.gent URL whose last path segment is ``id``."""
    return "https://codimd.zeus.gent/" + format(id)
def timer(func):
"""Print the runtime of the decorated function"""

View file

@ -2,14 +2,17 @@ import json
import os
import re
import time
from pprint import pprint
import hug
import mattermostdriver.exceptions
import requests
import db
import dir_utils
from mattermost import MMApi, ChannelApi
import mattermost
from mattermost import ChannelApi, MMApi
from utils import id_to_url, url_to_id
def find_codimd_files_on_mattermost():
@ -31,15 +34,23 @@ def find_codimd_files_on_mattermost():
print(f"Scraping {len(posts)} posts")
for post_id, post in posts.items():
urls = re.findall(r"(https?://[^\s#?]+)", post["message"])
# old: r"(https?://[^\s#?]+)"
url_regex = r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)"
urls = re.findall(url_regex, post["message"])
for url in urls:
idx = url.find("codimd.zeus.gent")
if idx == -1:
# In this case it's an url but not for codimd
continue
# Remove everything after the # or ?
for char in ["#", "?"]:
cut_idx = url.rfind(char) if url.rfind(char) != -1 else len(url)
url = url[:cut_idx]
# pprint(post)
print(url)
db.add_discovered_file(url)
db.add_discovered_url(url, post)
# When everything succeeded. Save the current unix time as latest fetched moment
db.set_latest_sync_time(current_fetch_time)
@ -50,32 +61,80 @@ def read_note(url):
def download_files():
for url in db.get_discovered_files():
with open(f'data/note-{url[url.rfind("/") + 1:]}.md', "w") as f:
for file_id, file_info in db.get_files().items():
local_file_path = f"data/note-{file_id}.md"
url = file_info["source_url"]
with open(local_file_path, "w") as f:
print(f"Downloading url {url}")
f.write(read_note(url))
db.set_local_file_path(file_id, local_file_path)
def filename_to_url(filename):
return f"https://codimd.zeus.gent/{filename[5:-3]}"
def report_newly_found_file(filename):
channel = ChannelApi("bestuur-dev", mattermost.users["flynn"])
def send_message(file_id, file_info, message):
channel_id = file_info["originating_mm_post_channel_id"]
post_id = file_info["originating_mm_post_id"]
channel.create_post(
f"I found a new report: {filename_to_url(filename)}! Making work of putting it on gitlab :)"
# TODO Comment below line, this is for testing purposes
# channel_id = MMApi().get_channel_id("bestuur-dev")
channel = ChannelApi(
channel_id=channel_id,
user=mattermost.users["flynn"],
)
prefix = ""
# This is bestuur-INTERN where you can only post when you prefix your message with a '!'
if file_info["originating_mm_post_channel_id"] == "hrx6pgfswjbttcj8nim3jrwe7w":
prefix = "! "
try:
channel.create_threaded_post(
post_id,
f"{prefix}{message}",
)
except mattermostdriver.exceptions.InvalidOrMissingParameters as e:
# This will occur when we try to react to a file in a channel that is not the same as the originating channel.
unique_post_url = f"https://mattermost.zeus.gent/zeus/pl/{post_id}"
channel.create_post(
f"{unique_post_url}\n\n{message}",
)
def report_newly_found_file(file_id, file_info):
    """Announce a newly validated file via ``send_message``.

    The announcement includes the ``sync-to`` value from
    ``file_info["metadata"]`` as the requested location in the drive.
    """
    requested_location = file_info["metadata"]["sync-to"]
    announcement = (
        "I found a new CodiMD file in this post! Making work of putting it on gitlab :)\n"
        " - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): "
        f"{requested_location}"
    )
    send_message(file_id, file_info, announcement)
def report_newly_found_but_invalid_file(file_id, file_info):
    """Send, via ``send_message``, instructions for adding the missing sync metadata block."""
    instructions = """Hi there! :wave:
I'm your friendly neighbourhood document sync bot.
I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping, but the necessary metadata block is not present.
You can easily add the correct info and I will do the rest of the work for you!
Just add the following lines to your file, the location in your file is not important but at the top would be my recommendation.
```
:::spoiler Gitlab sync
- sync-to: <a valid path on the DRIVE, for ex.: verslagen/21-22/2022-05-13.md>
:::
```"""
    send_message(file_id, file_info, instructions)
def validate_downloaded_files():
path = "data"
dir_list = os.listdir(path)
for filename in dir_list:
metadata = dir_utils.find_metadata("data/" + filename)
for file_id, file_info in db.get_files().items():
file_path = file_info["local_file_path"]
metadata = dir_utils.find_metadata(file_path)
if metadata is not None:
is_new_file = db.add_valid_file(filename, metadata)
is_new_file, new_file_info = db.mark_file_valid(file_id, metadata)
if is_new_file:
report_newly_found_file(filename)
report_newly_found_file(file_id, new_file_info)
else:
changed, new_file_info = db.mark_file_invalid(file_id)
if changed:
report_newly_found_but_invalid_file(file_id, new_file_info)
return db._load_db()