Add mattermost posts for new valid or invalid files

This commit is contained in:
mcbloch 2022-10-07 13:22:07 +02:00
parent edb25acdd9
commit 3aec9ffe3a
7 changed files with 163 additions and 41 deletions

View file

@ -1,6 +1,8 @@
import json
from os.path import exists
from typing import Dict, List, Tuple

from utils import id_to_url, url_to_id
db_filename = "db.json" db_filename = "db.json"
@ -24,7 +26,7 @@ def _load_db():
def _save_db(db):
    """Overwrite the database file with ``db`` serialized as JSON (4-space indent)."""
    with open(db_filename, "w") as handle:
        serialized = json.dumps(db, indent=4)
        handle.write(serialized)
def get_latest_sync_time() -> int: def get_latest_sync_time() -> int:
@ -38,33 +40,66 @@ def set_latest_sync_time(le_date) -> None:
_save_db(db) _save_db(db)
def add_discovered_url(file_url, originating_mm_post) -> Dict:
    """
    Register a discovered file URL in the database.

    The file id is derived from ``file_url`` with ``url_to_id``. When that id is
    not stored yet, a new entry is created that remembers the Mattermost post in
    which the URL was found (its "id" and "channel_id") together with the URL.
    An id that is already stored is left untouched.

    Return: The mapping of file id to file info for all known files.
    """
    db = _load_db()
    files = db.setdefault("files", {})
    file_id = url_to_id(file_url)
    if file_id in files:
        # Already known: the first post that mentioned it stays the originating
        # post, and there is nothing new to write to disk.
        return files

    files[file_id] = {
        "originating_mm_post_id": originating_mm_post["id"],
        "originating_mm_post_channel_id": originating_mm_post["channel_id"],
        "source_url": file_url,
    }
    _save_db(db)
    return files
def get_files() -> Dict:
    """
    Return: The mapping of file id to file info stored in the database,
    or an empty dict when no file has been registered yet.
    """
    return _load_db().get("files", {})
def set_local_file_path(file_id, local_file_path):
    """
    Store where a file was saved locally on its database entry.

    The entry for ``file_id`` is looked up with plain indexing, so it has to
    exist already.

    Return: The updated file info.
    """
    db = _load_db()
    entry = db["files"][file_id]
    entry.update({"local_file_path": local_file_path})
    _save_db(db)
    return entry
def mark_file_valid(file_id, metadata) -> Tuple[bool, Dict]:
    """
    Mark a file as valid and store its metadata on the database entry.

    Return: A tuple of
        - Boolean that indicates if the file is newly identified as valid
          (it had no "valid" flag yet, or the flag was false).
        - The updated file info.
    """
    db = _load_db()
    file_info = db["files"][file_id]
    # A missing "valid" flag counts as "not valid yet".
    newly_valid = not file_info.get("valid", False)
    file_info["valid"] = True
    file_info["metadata"] = metadata
    _save_db(db)
    return newly_valid, file_info
def mark_file_invalid(file_id) -> Tuple[bool, Dict]:
    """
    Mark a file as invalid on its database entry.

    Return: A tuple of
        - Boolean that indicates if the state is changed.
          True means it was valid but is now invalid or it was unknown and is now invalid.
          False means it was invalid and is still invalid.
        - The file info.
    """
    db = _load_db()
    file_info = db["files"][file_id]
    # A missing "valid" flag counts as a change (unknown -> invalid).
    changed = bool(file_info.get("valid", True))
    if changed:
        file_info["valid"] = False
        # Only touch the disk when the stored state actually differs.
        _save_db(db)
    return changed, file_info

View file

@ -2,7 +2,7 @@ import os
import re import re
# pattern = re.compile(":::spoiler Gitlab sync([^:]*):::") # pattern = re.compile(":::spoiler Gitlab sync([^:]*):::")
pattern = re.compile("[^:]*:::") # pattern = re.compile("[^:]*:::")
def find_metadata(filename): def find_metadata(filename):

View file

@ -12,7 +12,7 @@ from colored import style
from mattermostdriver import Driver from mattermostdriver import Driver
from tabulate import tabulate from tabulate import tabulate
from mattermost_objects import MMUser, MMPost, MMChannelPosts from mattermost_objects import MMChannelPosts, MMPost, MMUser
from utils import humanize_date_difference, timer from utils import humanize_date_difference, timer
pp = pp.PrettyPrinter(indent=2) pp = pp.PrettyPrinter(indent=2)
@ -156,15 +156,33 @@ class MMApi(Driver):
class ChannelApi(MMApi): class ChannelApi(MMApi):
def __init__(self, channel_name=None, channel_id=None, user=None):
    """
    Create an API client bound to a single channel.

    The channel is given either by name (resolved with ``get_channel_id``) or
    directly by id. When both are given, ``channel_id`` wins and the name is
    not looked up. Pass the arguments by keyword: the second positional slot
    is ``channel_id``, not ``user``.

    Raises ValueError when neither ``channel_name`` nor ``channel_id`` is given.
    """
    MMApi.__init__(self, user)
    if channel_id is not None:
        self.channel_id = channel_id
    elif channel_name is not None:
        self.channel_id = self.get_channel_id(channel_name)
    else:
        # An explicit exception instead of ``assert``: asserts are removed
        # when Python runs with -O, which would leave channel_id unset.
        raise ValueError("Either channel_name or channel_id is required")
def create_post(self, message: str, props: Dict = None) -> None:
    """
    Post ``message`` as a new post in this channel.

    ``props`` is passed through unchanged as the "props" option of the post.
    """
    options = {"channel_id": self.channel_id, "message": message, "props": props}
    self.posts.create_post(options=options)
    self.log(f'Message successfully created: "{message}"')
def create_threaded_post(
    self, post_id: str, message: str, props: Dict = None
) -> None:
    """
    Post ``message`` in this channel with ``post_id`` as the "root_id" of the new post.

    ``props`` is passed through unchanged as the "props" option of the post.
    """
    options = {
        "channel_id": self.channel_id,
        "message": message,
        "root_id": post_id,
        "props": props,
    }
    self.posts.create_post(options=options)
    self.log(f'Message successfully created: "{message}"')
# print_response("Create post", resp) # print_response("Create post", resp)

View file

@ -1,4 +1,4 @@
from typing import NamedTuple, List, Dict from typing import Dict, List, NamedTuple
class MMUser(NamedTuple): class MMUser(NamedTuple):

View file

@ -1,11 +1,12 @@
#!/usr/bin/env -S nix shell --impure --expr "(import (builtins.getFlake \"nixpkgs\") {}).python3.withPackages (ps: [ ps.python-gitlab ps.GitPython ])" --command python #!/usr/bin/env -S nix shell --impure --expr "(import (builtins.getFlake \"nixpkgs\") {}).python3.withPackages (ps: [ ps.python-gitlab ps.GitPython ])" --command python
import git
import gitlab
import json import json
import os import os
import pathlib import pathlib
import git
import gitlab
TOKEN_NAME = os.environ['GITLAB_ACCESS_TOKEN_NAME'] TOKEN_NAME = os.environ['GITLAB_ACCESS_TOKEN_NAME']
TOKEN = os.environ['GITLAB_ACCESS_TOKEN'] TOKEN = os.environ['GITLAB_ACCESS_TOKEN']
REPO_FOLDER = 'drive' REPO_FOLDER = 'drive'

View file

@ -3,6 +3,15 @@ import functools
import time import time
def url_to_id(url: str) -> str:
    """
    Return the last path segment of ``url``, which serves as the file id.

    Trailing slashes are ignored, so ``.../abc/`` and ``.../abc`` give the same
    id. A string without any slash is returned unchanged.
    """
    return url.rstrip("/").rsplit("/", 1)[-1]
def id_to_url(id, base_url="https://codimd.zeus.gent"):
    """
    Build the URL of the note with the given id.

    ``base_url`` defaults to the Zeus CodiMD instance; a trailing slash on it
    is ignored so the result never contains a double slash.
    """
    # The parameter keeps the name ``id`` (shadowing the builtin) so that
    # existing keyword callers keep working.
    return f"{base_url.rstrip('/')}/{id}"
def timer(func): def timer(func):
"""Print the runtime of the decorated function""" """Print the runtime of the decorated function"""

View file

@ -2,14 +2,17 @@ import json
import os import os
import re import re
import time import time
from pprint import pprint
import hug import hug
import mattermostdriver.exceptions
import requests import requests
import db import db
import dir_utils import dir_utils
from mattermost import MMApi, ChannelApi
import mattermost import mattermost
from mattermost import ChannelApi, MMApi
from utils import id_to_url, url_to_id
def find_codimd_files_on_mattermost(): def find_codimd_files_on_mattermost():
@ -31,15 +34,23 @@ def find_codimd_files_on_mattermost():
print(f"Scraping {len(posts)} posts") print(f"Scraping {len(posts)} posts")
for post_id, post in posts.items(): for post_id, post in posts.items():
urls = re.findall(r"(https?://[^\s#?]+)", post["message"]) # old: r"(https?://[^\s#?]+)"
url_regex = r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)"
urls = re.findall(url_regex, post["message"])
for url in urls: for url in urls:
idx = url.find("codimd.zeus.gent") idx = url.find("codimd.zeus.gent")
if idx == -1: if idx == -1:
# In this case it's an url but not for codimd # In this case it's an url but not for codimd
continue continue
# Remove everything after the # of ?
for char in ["#", "?"]:
cut_idx = url.rfind(char) if url.rfind(char) != -1 else len(url)
url = url[:cut_idx]
# pprint(post)
print(url) print(url)
db.add_discovered_file(url) db.add_discovered_url(url, post)
# When everything succeeded. Save the current unix time as latest fetched moment # When everything succeeded. Save the current unix time as latest fetched moment
db.set_latest_sync_time(current_fetch_time) db.set_latest_sync_time(current_fetch_time)
@ -50,32 +61,80 @@ def read_note(url):
def download_files():
    """
    Download every file known to the database.

    Each note is written to ``data/note-<file_id>.md`` and that path is then
    stored on the file's database entry.
    """
    for file_id, file_info in db.get_files().items():
        local_file_path = f"data/note-{file_id}.md"
        url = file_info["source_url"]
        print(f"Downloading url {url}")
        # Fetch before opening the target: opening with "w" truncates it, so a
        # failed download would otherwise wipe the previously saved copy.
        content = read_note(url)
        with open(local_file_path, "w") as f:
            f.write(content)
        db.set_local_file_path(file_id, local_file_path)
def filename_to_url(filename):
return f"https://codimd.zeus.gent/{filename[5:-3]}"
def send_message(file_id, file_info, message):
    """
    Send ``message`` as a reply to the Mattermost post in which a file was found.

    The channel and post are read from ``file_info``
    ("originating_mm_post_channel_id" and "originating_mm_post_id"). When the
    threaded reply is rejected, a regular post that links to the original post
    is created in the same channel instead. ``file_id`` is accepted for
    symmetry with the reporting functions but is not used here.
    """
    channel_id = file_info["originating_mm_post_channel_id"]
    post_id = file_info["originating_mm_post_id"]

    # TODO Comment below line, this is for testing purposes
    # channel_id = MMApi().get_channel_id("bestuur-dev")
    channel = ChannelApi(
        channel_id=channel_id,
        user=mattermost.users["flynn"],
    )

    # This is bestuur-INTERN where you can only post when you prefix your message with a '!'
    prefix = "! " if channel_id == "hrx6pgfswjbttcj8nim3jrwe7w" else ""

    try:
        channel.create_threaded_post(
            post_id,
            f"{prefix}{message}",
        )
    except mattermostdriver.exceptions.InvalidOrMissingParameters:
        # This will occur when we try to react to a file in a channel that is not the same as the originating channel.
        unique_post_url = f"https://mattermost.zeus.gent/zeus/pl/{post_id}"
        # The fallback post goes to the same channel, so it needs the same prefix.
        channel.create_post(
            f"{prefix}{unique_post_url}\n\n{message}",
        )
def report_newly_found_file(file_id, file_info):
    """Announce a newly validated file, including the "sync-to" location from its metadata."""
    requested_location = file_info["metadata"]["sync-to"]
    announcement = (
        "I found a new CodiMD file in this post! Making work of putting it on gitlab :)\n"
        f" - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): {requested_location}"
    )
    send_message(file_id, file_info, announcement)
def report_newly_found_but_invalid_file(file_id, file_info):
    """Explain which metadata block has to be added before the file can be synchronized."""
    help_text = """Hi there! :wave:
I'm your friendly neighbourhood document sync bot.
I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping, but the necessary metadata block is not present.
You can easily add the correct info and I will do the rest of the work for you!
Just add the following lines to your file, the location in your file is not important but at the top would be my recommendation.
```
:::spoiler Gitlab sync
- sync-to: <a valid path on the DRIVE, for ex.: verslagen/21-22/2022-05-13.md>
:::
```"""
    send_message(file_id, file_info, help_text)
def validate_downloaded_files():
    """
    Check every known file for a metadata block and report state changes.

    A file for which ``dir_utils.find_metadata`` returns metadata is marked
    valid; a file for which it returns None is marked invalid. A message is
    sent only when the stored state actually changed. Entries without a
    "local_file_path" (not downloaded yet) are skipped.

    Return: The complete database content after validation.
    """
    for file_id, file_info in db.get_files().items():
        file_path = file_info.get("local_file_path")
        if file_path is None:
            # Nothing was downloaded for this entry, so there is nothing to inspect.
            continue

        metadata = dir_utils.find_metadata(file_path)
        if metadata is not None:
            is_new_file, new_file_info = db.mark_file_valid(file_id, metadata)
            if is_new_file:
                report_newly_found_file(file_id, new_file_info)
        else:
            changed, new_file_info = db.mark_file_invalid(file_id)
            if changed:
                report_newly_found_but_invalid_file(file_id, new_file_info)

    return db._load_db()