Add mattermost posts for new valid or invalid files
This commit is contained in:
parent
edb25acdd9
commit
3aec9ffe3a
7 changed files with 163 additions and 41 deletions
71
src/db.py
71
src/db.py
|
@ -1,6 +1,8 @@
|
||||||
import json
|
import json
|
||||||
from os.path import exists
|
from os.path import exists
|
||||||
from typing import List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from utils import id_to_url, url_to_id
|
||||||
|
|
||||||
db_filename = "db.json"
|
db_filename = "db.json"
|
||||||
|
|
||||||
|
@ -24,7 +26,7 @@ def _load_db():
|
||||||
|
|
||||||
def _save_db(db):
    """Write ``db`` to ``db_filename`` as JSON indented with 4 spaces.

    The document is serialized before the file is opened: opening in ``"w"``
    mode truncates the file, so serializing first guarantees that a value
    ``json`` cannot encode raises without wiping the existing database.
    """
    serialized = json.dumps(db, indent=4)
    with open(db_filename, "w") as db_file:
        db_file.write(serialized)
|
||||||
|
|
||||||
|
|
||||||
def get_latest_sync_time() -> int:
|
def get_latest_sync_time() -> int:
|
||||||
|
@ -38,33 +40,66 @@ def set_latest_sync_time(le_date) -> None:
|
||||||
_save_db(db)
|
_save_db(db)
|
||||||
|
|
||||||
|
|
||||||
def add_discovered_file(file_url) -> List[str]:
|
def add_discovered_url(file_url, originating_mm_post) -> Dict[str, Dict]:
    """Register a discovered file URL in the database.

    The file id is derived from ``file_url`` with ``url_to_id``. If that id is
    not known yet, a record is created holding the source URL plus the ``id``
    and ``channel_id`` of the Mattermost post the URL was found in. A record
    that already exists is left untouched, so the first post that mentioned
    the file stays registered as its origin.

    Returns the complete mapping of file id -> file record after the update.
    """
    db = _load_db()
    # setdefault both reads the existing mapping and attaches a fresh one to
    # the db when the "files" key does not exist yet.
    files = db.setdefault("files", {})
    file_id = url_to_id(file_url)
    if file_id not in files:
        files[file_id] = {
            "originating_mm_post_id": originating_mm_post["id"],
            "originating_mm_post_channel_id": originating_mm_post["channel_id"],
            "source_url": file_url,
        }
    _save_db(db)
    return files
|
||||||
|
|
||||||
|
|
||||||
def get_discovered_files() -> List[str]:
|
def get_files() -> Dict[str, Dict]:
    """Return the mapping of file id -> file record stored in the database.

    An empty dict is returned when no file has been registered yet.
    """
    return _load_db().get("files", {})
|
||||||
|
|
||||||
|
|
||||||
def add_valid_file(filename, metadata) -> bool:
|
def set_local_file_path(file_id, local_file_path):
    """Store ``local_file_path`` on the record of ``file_id`` and persist the db.

    Returns the updated file record. Raises ``KeyError`` when the database has
    no record for ``file_id``.
    """
    database = _load_db()
    record = database["files"][file_id]
    record.update(local_file_path=local_file_path)
    _save_db(database)
    return record
|
||||||
|
|
||||||
|
|
||||||
|
def mark_file_valid(file_id, metadata) -> (bool, Dict):
    """
    Mark a known file as valid and store its metadata.

    The metadata is stored on every call, so a metadata block that was edited
    after the file was first recognised is not left stale in the database.

    Return: Tuple ``(new_file, file)``.
        ``new_file`` is a boolean that indicates if the file is newly identified
        as valid, i.e. it had no validity state yet or was marked invalid.
        ``file`` is the updated record of the file.

    Raises ``KeyError`` when the database has no record for ``file_id``.
    """
    db = _load_db()
    file = db["files"][file_id]

    # A missing "valid" key counts as "not valid yet".
    new_file = not file.get("valid", False)
    file["valid"] = True
    file["metadata"] = metadata

    _save_db(db)

    return new_file, file
|
||||||
|
|
||||||
|
|
||||||
|
def mark_file_invalid(file_id) -> (bool, Dict):
    """
    Mark a known file as invalid.

    Return: Tuple ``(changed, file)``.
        ``changed`` is a boolean that indicates if the state is changed:
        True means it was valid but is now invalid, or it was unknown and is now invalid.
        False means it was invalid and is still invalid.
        ``file`` is the record of the file after the update.
    """
    database = _load_db()
    record = database["files"][file_id]

    # No "valid" key yet means the state was unknown, which counts as a change.
    was_valid_or_unknown = "valid" not in record or record["valid"]
    changed = True if was_valid_or_unknown else False
    if changed:
        record["valid"] = False

    _save_db(database)

    return changed, record
|
||||||
|
|
|
@ -2,7 +2,7 @@ import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# pattern = re.compile(":::spoiler Gitlab sync([^:]*):::")
|
# pattern = re.compile(":::spoiler Gitlab sync([^:]*):::")
|
||||||
pattern = re.compile("[^:]*:::")
|
# pattern = re.compile("[^:]*:::")
|
||||||
|
|
||||||
|
|
||||||
def find_metadata(filename):
|
def find_metadata(filename):
|
||||||
|
|
|
@ -12,7 +12,7 @@ from colored import style
|
||||||
from mattermostdriver import Driver
|
from mattermostdriver import Driver
|
||||||
from tabulate import tabulate
|
from tabulate import tabulate
|
||||||
|
|
||||||
from mattermost_objects import MMUser, MMPost, MMChannelPosts
|
from mattermost_objects import MMChannelPosts, MMPost, MMUser
|
||||||
from utils import humanize_date_difference, timer
|
from utils import humanize_date_difference, timer
|
||||||
|
|
||||||
pp = pp.PrettyPrinter(indent=2)
|
pp = pp.PrettyPrinter(indent=2)
|
||||||
|
@ -156,15 +156,33 @@ class MMApi(Driver):
|
||||||
|
|
||||||
|
|
||||||
class ChannelApi(MMApi):
|
class ChannelApi(MMApi):
|
||||||
def __init__(self, channel_name=None, channel_id=None, user=None):
    """Bind this API client to a single channel.

    The channel is identified either by ``channel_name`` (resolved through
    ``get_channel_id``) or directly by ``channel_id``. When both are given the
    explicit ``channel_id`` wins and no name lookup is performed.

    Raises:
        ValueError: when neither ``channel_name`` nor ``channel_id`` is given.
    """
    # NOTE(review): ``channel_id`` sits before ``user``; a caller that still
    # passes the user as second positional argument (the older
    # ``(channel_name, user)`` form) would hand it in as channel_id — confirm
    # that all call sites use keywords.
    MMApi.__init__(self, user)
    if channel_name is None and channel_id is None:
        # An explicit exception instead of ``assert``: asserts are stripped
        # when Python runs with -O, which would leave channel_id unset.
        raise ValueError("ChannelApi needs a channel_name or a channel_id")

    if channel_id is not None:
        self.channel_id = channel_id
    else:
        self.channel_id = self.get_channel_id(channel_name)
|
||||||
|
|
||||||
def create_post(self, message: str, props: Dict = None) -> None:
    """Create a post with ``message`` and optional ``props`` in this channel, then log it."""
    payload = {"channel_id": self.channel_id, "message": message, "props": props}
    self.posts.create_post(options=payload)
    self.log(f'Message successfully created: "{message}"')
|
||||||
|
|
||||||
|
def create_threaded_post(
    self, post_id: str, message: str, props: Dict = None
) -> None:
    """Create a post in this channel with ``root_id`` set to ``post_id``, then log it.

    ``props`` is passed through unchanged in the request options.
    """
    payload = {
        "channel_id": self.channel_id,
        "message": message,
        "root_id": post_id,
        "props": props,
    }
    self.posts.create_post(options=payload)
    self.log(f'Message successfully created: "{message}"')
|
||||||
# print_response("Create post", resp)
|
# print_response("Create post", resp)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import NamedTuple, List, Dict
|
from typing import Dict, List, NamedTuple
|
||||||
|
|
||||||
|
|
||||||
class MMUser(NamedTuple):
|
class MMUser(NamedTuple):
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
#!/usr/bin/env -S nix shell --impure --expr "(import (builtins.getFlake \"nixpkgs\") {}).python3.withPackages (ps: [ ps.python-gitlab ps.GitPython ])" --command python
|
#!/usr/bin/env -S nix shell --impure --expr "(import (builtins.getFlake \"nixpkgs\") {}).python3.withPackages (ps: [ ps.python-gitlab ps.GitPython ])" --command python
|
||||||
|
|
||||||
import git
|
|
||||||
import gitlab
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
|
import git
|
||||||
|
import gitlab
|
||||||
|
|
||||||
TOKEN_NAME = os.environ['GITLAB_ACCESS_TOKEN_NAME']
|
TOKEN_NAME = os.environ['GITLAB_ACCESS_TOKEN_NAME']
|
||||||
TOKEN = os.environ['GITLAB_ACCESS_TOKEN']
|
TOKEN = os.environ['GITLAB_ACCESS_TOKEN']
|
||||||
REPO_FOLDER = 'drive'
|
REPO_FOLDER = 'drive'
|
||||||
|
|
|
@ -3,6 +3,15 @@ import functools
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def url_to_id(url: str) -> str:
    """Return the file id of ``url``: the last segment of its path.

    Query string and fragment are ignored and trailing slashes are dropped, so
    ``https://host/abc``, ``https://host/abc/`` and ``https://host/abc?x#y``
    all map to ``abc``. A bare id (no slash at all) is returned unchanged. A
    URL without any path yields an empty string.
    """
    from urllib.parse import urlsplit

    # Without rstrip a trailing "/" would produce an empty id.
    path = urlsplit(url).path.rstrip("/")
    return path.rsplit("/", 1)[-1]
|
||||||
|
|
||||||
|
|
||||||
|
def id_to_url(id, base_url="https://codimd.zeus.gent"):
    """Return the URL of the file with the given ``id``.

    ``base_url`` defaults to the CodiMD instance this module was written for;
    a trailing slash on it is ignored so the result never contains ``//``
    between host and id.
    """
    return f"{base_url.rstrip('/')}/{id}"
|
||||||
|
|
||||||
|
|
||||||
def timer(func):
|
def timer(func):
|
||||||
"""Print the runtime of the decorated function"""
|
"""Print the runtime of the decorated function"""
|
||||||
|
|
||||||
|
|
91
src/web.py
91
src/web.py
|
@ -2,14 +2,17 @@ import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
from pprint import pprint
|
||||||
|
|
||||||
import hug
|
import hug
|
||||||
|
import mattermostdriver.exceptions
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
import db
|
import db
|
||||||
import dir_utils
|
import dir_utils
|
||||||
from mattermost import MMApi, ChannelApi
|
|
||||||
import mattermost
|
import mattermost
|
||||||
|
from mattermost import ChannelApi, MMApi
|
||||||
|
from utils import id_to_url, url_to_id
|
||||||
|
|
||||||
|
|
||||||
def find_codimd_files_on_mattermost():
|
def find_codimd_files_on_mattermost():
|
||||||
|
@ -31,15 +34,23 @@ def find_codimd_files_on_mattermost():
|
||||||
|
|
||||||
print(f"Scraping {len(posts)} posts")
|
print(f"Scraping {len(posts)} posts")
|
||||||
for post_id, post in posts.items():
|
for post_id, post in posts.items():
|
||||||
urls = re.findall(r"(https?://[^\s#?]+)", post["message"])
|
# old: r"(https?://[^\s#?]+)"
|
||||||
|
url_regex = r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)"
|
||||||
|
urls = re.findall(url_regex, post["message"])
|
||||||
for url in urls:
|
for url in urls:
|
||||||
idx = url.find("codimd.zeus.gent")
|
idx = url.find("codimd.zeus.gent")
|
||||||
if idx == -1:
|
if idx == -1:
|
||||||
# In this case it's an url but not for codimd
|
# In this case it's an url but not for codimd
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Remove everything after the # or ?
|
||||||
|
for char in ["#", "?"]:
|
||||||
|
cut_idx = url.rfind(char) if url.rfind(char) != -1 else len(url)
|
||||||
|
url = url[:cut_idx]
|
||||||
|
|
||||||
|
# pprint(post)
|
||||||
print(url)
|
print(url)
|
||||||
db.add_discovered_file(url)
|
db.add_discovered_url(url, post)
|
||||||
|
|
||||||
# When everything succeeded. Save the current unix time as latest fetched moment
|
# When everything succeeded. Save the current unix time as latest fetched moment
|
||||||
db.set_latest_sync_time(current_fetch_time)
|
db.set_latest_sync_time(current_fetch_time)
|
||||||
|
@ -50,32 +61,80 @@ def read_note(url):
|
||||||
|
|
||||||
|
|
||||||
def download_files():
    """Download every file known to the db and record where it was stored.

    Each file is fetched with ``read_note`` from its ``source_url`` and written
    to ``data/note-<file_id>.md``; that path is then stored on the file's
    record through ``db.set_local_file_path``.
    """
    for file_id, file_info in db.get_files().items():
        local_file_path = f"data/note-{file_id}.md"
        url = file_info["source_url"]
        print(f"Downloading url {url}")
        # Fetch before opening the target: opening in "w" mode truncates the
        # file, so a failing download would otherwise destroy the copy that
        # was stored by an earlier run.
        content = read_note(url)
        with open(local_file_path, "w") as f:
            f.write(content)
        db.set_local_file_path(file_id, local_file_path)
|
||||||
|
|
||||||
def filename_to_url(filename):
|
|
||||||
return f"https://codimd.zeus.gent/{filename[5:-3]}"
|
|
||||||
|
|
||||||
def report_newly_found_file(filename):
|
def send_message(file_id, file_info, message):
    """Post ``message`` as a reply to the Mattermost post a file was found in.

    The message is sent as a threaded reply to the originating post. If
    Mattermost rejects that with ``InvalidOrMissingParameters``, a regular post
    that links to the originating post is created instead.

    ``file_id`` is accepted for symmetry with the callers but is not used.
    """
    channel_id = file_info["originating_mm_post_channel_id"]
    post_id = file_info["originating_mm_post_id"]

    # TODO(review): the line below redirects everything to bestuur-dev for
    # testing purposes; keep it commented out for normal operation.
    # channel_id = MMApi().get_channel_id("bestuur-dev")
    channel = ChannelApi(
        channel_id=channel_id,
        user=mattermost.users["flynn"],
    )

    # This is bestuur-INTERN where you can only post when you prefix your
    # message with a '!'. The check uses the channel that is actually posted
    # to, so it stays correct when the testing override above is enabled.
    intern_channel_id = "hrx6pgfswjbttcj8nim3jrwe7w"
    prefix = "! " if channel_id == intern_channel_id else ""

    try:
        channel.create_threaded_post(
            post_id,
            f"{prefix}{message}",
        )
    except mattermostdriver.exceptions.InvalidOrMissingParameters:
        # This will occur when we try to react to a file in a channel that is
        # not the same as the originating channel.
        unique_post_url = f"https://mattermost.zeus.gent/zeus/pl/{post_id}"
        # The fallback needs the prefix as well, otherwise it cannot be posted
        # in a channel that requires it.
        channel.create_post(
            f"{prefix}{unique_post_url}\n\n{message}",
        )
|
||||||
|
|
||||||
|
|
||||||
|
def report_newly_found_file(file_id, file_info):
    """Announce, via ``send_message``, that a file was recognised as valid.

    The announcement includes the drive location requested in the file's
    ``sync-to`` metadata entry.
    """
    requested_location = file_info["metadata"]["sync-to"]
    announcement = (
        "I found a new CodiMD file in this post! Making work of putting it on gitlab :)\n"
        f" - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): {requested_location}"
    )
    send_message(file_id, file_info, announcement)
|
||||||
|
|
||||||
|
|
||||||
|
def report_newly_found_but_invalid_file(file_id, file_info):
    """Tell the poster, via ``send_message``, how to add the missing metadata block."""
    help_text = """Hi there! :wave:
I'm your friendly neighbourhood document sync bot.
I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping, but the necessary metadata block is not present.
You can easily add the correct info and I will do the rest of the work for you!

Just add the following lines to your file, the location in your file is not important but at the top would be my recommendation.

```
:::spoiler Gitlab sync
- sync-to: <a valid path on the DRIVE, for ex.: verslagen/21-22/2022-05-13.md>
:::
```"""
    send_message(file_id, file_info, help_text)
|
||||||
|
|
||||||
|
|
||||||
def validate_downloaded_files():
    """Validate every downloaded file and report state changes on Mattermost.

    For each file in the db that has a local copy, the metadata is looked up
    with ``dir_utils.find_metadata``:

    - metadata found: the file is marked valid, and reported when it is newly valid;
    - no metadata: the file is marked invalid, and reported when that is a change.

    Files without a ``local_file_path`` (not downloaded yet) are skipped.

    Returns the database as freshly loaded after all updates.
    """
    for file_id, file_info in db.get_files().items():
        file_path = file_info.get("local_file_path")
        if file_path is None:
            # Nothing on disk to inspect for this file yet.
            continue

        metadata = dir_utils.find_metadata(file_path)
        if metadata is not None:
            is_new_file, new_file_info = db.mark_file_valid(file_id, metadata)
            if is_new_file:
                report_newly_found_file(file_id, new_file_info)
        else:
            changed, new_file_info = db.mark_file_invalid(file_id)
            if changed:
                report_newly_found_but_invalid_file(file_id, new_file_info)

    return db._load_db()
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue