codimd-git-sync/src/web.py

100 lines
2.8 KiB
Python
Raw Normal View History

import json
import os
import re
import time
import hug
import requests
import db
import dir_utils
from mattermost import MMApi, ChannelApi
import mattermost
def find_codimd_files_on_mattermost():
    """Scan the configured Mattermost channels for CodiMD links and record them.

    For each channel, the posts since the sync time stored in the database
    are fetched. Every URL found in a post's message that points at the
    CodiMD server is handed to ``db.add_discovered_file``. After all channels
    have been processed, the moment this run started (milliseconds since the
    epoch) is stored as the new sync time.
    """
    # Only URLs that *start* with the CodiMD origin are accepted. A plain
    # substring test would also accept links to other hosts that merely
    # mention the hostname somewhere in their path, and those links would
    # later be downloaded as if they were notes.
    codimd_prefixes = ("https://codimd.zeus.gent/", "http://codimd.zeus.gent/")
    # Stops at whitespace, '#' and '?', so fragments and query strings are cut.
    url_pattern = re.compile(r"(https?://[^\s#?]+)")

    # Named `api` so the imported `mattermost` module is not shadowed.
    api = MMApi()
    channels = [
        "hrx6pgfswjbttcj8nim3jrwe7w",  # bestuur-INTERN
        "uda7ax9poprduq8ob56e1fqk4e",  # bestuur
    ]

    last_fetch_time = db.get_latest_sync_time()
    # Taken before fetching; presumably so that posts created while this run
    # is in progress are not skipped by the next run.
    current_fetch_time = int(time.time() * 1000)

    print(f"Fetching posts since: {last_fetch_time}")

    for channel_id in channels:
        print(f"Fetching posts for channel_id: {channel_id}")

        # TODO: an unfiltered fetch of all posts was sketched in the original
        # as `mattermost.posts.get_posts_for_channel(channel_id)`.
        posts = api.get_posts_for_channel(channel_id, last_fetch_time)

        print(f"Scraping {len(posts)} posts")
        for post in posts.values():
            for url in url_pattern.findall(post["message"]):
                if not url.startswith(codimd_prefixes):
                    # It is a URL, but not one on the CodiMD server.
                    continue
                print(url)
                db.add_discovered_file(url)

    # Everything succeeded: remember the start of this run as the latest
    # fetched moment.
    db.set_latest_sync_time(current_fetch_time)
def read_note(url, timeout=30):
    """Return the downloadable text of the note at *url*.

    The text is requested from ``<url>/download``.

    Args:
        url: Address of the note.
        timeout: Seconds to wait for the server. requests applies no timeout
            unless one is given, so without it an unresponsive server would
            block the caller indefinitely.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    return requests.get(f"{url}/download", timeout=timeout).text
def download_files():
    """Download every discovered note into ``data/note-<id>.md``.

    ``<id>`` is the part of the note's URL after its last ``/``.
    """
    for url in db.get_discovered_files():
        note_id = url[url.rfind("/") + 1:]
        print(f"Downloading url {url}")
        # Fetch before opening the target: "w" mode truncates the file, so a
        # failed download would otherwise wipe the previously saved copy.
        content = read_note(url)
        # Explicit UTF-8 so the result does not depend on the process locale.
        # NOTE(review): readers of these files (e.g. dir_utils.find_metadata)
        # should decode them as UTF-8 as well - confirm.
        with open(f"data/note-{note_id}.md", "w", encoding="utf-8") as f:
            f.write(content)
def filename_to_url(filename):
    """Build the CodiMD URL for the name of a downloaded note file.

    The first five and the last three characters of *filename* are dropped
    (the lengths of a ``note-`` prefix and an ``.md`` suffix) and what
    remains is appended to the CodiMD server address. The name is not checked
    for actually having that prefix and suffix.
    """
    prefix_length = len("note-")
    suffix_length = len(".md")
    note_id = filename[prefix_length:-suffix_length]
    return f"https://codimd.zeus.gent/{note_id}"
def report_newly_found_file(filename):
    """Post a message announcing a newly found report.

    The post is created through a ``ChannelApi`` built for ``"bestuur-dev"``
    with the ``flynn`` entry of ``mattermost.users``, and contains the CodiMD
    URL derived from *filename*.
    """
    dev_channel = ChannelApi("bestuur-dev", mattermost.users["flynn"])
    announcement = f"I found a new report: {filename_to_url(filename)}! Making work of putting it on gitlab :)"
    dev_channel.create_post(announcement)
def validate_downloaded_files():
    """Register every file in ``data`` for which metadata can be found.

    Each entry of the ``data`` directory is passed to
    ``dir_utils.find_metadata``. Entries that yield metadata are stored with
    ``db.add_valid_file``; when that call reports the file as new, an
    announcement is posted for it.

    Returns:
        The result of ``db._load_db()`` after all entries were handled.
    """
    data_dir = "data"
    for entry in os.listdir(data_dir):
        metadata = dir_utils.find_metadata(f"{data_dir}/{entry}")
        if metadata is None:
            # No metadata found for this entry; nothing to register.
            continue
        if db.add_valid_file(entry, metadata):
            report_newly_found_file(entry)
    return db._load_db()
@hug.get("/sync-mattermost")
def sync_mattermost():
    """Run the full sync and return the database contents.

    Served on GET ``/sync-mattermost``. Runs three stages in order, printing
    a banner before each: collect CodiMD URLs from Mattermost, download the
    discovered notes, and validate the downloaded files.

    Returns:
        The result of ``db._load_db()`` after the last stage.
    """
    stages = (
        (
            "=======================================",
            "== Finding urls posted on mattermost ==",
            find_codimd_files_on_mattermost,
        ),
        (
            "=============================",
            "== Downloading found files ==",
            download_files,
        ),
        (
            "================================================",
            "== Finding valid files in the downloaded ones ==",
            validate_downloaded_files,
        ),
    )
    for ruler, title, run_stage in stages:
        print()
        print(ruler)
        print(title)
        run_stage()
    print()
    return db._load_db()