2022-10-06 22:44:04 +02:00
import json
import os
import re
import time
2022-10-07 13:22:07 +02:00
from pprint import pprint
2022-10-06 22:44:04 +02:00
import hug
2022-10-07 13:22:07 +02:00
import mattermostdriver . exceptions
2022-10-06 22:44:04 +02:00
import requests
import db
import dir_utils
2022-10-06 23:07:26 +02:00
import mattermost
2022-10-07 13:22:07 +02:00
from mattermost import ChannelApi , MMApi
from utils import id_to_url , url_to_id
2022-10-06 22:44:04 +02:00
def find_codimd_files_on_mattermost():
    """Record every CodiMD link posted in the watched Mattermost channels.

    For each watched channel the posts are requested through
    ``MMApi.get_posts_for_channel`` together with the last stored sync time.
    Every URL found in a post's ``"message"`` that mentions
    ``codimd.zeus.gent`` is cut at its first ``#`` or ``?`` and passed, along
    with the post, to ``db.add_discovered_url``. After all channels have been
    handled, the timestamp taken at the start of this run (in milliseconds)
    is stored as the new sync time.
    """
    # Deliberately not called ``mattermost`` so the imported module of that
    # name is not shadowed inside this function.
    mm_api = MMApi()
    channels = [
        "hrx6pgfswjbttcj8nim3jrwe7w",  # bestuur-INTERN
        "uda7ax9poprduq8ob56e1fqk4e",  # bestuur
    ]
    # Compiled once up front instead of being rebuilt for every post.
    # old: r"(https?://[^\s#?]+)"
    url_pattern = re.compile(
        r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)"
    )

    last_fetch_time = db.get_latest_sync_time()
    # Taken before fetching so posts created during this run are picked up
    # by the next one.
    current_fetch_time = int(time.time() * 1000)
    print(f"Fetching posts since: {last_fetch_time}")

    for channel_id in channels:
        print(f"Fetching posts for channel_id: {channel_id}")
        # TODO Use first statement for all posts
        posts = mm_api.get_posts_for_channel(channel_id, last_fetch_time)
        # posts = mattermost.posts.get_posts_for_channel(channel_id)
        print(f"Scraping {len(posts)} posts")

        for post in posts.values():
            for url in url_pattern.findall(post["message"]):
                if "codimd.zeus.gent" not in url:
                    # In this case it's an url but not for codimd
                    continue

                # Remove everything from the first '#' or '?' onwards. Cutting
                # at the first occurrence (not the last) also handles URLs
                # that contain several of these characters.
                url = re.split(r"[#?]", url, maxsplit=1)[0]

                print(url)
                db.add_discovered_url(url, post)

    # When everything succeeded. Save the current unix time as latest fetched moment
    db.set_latest_sync_time(current_fetch_time)
def read_note(url, timeout=30):
    """Return the response text of ``<url>/download``.

    Args:
        url: Base URL of the note; ``/download`` is appended to it.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without one, ``requests`` waits indefinitely on a stalled
            server, which would block the whole sync.

    Returns:
        The body of the HTTP response as text.
    """
    return requests.get(f"{url}/download", timeout=timeout).text
def download_files():
    """Download every file known to the db and register where it was saved.

    For each ``(file_id, file_info)`` pair from ``db.get_files()`` the note at
    ``file_info["source_url"]`` is fetched with ``read_note`` and written to
    ``data/note-<file_id>.md``; that path is then stored through
    ``db.set_local_file_path``.
    """
    for file_id, file_info in db.get_files().items():
        local_file_path = f"data/note-{file_id}.md"
        url = file_info["source_url"]

        print(f"Downloading url {url}")
        # Fetch before opening the target: mode "w" truncates the file, so a
        # failing download must not be allowed to wipe an earlier copy.
        note_text = read_note(url)

        with open(local_file_path, "w") as f:
            f.write(note_text)

        db.set_local_file_path(file_id, local_file_path)
2022-10-06 22:44:04 +02:00
2022-10-07 13:22:07 +02:00
# Post `message` on Mattermost in response to the post a file originated from.
#
# Parameters:
#   file_id   -- accepted but not used inside this function.
#   file_info -- mapping; the keys "originating_mm_post_channel_id" and
#                "originating_mm_post_id" are read from it.
#   message   -- text to post.
#
# The post is made through a ChannelApi created for the originating channel
# with the user mattermost.users["flynn"].
def send_message ( file_id , file_info , message ) :
channel_id = file_info [ " originating_mm_post_channel_id " ]
post_id = file_info [ " originating_mm_post_id " ]
2022-10-06 23:07:26 +02:00
2022-10-07 13:22:07 +02:00
# TODO Comment below line, this is for testing purposes
# channel_id = MMApi().get_channel_id("bestuur-dev")
channel = ChannelApi (
channel_id = channel_id ,
user = mattermost . users [ " flynn " ] ,
2022-10-06 23:07:26 +02:00
)
2022-10-07 13:22:07 +02:00
prefix = " "
# This is bestuur-INTERN where you can only post when you prefix your message with a '!'
if file_info [ " originating_mm_post_channel_id " ] == " hrx6pgfswjbttcj8nim3jrwe7w " :
prefix = " ! "
# First attempt: create a threaded post on the originating post, with the prefix applied.
try :
channel . create_threaded_post (
post_id ,
f " { prefix } { message } " ,
)
except mattermostdriver . exceptions . InvalidOrMissingParameters as e :
# This will occur when we try to react to a file in a channel that is not the same as the originating channel.
# Fallback: a create_post call whose text starts with a URL built from post_id.
# NOTE(review): `prefix` is not applied on this path and `e` is never used -- confirm that is intended.
unique_post_url = f " https://mattermost.zeus.gent/zeus/pl/ { post_id } "
channel . create_post (
f " { unique_post_url } \n \n { message } " ,
)
# Announce through send_message that a new CodiMD file was found in a post.
# The message includes file_info['metadata']['sync-to'] as the requested
# location in the drive, so file_info must carry that metadata entry.
def report_newly_found_file ( file_id , file_info ) :
message = f " I found a new CodiMD file in this post! Making work of putting it on gitlab :) \n - Requested location in the [drive](https://git.zeus.gent/bestuur/drive): { file_info [ ' metadata ' ] [ ' sync-to ' ] } "
send_message ( file_id , file_info , message )
# Tell the poster, through send_message, that the CodiMD file cannot be synced
# because its metadata block is missing. The fixed message text below shows
# the spoiler block with a `sync-to` entry that has to be added to the file.
def report_newly_found_but_invalid_file ( file_id , file_info ) :
message = """ Hi there! :wave:
I ' m your friendly neighbourhood document sync bot.
I could synchronize this CodiMD file automatically to our Gitlab DRIVE for safekeeping , but the necessary metadata block is not present .
You can easily add the correct info and I will do the rest of the work for you !
Just add the following lines to your file , the location in your file is not important but at the top would be my recommendation .
` ` `
: : : spoiler Gitlab sync
- sync - to : < a valid path on the DRIVE , for ex . : verslagen / 21 - 22 / 2022 - 05 - 13. md >
: : :
` ` ` """
send_message ( file_id , file_info , message )
2022-10-06 22:44:04 +02:00
def validate_downloaded_files():
    """Classify each downloaded file by whether a metadata block was found.

    For every ``(file_id, file_info)`` pair from ``db.get_files()`` the file at
    ``file_info["local_file_path"]`` is inspected with
    ``dir_utils.find_metadata``:

    * metadata found: the file is marked valid; when the db reports it as a
      new file, ``report_newly_found_file`` is called.
    * no metadata (``None``): the file is marked invalid; when the db reports
      a change, ``report_newly_found_but_invalid_file`` is called.

    Returns:
        The result of ``db._load_db()`` after all files were processed.
    """
    # The files to check come from the db, so no directory listing of
    # "data" is needed here.
    for file_id, file_info in db.get_files().items():
        file_path = file_info["local_file_path"]
        metadata = dir_utils.find_metadata(file_path)

        if metadata is not None:
            is_new_file, new_file_info = db.mark_file_valid(file_id, metadata)
            if is_new_file:
                report_newly_found_file(file_id, new_file_info)
        else:
            changed, new_file_info = db.mark_file_invalid(file_id)
            if changed:
                report_newly_found_but_invalid_file(file_id, new_file_info)

    return db._load_db()
@hug.get("/sync-mattermost")
def sync_mattermost():
    """Run the full sync pipeline and return the resulting db contents.

    The three stages run in order: find CodiMD urls on Mattermost, download
    the found files, validate the downloaded files. Each stage is preceded by
    a blank line and a two-line banner on stdout.

    Returns:
        The result of ``db._load_db()`` once all stages have finished.
    """
    # (banner rule, banner title, stage to run) in execution order.
    stages = (
        (
            "=======================================",
            "== Finding urls posted on mattermost ==",
            find_codimd_files_on_mattermost,
        ),
        (
            "=============================",
            "== Downloading found files ==",
            download_files,
        ),
        (
            "================================================",
            "== Finding valid files in the downloaded ones ==",
            validate_downloaded_files,
        ),
    )

    for rule, title, run_stage in stages:
        print()
        print(rule)
        print(title)
        run_stage()

    print()
    return db._load_db()