Add new python code with mattermost scraping stuff
This commit is contained in:
parent
5f89372558
commit
3c13ec2eae
12 changed files with 585 additions and 171 deletions
8
.gitignore
vendored
8
.gitignore
vendored
|
@ -1,4 +1,10 @@
|
||||||
data/
|
data/
|
||||||
drive-temp/
|
drive-temp/
|
||||||
cookiefile
|
db.json
|
||||||
.env
|
.env
|
||||||
|
|
||||||
|
venv/
|
||||||
|
__pycache__/
|
||||||
|
.idea
|
||||||
|
users.toml
|
||||||
|
|
||||||
|
|
1
.tool-versions
Normal file
1
.tool-versions
Normal file
|
@ -0,0 +1 @@
|
||||||
|
python 3.10.4
|
2
Makefile
Normal file
2
Makefile
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
web:
|
||||||
|
python -m hug -f src/web.py
|
|
@ -1,22 +0,0 @@
|
||||||
#!/bin/bash
# Download every CodiMD note in the logged-in user's history into data/.
# Requires env vars CMD_SERVER_URL, CMD_EMAIL, CMD_PASSWORD, plus curl and jq.

mkdir -p data

echo "Login to CodiMD"
# Log in through the web form; the session cookie is stored in ./cookiefile
# and reused by every later request.
curl -c cookiefile "$CMD_SERVER_URL/login" -X POST -H "Referer: $CMD_SERVER_URL/" --data-raw "email=$CMD_EMAIL&password=$CMD_PASSWORD" >/dev/null

echo
echo

# Sanity check: show the profile the session cookie resolves to.
curl -b cookiefile 'https://codimd.zeus.gent/me' | jq
echo
notes_history=$(curl -b cookiefile 'https://codimd.zeus.gent/history')

# echo $notes_history | jq
# note_id=$(echo "$notes_history" | jq -r '.history[1].id')
# Extract every note id from the history listing.
ids=$(echo "$notes_history" | jq -r '.history | map(.id) | .[]')

# Fetch each note's raw markdown into data/note-<id>.md.
while IFS= read -r line; do
  echo "... Reading note with ID: $line ..."
  curl -b cookiefile "https://codimd.zeus.gent/$line/download" >"data/note-$line.md"
done <<<"$ids"
|
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
tabulate
|
||||||
|
colored
|
||||||
|
mattermostdriver
|
||||||
|
hug
|
131
src/MattermostObjects.py
Normal file
131
src/MattermostObjects.py
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
from typing import NamedTuple, List, Dict
|
||||||
|
|
||||||
|
|
||||||
|
class MMUser(NamedTuple):
    """A Mattermost user record mirroring the JSON of the users API.

    Field names match the API payload keys exactly so a response dict can be
    splatted straight into the constructor (see `load`).
    """

    # Fields always present in the API payload.
    id: str
    create_at: int
    update_at: int
    delete_at: int
    username: str
    first_name: str
    last_name: str
    nickname: str
    email: str
    auth_data: str
    auth_service: str
    roles: str
    locale: str
    timezone: dict
    position: any

    # Optional fields: not every API response includes these, so they carry
    # defaults to keep the **data splat working for partial payloads.
    is_bot: bool = None
    bot_description: str = None
    email_verified: bool = None
    notify_props: dict = None
    last_password_update: int = None
    failed_attempts: int = None
    mfa_active: bool = False
    terms_of_service_id: str = None
    terms_of_service_create_at: int = None
    props: dict = {}
    last_picture_update: int = None

    @staticmethod
    def load(data):
        """Build an MMUser from an API response dict.

        Returns None (implicitly) and prints an error when `data` contains
        keys that are not declared as fields above.
        """
        try:
            return MMUser(**data)
        except TypeError as e:
            print("[ERROR] Could not load dict into MMUser namedtuple")
            print(str(e))
||||||
|
|
||||||
|
|
||||||
|
class MMPostProps(NamedTuple):
    """Optional `props` payload attached to a Mattermost post.

    Every field defaults to None/False because props are sparse: a given
    post only carries the few keys relevant to its type (webhook posts,
    header/purpose changes, member add/remove system messages, ...).
    """

    # Webhook/bot posting metadata.
    from_webhook: str = False
    override_icon_url: str = None
    override_username: str = None
    webhook_display_name: str = None

    channel_mentions: Dict = None
    # NOTE(review): presumably per-instance matterircd bridge metadata keyed
    # by server id — confirm before relying on these.
    matterircd_krcggydky38kdcuubsc7fddc7w: str = None
    matterircd_s4ptwhx7wfnx7qwexp1khorh7e: str = None
    username: str = None
    userId: str = None
    # System-message props: channel header/purpose/display-name changes.
    old_header: str = None
    new_header: str = None
    old_purpose: str = None
    new_purpose: str = None
    old_displayname: str = None
    new_displayname: str = None
    remove_link_preview: str = None
    # System-message props: member add/remove events.
    removedUserId: str = None
    addedUserId: str = None
    removedUsername: str = None
    addedUsername: str = None
    message: str = None
    attachments: str = None
    from_bot: str = False
|
|
||||||
|
|
||||||
|
class MMPost(NamedTuple):
    """A Mattermost post, mirroring the JSON of the posts API.

    `props` is parsed into an MMPostProps instance by `load`; all other
    fields are taken verbatim from the response dict.
    """

    channel_id: str
    create_at: int
    delete_at: int
    edit_at: int
    hashtags: str
    id: str
    is_pinned: bool
    message: str
    metadata: Dict
    original_id: str
    parent_id: str
    pending_post_id: str
    root_id: str
    type: str
    update_at: int
    user_id: str
    # Optional fields, absent from some payloads.
    message_source: str = None
    has_reactions: bool = None
    file_ids: List[str] = None
    props: MMPostProps = None

    def from_human(self):
        """True when the post was not marked as coming from a webhook or bot."""
        return self.props is None or (
            self.props.from_webhook is False and self.props.from_bot is False
        )

    @staticmethod
    def load(data):
        """Build an MMPost from an API response dict.

        WARNING: mutates the caller's `data` (deletes the "props" key) so the
        remainder can be splatted into the constructor. Returns None
        (implicitly) and prints an error on unexpected keys.
        """
        try:
            props = None
            if "props" in data:
                try:
                    props: MMPostProps = MMPostProps(**data["props"])
                except TypeError as e:
                    # Keep going with props=None: an unparseable props dict
                    # should not discard the whole post.
                    print("[ERROR] Could not load dict into MMPostProps namedtuple")
                    print(str(e))
                del data["props"]
            return MMPost(props=props, **data)
        except TypeError as e:
            print("[ERROR] Could not load dict into MMPost namedtuple")
            print(str(e))
|
|
||||||
|
|
||||||
|
class MMChannelPosts(NamedTuple):
    """Response shape of Mattermost's "get posts for channel" endpoint.

    `posts` maps post id -> MMPost; `order` preserves the API's ordering of
    those ids.
    """

    prev_post_id: str
    next_post_id: str
    order: List[str]
    posts: Dict[str, MMPost]
    disable_group_highlight: any
    reply_count: any

    @staticmethod
    def load(data):
        """Build an MMChannelPosts from an API response dict.

        Parses each raw post through MMPost.load. Mutates the caller's
        `data` (deletes the "posts" key). Returns None (implicitly) and
        prints an error on unexpected keys.
        """
        try:
            posts: Dict[str, MMPost] = {
                k: MMPost.load(v) for (k, v) in data["posts"].items()
            }
            del data["posts"]
            return MMChannelPosts(posts=posts, **data)
        except TypeError as e:
            # Fixed copy-paste bug: this message previously claimed the
            # failure happened while loading an MMUser.
            print("[ERROR] Could not load dict into MMChannelPosts namedtuple")
            print(str(e))
63
src/db.py
Normal file
63
src/db.py
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
import json
|
||||||
|
from os.path import exists
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
db_filename = "db.json"
|
||||||
|
|
||||||
|
|
||||||
|
def init_db():
|
||||||
|
file_exists = exists(db_filename)
|
||||||
|
if not file_exists:
|
||||||
|
print("Initializing json file database")
|
||||||
|
with open(db_filename, "w") as db_file:
|
||||||
|
db_file.write("{}")
|
||||||
|
|
||||||
|
|
||||||
|
init_db()
|
||||||
|
|
||||||
|
|
||||||
|
def _load_db():
|
||||||
|
with open(db_filename, "r") as db_file:
|
||||||
|
db = json.loads(db_file.read())
|
||||||
|
return db
|
||||||
|
|
||||||
|
|
||||||
|
def _save_db(db):
|
||||||
|
with open(db_filename, "w") as db_file:
|
||||||
|
db_file.write(json.dumps(db))
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_sync_time() -> int:
|
||||||
|
db = _load_db()
|
||||||
|
return db.get("latest_sync_time", 0)
|
||||||
|
|
||||||
|
|
||||||
|
def set_latest_sync_time(le_date) -> None:
|
||||||
|
db = _load_db()
|
||||||
|
db["latest_sync_time"] = le_date
|
||||||
|
_save_db(db)
|
||||||
|
|
||||||
|
|
||||||
|
def add_discovered_file(file_url) -> List[str]:
|
||||||
|
db = _load_db()
|
||||||
|
discovered_files = set(db.get("discovered_files", []))
|
||||||
|
discovered_files.add(file_url)
|
||||||
|
discovered_files = list(discovered_files)
|
||||||
|
db["discovered_files"] = discovered_files
|
||||||
|
_save_db(db)
|
||||||
|
return discovered_files
|
||||||
|
|
||||||
|
|
||||||
|
def get_discovered_files() -> List[str]:
|
||||||
|
db = _load_db()
|
||||||
|
discovered_files = set(db.get("discovered_files", []))
|
||||||
|
return discovered_files
|
||||||
|
|
||||||
|
|
||||||
|
def add_valid_file(filename, metadata):
|
||||||
|
db = _load_db()
|
||||||
|
valid_files = db.get("valid_files", {})
|
||||||
|
valid_files[filename] = metadata
|
||||||
|
db["valid_files"] = valid_files
|
||||||
|
_save_db(db)
|
||||||
|
return valid_files
|
35
src/dir_utils.py
Normal file
35
src/dir_utils.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
# pattern = re.compile(":::spoiler Gitlab sync([^:]*):::")
|
||||||
|
pattern = re.compile("[^:]*:::")
|
||||||
|
|
||||||
|
|
||||||
|
def find_metadata(filename):
    """Parse the ':::spoiler Gitlab sync' metadata block out of a note file.

    The block is expected to contain lines shaped like '- key: value'.
    Returns a dict of those key/value pairs, or None when the file contains
    no metadata block.
    """
    with open(filename, "r") as file:
        # Fixed: the original f-string had no placeholder, so the filename
        # being inspected was never actually printed.
        print(f"File: {filename}")
        data = file.read()

    metadata = {}

    start_str = ":::spoiler Gitlab sync\n"
    end_str = "\n:::"

    start_i = data.find(start_str)
    if start_i < 0:
        print("Not a valid report")
        return None

    start_i += len(start_str)
    end_i = data.find(end_str, start_i + 1)
    file_data = data[start_i:end_i]
    for line in file_data.split("\n"):
        # Lines look like '- key: value'; skip anything without the ': '
        # separator instead of producing garbage keys.
        value_index = line.find(": ")
        if value_index == -1:
            continue
        key = line[2:value_index]
        value = line[value_index + 2 :]
        metadata[key] = value
    print("Valid report")
    print(metadata)

    return metadata
|
181
src/mattermost.py
Normal file
181
src/mattermost.py
Normal file
|
@ -0,0 +1,181 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import pprint as pp
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from datetime import datetime
|
||||||
|
from enum import Enum
|
||||||
|
from time import sleep
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
import toml
|
||||||
|
from colored import style
|
||||||
|
from mattermostdriver import Driver
|
||||||
|
from tabulate import tabulate
|
||||||
|
|
||||||
|
from MattermostObjects import MMUser, MMPost, MMChannelPosts
|
||||||
|
from utils import humanize_date_difference, timer
|
||||||
|
|
||||||
|
pp = pp.PrettyPrinter(indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
class LogLevel(Enum):
    # Severity tags used by MMApi.log when printing to the console.
    INFO = "INFO"
    ERROR = "ERROR"
||||||
|
|
||||||
|
|
||||||
|
class User(ABC):
    """Abstract credential provider for the Mattermost driver."""

    @abstractmethod
    def credentials_dict(self) -> dict:
        """Return the login kwargs understood by mattermostdriver."""
        pass


class NormalUser(User):
    """Credentials for an interactive login with username and password."""

    def __init__(self, login_id, password):
        self.login_id = login_id
        self.password = password

    def credentials_dict(self) -> dict:
        return dict(login_id=self.login_id, password=self.password)

    def __repr__(self):
        # Never leak the password in logs or debug output.
        return f"User<name: {self.login_id}, password: ******>"


class TokenUser(User):
    """Credentials for a personal-access-token login."""

    def __init__(self, token):
        self.token = token

    def credentials_dict(self) -> dict:
        return dict(token=self.token)

    def __repr__(self):
        return f"TokenUser<token: {self.token}>"
|
||||||
|
|
||||||
|
|
||||||
|
# Configured user name -> credentials object, filled from users.toml at
# import time by loadusers(). Fixed annotation: the original `{str: [User]}`
# was a dict literal, not a type.
users: Dict[str, User] = {}


def loadusers():
    """Populate `users` from the users.toml file in the working directory.

    Each entry must either carry a 'token' key (-> TokenUser) or both 'name'
    and 'password' keys (-> NormalUser); any other shape aborts the program.
    """
    with open("users.toml") as f:
        usersstring = f.read()
    usersdict = toml.loads(usersstring)

    usr = None
    for name, data in usersdict.items():
        if "token" in data:
            usr = TokenUser(token=data["token"])
        elif "name" in data and "password" in data:
            usr = NormalUser(login_id=data["name"], password=data["password"])
        else:
            # Fail fast on a malformed config rather than continue half-loaded.
            print("Invalid user '{}' in toml file".format(name))
            exit(1)
        users[name] = usr


loadusers()
|
||||||
|
|
||||||
|
|
||||||
|
def merge_dict(a: dict, b: dict) -> dict:
    """Return a new dict with b's entries layered over a's (b wins on clashes)."""
    merged = dict(a)
    merged.update(b)
    return merged
||||||
|
|
||||||
|
|
||||||
|
class MMApi(Driver):
    """mattermostdriver Driver pre-configured for mattermost.zeus.gent,
    logged in as the given user and bound to the "zeus" team."""

    def __init__(self, user: User = users["flynn"]):
        # NOTE(review): the default is resolved once at import time and
        # requires a "flynn" entry in users.toml — confirm this is intended.
        Driver.__init__(
            self,
            merge_dict(
                {
                    "url": "mattermost.zeus.gent",
                    "port": 443,
                    "debug": False,
                },
                user.credentials_dict(),
            ),
        )
        self.login()
        self.user_id = self.users.get_user(user_id="me")["id"]
        self.team_id = self.teams.get_team_by_name("zeus")["id"]
        print(f" = Creating mattermost client")
        print(f" = - User: {self.user_id}")
        print(f" = - Team: {self.team_id}")

    @staticmethod
    def print_response(resp, title="Response"):
        """Pretty-print an API response under a bold title."""
        print("--------")
        print(style.BOLD + title + style.RESET)
        pp.pprint(resp)

    def log(self, text: str, log_level: LogLevel = LogLevel.INFO):
        """Print a console log line tagged with its severity."""
        print(f"{style.BOLD}[{log_level.value}]{style.RESET} {text}")

    def get_channel_id(self, channel_name):
        """Resolve a channel name to its id within the team.

        Fixed: the original passed an undefined `since` variable as a third
        argument, raising a NameError on every call.
        """
        resp = self.channels.get_channel_by_name(self.team_id, channel_name)
        channel_id = resp["id"]
        self.log(f"Fetching channel id for {channel_name}: {channel_id}")
        return channel_id

    @timer
    def get_posts_for_channel(self, channel_id, since):
        """Fetch posts of a channel since `since` (unix ms), paging as needed.

        Returns a dict of post id -> raw post dict.
        """
        print(f"Fetching posts for {channel_id} since {since}")
        page_size = 200
        page_i = 0
        data = {}
        more = True
        while more:
            resp = self.posts.get_posts_for_channel(
                channel_id,
                params={"page": page_i, "per_page": page_size, "since": since},
            )
            page_i += 1
            print(f"Fetching page {page_i}")
            # print("-", end=" ")

            paged_data = resp["posts"]
            paged_count = len(paged_data)

            if since != 0:
                # Quirk of the Mattermost API: with a non-zero `since` it
                # returns up to 1000 posts in one response and ignores the
                # page/per_page parameters, so a single request is enough.
                more = False
            else:
                if paged_count < page_size:
                    more = False

            # Transform the data into something more sensible or practical
            if type(paged_data) is list:
                paged_data = {item["id"]: item for item in paged_data}

            # Append the paged_data to our global data variable
            data = {**data, **paged_data}
            print()

        self.log(f"Post count: {len(data)}")
        return data
||||||
|
|
||||||
|
|
||||||
|
class ChannelApi(MMApi):
    """MMApi bound to a single channel, resolved by name at construction."""

    def __init__(self, channel_name, user=None):
        MMApi.__init__(self, user)
        self.channel_id = self.get_channel_id(channel_name)

    def create_post(self, message: str, props: Dict = None) -> None:
        """Post `message` (with optional props) to the bound channel."""
        resp = self.posts.create_post(
            options={"channel_id": self.channel_id, "message": message, "props": props}
        )
        self.log(f'Message successfully created: "{message}"')
        # print_response("Create post", resp)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: log in, fetch up to 200 posts of the "bestuur"
    # channel and parse them into typed objects.
    foo = MMApi(user=users["flynn"])

    # all_posts = foo.get_all_posts()

    channel = foo.channels.get_channel_by_name(
        foo.team_id,
        "bestuur",
    )
    channel_id = channel["id"]
    resp = foo.posts.get_posts_for_channel(channel_id, params={"per_page": 200})
    channel_posts: MMChannelPosts = MMChannelPosts.load(resp)
|
75
src/utils.py
Normal file
75
src/utils.py
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
import datetime
|
||||||
|
import functools
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def timer(func):
    """Decorator that reports how long each call of `func` took."""

    @functools.wraps(func)
    def timed(*args, **kwargs):
        started = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - started
        print(f"Finished {func.__name__!r} in {elapsed:.4f} secs")
        return result

    return timed
||||||
|
|
||||||
|
|
||||||
|
def humanize_date_difference(
    older: datetime.datetime,
    newer: datetime.datetime = None,
    offset: int = None,
    debug=False,
):
    """Render the gap between two datetimes as a human string like "5m 3s ago".

    Args:
        older: the earlier datetime (reference point).
        newer: the later datetime; when given, `offset` is derived from it.
        offset: the gap in milliseconds, used when `newer` is not given.
        debug: also print the intermediate delta components.

    Raises:
        ValueError: when neither `newer` nor `offset` is supplied.

    Fixed: the original tested `if offset:` which raised ValueError for a
    zero-length difference even when `newer` was supplied; explicit None
    checks now let that case return "0 ms ago".
    """
    if newer is not None:
        dt = newer - older
        milliseconds = dt.microseconds / 1e3
        offset = milliseconds + (dt.seconds * 1000) + (dt.days * 1000 * 60 * 60 * 24)
    if offset is None:
        raise ValueError("Must supply otherdate or offset (from now)")

    if debug:
        print(f"{offset} s offset")

    # Successively peel ms, s, m, h, d out of the millisecond offset.
    delta_ms = int(offset % 1000)
    offset /= 1e3
    delta_s = int(offset % 60)
    offset /= 60
    delta_m = int(offset % 60)
    offset /= 60
    delta_h = int(offset % 24)
    offset /= 24
    delta_d = int(offset)

    if debug:
        print("{:d} ms".format(delta_ms))
        print("{:d} s".format(delta_s))
        print("{:d} m".format(delta_m))
        print("{:d} h".format(delta_h))
        print("{:d} d".format(delta_d))

    if delta_d > 1:
        if delta_d > 6:
            # More than a week ago: show an absolute date instead.
            # NOTE(review): "%H:%I" mixes 24h and 12h hour fields — likely
            # "%H:%M" was intended; kept as-is to preserve output.
            date = older + datetime.timedelta(
                days=-delta_d, hours=-delta_h, minutes=-delta_m
            )
            return date.strftime("%A, %Y %B %m, %H:%I")
        else:
            wday = older + datetime.timedelta(days=-delta_d)
            return wday.strftime("%A")
    if delta_d == 1:
        return "Yesterday"
    if delta_h > 0:
        return "{:.0f}h {:.0f}m ago".format(delta_h, delta_m)
    if delta_m > 0:
        return "{:.0f}m {:.0f}s ago".format(delta_m, delta_s)
    if delta_s > 0:
        return "{:.0f}s ago".format(delta_s)
    else:
        return "{:.0f} ms ago".format(delta_ms)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: humanize a ~20 ms difference with debug output.
    date1 = datetime.datetime.now()
    date2 = datetime.datetime.now() - datetime.timedelta(milliseconds=20)
    print(humanize_date_difference(date2, date1, debug=True))
|
86
src/web.py
Normal file
86
src/web.py
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
import hug
|
||||||
|
import requests
|
||||||
|
|
||||||
|
import db
|
||||||
|
import dir_utils
|
||||||
|
from mattermost import MMApi
|
||||||
|
|
||||||
|
|
||||||
|
def find_codimd_files_on_mattermost():
    """Scan the configured Mattermost channels for codimd.zeus.gent links
    and record every found url in the db.

    Only posts newer than the last recorded sync time are fetched; on
    success the sync time is advanced to the start of this run.
    """
    mattermost = MMApi()
    channels = [
        "hrx6pgfswjbttcj8nim3jrwe7w",  # bestuur-INTERN
        "uda7ax9poprduq8ob56e1fqk4e",  # bestuur
    ]

    last_fetch_time = db.get_latest_sync_time()
    # Mattermost timestamps are in milliseconds.
    current_fetch_time = int(time.time() * 1000)
    print(f"Fetching posts since: {last_fetch_time}")
    for channel_id in channels:
        print(f"Fetching posts for channel_id: {channel_id}")

        # TODO Use first statement for all posts
        posts = mattermost.get_posts_for_channel(channel_id, last_fetch_time)
        # posts = mattermost.posts.get_posts_for_channel(channel_id)

        print(f"Scraping {len(posts)} posts")
        for post_id, post in posts.items():
            # Crude url matcher; stops at whitespace, '#' and '?' so anchors
            # and query strings are stripped from the captured url.
            urls = re.findall(r"(https?://[^\s#?]+)", post["message"])
            for url in urls:
                idx = url.find("codimd.zeus.gent")
                if idx == -1:
                    # In this case it's an url but not for codimd
                    continue

                print(url)
                db.add_discovered_file(url)

    # When everything succeeded. Save the current unix time as latest fetched moment
    db.set_latest_sync_time(current_fetch_time)
|
||||||
|
|
||||||
|
|
||||||
|
def read_note(url):
    """Fetch the raw markdown of a CodiMD note via its /download endpoint."""
    response = requests.get(f"{url}/download")
    return response.text
||||||
|
|
||||||
|
|
||||||
|
def download_files():
    """Download every discovered CodiMD note into data/note-<id>.md."""
    for url in db.get_discovered_files():
        # The note id is the last path segment of the url.
        note_id = url[url.rfind("/") + 1:]
        with open(f"data/note-{note_id}.md", "w") as f:
            print(f"Downloading url {url}")
            f.write(read_note(url))
|
||||||
|
|
||||||
|
|
||||||
|
def validate_downloaded_files():
    """Scan data/ for notes carrying sync metadata and register the valid
    ones in the db; returns the resulting db contents."""
    for filename in os.listdir("data"):
        metadata = dir_utils.find_metadata("data/" + filename)
        if metadata is not None:
            db.add_valid_file(filename, metadata)

    return db._load_db()
|
||||||
|
|
||||||
|
|
||||||
|
@hug.get("/sync-mattermost")
def sync_mattermost():
    """HTTP endpoint: run the full scrape -> download -> validate pipeline
    and return the resulting db contents as the response body."""
    print()
    print("=======================================")
    print("== Finding urls posted on mattermost ==")
    find_codimd_files_on_mattermost()
    print()
    print("=============================")
    print("== Downloading found files ==")
    download_files()
    print()
    print("================================================")
    print("== Finding valid files in the downloaded ones ==")
    validate_downloaded_files()
    print()

    return db._load_db()
|
148
sync_notes.sh
148
sync_notes.sh
|
@ -1,148 +0,0 @@
|
||||||
#!/usr/bin/env bash
# Sync downloaded CodiMD notes into the Gitlab "drive" repo via one Merge
# Request per note. Requires: GITLAB_TOKEN, GITLAB_ACCESS_TOKEN_NAME,
# GITLAB_ACCESS_TOKEN env vars; git, glab, sed, grep, cut, xargs.
set -euo pipefail

REPO_FOLDER="drive-temp"

# Initialize (or reuse) a clone of the drive repo in $REPO_FOLDER.
function clone_repo() {
  mkdir -p "$REPO_FOLDER"
  cd "$REPO_FOLDER"

  inside_git_repo="$(git rev-parse --is-inside-work-tree 2>/dev/null || true)"
  if [ ! "$inside_git_repo" ]; then
    git init
    git remote add origin "https://$GITLAB_ACCESS_TOKEN_NAME:$GITLAB_ACCESS_TOKEN@git.zeus.gent/bestuur/drive.git"
    git config user.email "codimd.zeus.gent@mcbloch.dev"
    git config user.name "CodiMD sync bot"
    git pull origin master
  else
    echo "> Git repo already initialized, skipping"
  fi
  git fetch -a

  cd ..
}

# Throw away any local modifications in the working tree.
function clear_repo() {
  git restore .
  git checkout -- .
}

# Check out $1, creating it from master when it does not exist yet, and
# track/pull its remote counterpart when one exists.
function checkout_branch() {
  branch_name=$1

  # Start from master
  git checkout master

  # Now go to the correct branch name
  if ! git checkout -b "$branch_name" >/dev/null; then
    echo "> Checkout existing branch"
    git checkout "$branch_name" >/dev/null
  else
    echo "> Created new branch"
  fi

  if git branch --set-upstream-to="origin/$branch_name" "$branch_name"; then # >/dev/null
    git pull
  fi
}

# Copy one note into the repo on its own branch, commit, push and make sure
# an open MR exists. Uses the caller-scoped $sync_to as the target path.
function sync_file() {
  note_name=$1
  branch_name="codimd-sync_$sync_to"

  echo "> Starting sync of $note_name"

  clear_repo
  checkout_branch "$branch_name"

  echo "> Copy the note to $sync_to"
  cp "../data/$note_name" "$sync_to"

  git add "$sync_to"
  if ! git commit -m "[bot] Update Gitlab with latest CodiMD file version"; then
    #echo "> No changes in our file."
    :
  else
    #echo "> Changes in our file, committing."
    :
  fi

  git push -u origin "$branch_name"

  #MR_NAME="[CodiMD Sync] $note_name"
  #echo "> Checking if pr with name '$MR_NAME' already exists"

  # mrs=$(curl --header "PRIVATE-TOKEN: $GITLAB_ACCESS_TOKEN" "https://git.zeus.gent/api/v4/projects/$GITLAB_PROJECT_ID/merge_requests?labels=codimd-sync" | jq -e 'select(type == "array" and length == 0)' )

  # echo $mrs | jq -e 'select(type == "array" and length == 0)'

  # Check if a MR is already made (open or merged)
  echo "> Checking if the branch differs from master"
  echo "> If so a new pr should be created to push our work"
  echo "> If an open pr already exists, pass"
  echo

  diff_lines=$(git diff "origin/master..$branch_name" | wc -l)
  if [ "$diff_lines" == "0" ]; then
    echo "> Branch has no changes compared to master."
  else
    echo "> Branch has changes"

    if (glab mr list --all --source-branch="$branch_name" | grep "No merge requests match your search"); then
      echo "> No matching Merge Request found at all"

      glab mr create --label codimd-sync -t "[CodiMD sync] Add document $sync_to" --fill --yes

      # NOTE(review): this removes the whole clone while sync_files is still
      # looping over notes — confirm this early teardown is intentional.
      cd ..
      rm -rf drive-temp
    else
      echo "> Matching Merge Request found"
      echo "> Making sure it is an open one"

      if (glab mr list --source-branch="$branch_name" | grep "No open merge requests match your search"); then
        echo "No open merge request found"
        glab mr create --label codimd-sync -t "[CodiMD sync] Update document $sync_to" --fill --yes
      else
        echo "Open merge request found."
      fi
    fi

  fi
}

# Walk every downloaded note and sync the ones that request it.
function sync_files() {
  cd data
  for note_name in *.md; do
    echo
    echo "> ======================"
    echo "> Analyzing $note_name"

    # Extract the sync-to path
    sync_to=$(sed -n -e '/:::spoiler Gitlab sync/,/:::/ p' "$note_name" | grep "sync-to" | cut -d":" -f2 | xargs || true)

    if [ "$sync_to" == "" ]; then
      # echo "> No metadata found, skip"
      :
    else
      echo "> Found a requested sync to: $sync_to"
      cd ../$REPO_FOLDER
      sync_file "$note_name"
      cd ../data
    fi

  done
}

glab auth login --hostname git.zeus.gent --token "$GITLAB_TOKEN"

# A one time operation to clone the repo.
clone_repo

# Loop over the files in the data folder and sync them to Gitlab via Merge Requests
sync_files

exit 0

# https://git.zeus.gent/bestuur/drive
# GITLAB_PROJECT_ID=2
|
Loading…
Reference in a new issue