From 9d664a9ff1cc11743909203c1b8f58ed4d1e15b0 Mon Sep 17 00:00:00 2001
From: Midgard
Date: Thu, 6 May 2021 14:04:05 +0200
Subject: [PATCH] Account for quirks in GET /channels/.../posts

---
 mmcli.py | 75 ++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 60 insertions(+), 15 deletions(-)

diff --git a/mmcli.py b/mmcli.py
index 56c8970..aac3e12 100755
--- a/mmcli.py
+++ b/mmcli.py
@@ -4,7 +4,7 @@ import sys
 import argparse
 import os
 import json
-from typing import Dict, Optional, List
+from typing import Dict, Optional, List, Iterable
 import re
 from time import sleep
 import threading
@@ -38,28 +38,73 @@ def http_to_ws(url):
     return "ws" + url[4:]
 
 
-def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, **kwargs) -> List[Dict]:
+def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=None, since=None, **kwargs) -> Iterable[Dict]:
     """
     @raises ApiException: Passed on from lower layers.
     """
+    per_page = 200
     page = 0
-    posts = []
+    total = 0
 
-    while True:
-        data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page":str(page), "per_page":200, **kwargs})
+    # if after and since:
+    #     raise ValueError("after and since cannot be used together")
 
-        if data_page["order"] == []:
-            break
-        page += 1
+    if since:
+        raise Exception("'since' functionality is broken in the API and behaves non-deterministically. It cannot be meaningfully used.")
+        # Posts in channel updated after a given timestamp: pagination is broken in the API
+        # current_since = since
+        # while True:
+        #     data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"since": current_since, **kwargs})
+        #     order = data_page["order"]
 
-        posts.extend(data_page["posts"][order] for order in data_page["order"])
-        progress(len(posts))
-        sleep(0.1)
+        #     yield from (
+        #         data_page["posts"][post_id]
+        #         for post_id in reversed(order)
+        #     )
+        #     total += len(order)
+        #     progress(total)
+        #     if len(order) < 1000: # For some reason the pages go up to 1000 posts if 'since' is given
+        #         break
+        #     current_since = data_page["posts"][order[0]]["create_at"]
+        #     sleep(0.1)
 
-    # Mattermost gives newest first, so reverse order
-    posts.reverse()
+    elif after:
+        # Posts in channel after a given ID: API gives pages with OLDEST messages first, so we can
+        # yield each page when it is fetched
+        while True:
+            data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, "after": after, **kwargs})
+            order = data_page["order"]
 
-    return posts
+            yield from (
+                data_page["posts"][post_id]
+                for post_id in reversed(order)
+            )
+            total += len(order)
+            progress(total)
+            if len(order) < per_page:
+                break
+            page += 1
+            sleep(0.1)
+
+    else:
+        # All posts in channel: API gives pages with NEWEST messages first, so reverse the order in
+        # the end (and don't reverse the order of each page separately)
+        posts = []
+        while True:
+            data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs})
+            order = data_page["order"]
+
+            posts.extend(
+                data_page["posts"][post_id]
+                for post_id in order
+            )
+            progress(len(posts))
+            if len(order) < per_page:
+                break
+            page += 1
+            sleep(0.1)
+
+        yield from reversed(posts)
 
 
 
@@ -150,7 +195,7 @@ def cat(mm_api: mattermost.MMApi, parsed):
     backlog_lock = threading.Lock()
 
     def print_initial_messages():
-        posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after)
+        posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after, since=parsed.since)
         for post in posts:
             print(str_for_post(attribute, post, parsed))
 