Account for quirks in GET /channels/.../posts

This commit is contained in:
Midgard 2021-05-06 14:04:05 +02:00
parent d853e51048
commit 9d664a9ff1
Signed by: midgard
GPG key ID: 511C112F1331BBB4

View file

@@ -4,7 +4,7 @@ import sys
import argparse import argparse
import os import os
import json import json
from typing import Dict, Optional, List from typing import Dict, Optional, List, Iterable
import re import re
from time import sleep from time import sleep
import threading import threading
@@ -38,28 +38,73 @@ def http_to_ws(url):
return "ws" + url[4:] return "ws" + url[4:]
def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, **kwargs) -> List[Dict]: def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=None, since=None, **kwargs) -> Iterable[Dict]:
""" """
@raises ApiException: Passed on from lower layers. @raises ApiException: Passed on from lower layers.
""" """
per_page = 200
page = 0 page = 0
posts = [] total = 0
while True: # if after and since:
data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page":str(page), "per_page":200, **kwargs}) # raise ValueError("after and since cannot be used together")
if data_page["order"] == []: if since:
break raise Exception("'since' functionality is broken in the API and behaves non-deterministically. It cannot be meaningfully used.")
page += 1 # Posts in channel updated after a given timestamp: pagination is broken in the API
# current_since = since
# while True:
# data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"since": current_since, **kwargs})
# order = data_page["order"]
posts.extend(data_page["posts"][order] for order in data_page["order"]) # yield from (
progress(len(posts)) # data_page["posts"][post_id]
sleep(0.1) # for post_id in reversed(order)
# )
# total += len(order)
# progress(total)
# if len(order) < 1000: # For some reason the pages go up to 1000 posts if 'since' is given
# break
# current_since = data_page["posts"][order[0]]["create_at"]
# sleep(0.1)
# Mattermost gives newest first, so reverse order elif after:
posts.reverse() # Posts in channel after a given ID: API gives pages with OLDEST messages first, so we can
# yield each page when it is fetched
while True:
data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, "after": after, **kwargs})
order = data_page["order"]
return posts yield from (
data_page["posts"][post_id]
for post_id in reversed(order)
)
total += len(order)
progress(total)
if len(order) < per_page:
break
page += 1
sleep(0.1)
else:
# All posts in channel: API gives pages with NEWEST messages first, so reverse the order at
# the end (and don't reverse the order of each page separately)
posts = []
while True:
data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs})
order = data_page["order"]
posts.extend(
data_page["posts"][post_id]
for post_id in order
)
progress(len(posts))
if len(order) < per_page:
break
page += 1
sleep(0.1)
yield from reversed(posts)
@@ -150,7 +195,7 @@ def cat(mm_api: mattermost.MMApi, parsed):
backlog_lock = threading.Lock() backlog_lock = threading.Lock()
def print_initial_messages(): def print_initial_messages():
posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after) posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after, since=parsed.since)
for post in posts: for post in posts:
print(str_for_post(attribute, post, parsed)) print(str_for_post(attribute, post, parsed))