Account for quirks in GET /channels/.../posts

This commit is contained in:
Midgard 2021-05-06 14:04:05 +02:00
parent d853e51048
commit 9d664a9ff1
Signed by: midgard
GPG key ID: 511C112F1331BBB4

View file

@ -4,7 +4,7 @@ import sys
import argparse import argparse
import os import os
import json import json
from typing import Dict, Optional, List from typing import Dict, Optional, List, Iterable
import re import re
from time import sleep from time import sleep
import threading import threading
@ -38,28 +38,73 @@ def http_to_ws(url):
return "ws" + url[4:] return "ws" + url[4:]
def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=None, since=None, **kwargs) -> Iterable[Dict]:
	"""
	Iterate over the posts of a channel, oldest post first.

	This is a generator: nothing is requested (and no error is raised) until iteration starts.

	@param self: API object; only its `_get(path, params=...)` method is used.
	@param channel_id: ID of the channel to read.
	@param progress: Callback that receives the number of posts fetched so far, once per page.
	@param after: If given, only request posts after the post with this ID. Pages are yielded as
		soon as they are fetched.
	@param since: Not supported, see below.
	@param kwargs: Extra query parameters for the request. They are merged last, so they override
		`page`, `per_page` and `after` when the same key is given.

	@raises NotImplementedError: If `since` is given.
	@raises ApiException: Passed on from lower layers.
	"""
	per_page = 200

	if since:
		# Posts in channel updated after a given timestamp: pagination is broken in the API (pages
		# went up to 1000 posts and results were non-deterministic), so refuse rather than return
		# unreliable data.
		raise NotImplementedError("'since' functionality is broken in the API and behaves non-deterministically. It cannot be meaningfully used.")

	if after:
		# Posts in channel after a given ID: API gives pages with OLDEST messages first, so we can
		# yield each page when it is fetched. Within a page the order is newest first, hence the
		# per-page reversal.
		total = 0
		for data_page in _iter_post_pages(self, channel_id, per_page, {"after": after, **kwargs}):
			order = data_page["order"]
			posts_by_id = data_page["posts"]
			yield from (
				posts_by_id[post_id]
				for post_id in reversed(order)
			)
			total += len(order)
			progress(total)
		return

	# All posts in channel: API gives pages with NEWEST messages first, so collect everything and
	# reverse the order in the end (and don't reverse the order of each page separately).
	posts = []
	for data_page in _iter_post_pages(self, channel_id, per_page, kwargs):
		posts_by_id = data_page["posts"]
		posts.extend(
			posts_by_id[post_id]
			for post_id in data_page["order"]
		)
		progress(len(posts))
	yield from reversed(posts)


def _iter_post_pages(api, channel_id: str, per_page: int, extra_params: Dict) -> Iterable[Dict]:
	"""
	Yield successive raw response pages of GET /v4/channels/{channel_id}/posts.

	Stops after the first page holding fewer than `per_page` posts. Sleeps 0.1 s between
	requests.
	"""
	page = 0
	while True:
		data_page = api._get(
			f"/v4/channels/{channel_id}/posts",
			params={"page": page, "per_page": per_page, **extra_params},
		)
		yield data_page

		# A short page means there is nothing left to fetch.
		if len(data_page["order"]) < per_page:
			return

		page += 1
		sleep(0.1)
@ -150,7 +195,7 @@ def cat(mm_api: mattermost.MMApi, parsed):
backlog_lock = threading.Lock() backlog_lock = threading.Lock()
def print_initial_messages(): def print_initial_messages():
posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after) posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after, since=parsed.since)
for post in posts: for post in posts:
print(str_for_post(attribute, post, parsed)) print(str_for_post(attribute, post, parsed))