Account for quirks in GET /channels/.../posts

This commit is contained in:
Midgard 2021-05-06 14:04:05 +02:00
parent d853e51048
commit 9d664a9ff1
Signed by: midgard
GPG key ID: 511C112F1331BBB4

View file

@ -4,7 +4,7 @@ import sys
import argparse
import os
import json
from typing import Dict, Optional, List
from typing import Dict, Optional, List, Iterable
import re
from time import sleep
import threading
@ -38,28 +38,73 @@ def http_to_ws(url):
return "ws" + url[4:]
def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=None, since=None, **kwargs) -> Iterable[Dict]:
    """
    Lazily yield the posts of a channel, fetched page by page from
    GET /v4/channels/{channel_id}/posts.

    This is a generator: nothing happens (no request, and no `since` check either) until the
    caller starts iterating. Pages hold up to 200 posts and consecutive requests are spaced
    0.1 s apart.

    @param self: API object; only its `_get(path, params=...)` method is used.
    @param channel_id: ID of the channel to read.
    @param progress: Callback invoked after every fetched page with the number of posts
        fetched so far.
    @param after: If truthy, passed to the API as the `after` query parameter (a post ID) and
        posts are yielded as soon as each page arrives.
    @param since: Unsupported; any truthy value makes the generator raise on first iteration.
    @param kwargs: Extra query parameters, merged into every request last (so they override
        `page`, `per_page` and `after` when the names collide).
    @returns: Generator of post dicts taken from the "posts" mapping of each response.
    @raises ApiException: Passed on from lower layers.
    @raises Exception: When `since` is given.
    """
    per_page = 200
    page = 0
    total = 0

    # if after and since:
    #     raise ValueError("after and since cannot be used together")

    if since:
        # Posts in channel updated after a given timestamp: pagination is broken in the API
        raise Exception("'since' functionality is broken in the API and behaves non-deterministically. It cannot be meaningfully used.")

        # Abandoned implementation, kept for reference:
        # current_since = since
        # while True:
        #     data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"since": current_since, **kwargs})
        #     order = data_page["order"]
        #     yield from (
        #         data_page["posts"][post_id]
        #         for post_id in reversed(order)
        #     )
        #     total += len(order)
        #     progress(total)
        #     if len(order) < 1000: # For some reason the pages go up to 1000 posts if 'since' is given
        #         break
        #     current_since = data_page["posts"][order[0]]["create_at"]
        #     sleep(0.1)

    elif after:
        # Posts in channel after a given ID: API gives pages with OLDEST messages first, so we can
        # yield each page when it is fetched
        while True:
            data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, "after": after, **kwargs})
            order = data_page["order"]

            # Each page's "order" list is walked back to front before yielding.
            yield from (
                data_page["posts"][post_id]
                for post_id in reversed(order)
            )
            total += len(order)
            progress(total)
            # A short (or empty) page means there is nothing left to fetch.
            if len(order) < per_page:
                break
            page += 1

            sleep(0.1)

    else:
        # All posts in channel: API gives pages with NEWEST messages first, so reverse the order in
        # the end (and don't reverse the order of each page separately)
        posts = []
        while True:
            data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs})
            order = data_page["order"]

            posts.extend(
                data_page["posts"][post_id]
                for post_id in order
            )
            progress(len(posts))
            # A short (or empty) page means there is nothing left to fetch.
            if len(order) < per_page:
                break
            page += 1

            sleep(0.1)

        yield from reversed(posts)
@ -150,7 +195,7 @@ def cat(mm_api: mattermost.MMApi, parsed):
backlog_lock = threading.Lock()
def print_initial_messages():
posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after)
posts = get_posts_for_channel(mm_api, channel["id"], after=parsed.after, since=parsed.since)
for post in posts:
print(str_for_post(attribute, post, parsed))