From 456e10305b9431c1cfc5786798c497928ef2a929 Mon Sep 17 00:00:00 2001 From: Midgard Date: Fri, 22 Nov 2024 22:46:08 +0100 Subject: [PATCH] Make getting posts more robust --- mmcli/mmcli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mmcli/mmcli.py b/mmcli/mmcli.py index 860d7d3..be11030 100755 --- a/mmcli/mmcli.py +++ b/mmcli/mmcli.py @@ -67,13 +67,15 @@ def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after= progress(total) if len(order) < per_page: break - page += 1 + after = order[-1] sleep(0.1) else: # All posts in channel: API gives pages with NEWEST messages first, so reverse the order in # the end (and don't reverse the order of each page separately) posts = [] + # To avoid race conditions when new messages are posted while the channel is being fetched + post_ids = set() while True: data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs}) order = data_page["order"] @@ -81,7 +83,9 @@ def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after= posts.extend( data_page["posts"][post_id] for post_id in order + if post_id not in post_ids ) + post_ids |= set(order) progress(len(posts)) if len(order) < per_page: break