Make getting posts more robust

This commit is contained in:
Midgard 2024-11-22 22:46:08 +01:00
parent 80cd8cc4c4
commit 456e10305b
Signed by: midgard
GPG key ID: 511C112F1331BBB4

View file

@ -67,13 +67,15 @@ def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=
progress(total) progress(total)
if len(order) < per_page: if len(order) < per_page:
break break
page += 1 after = order[-1]
sleep(0.1) sleep(0.1)
else: else:
# All posts in channel: API gives pages with NEWEST messages first, so reverse the order in # All posts in channel: API gives pages with NEWEST messages first, so reverse the order in
# the end (and don't reverse the order of each page separately) # the end (and don't reverse the order of each page separately)
posts = [] posts = []
# To avoid race conditions when new messages are posted while the channel is being fetched
post_ids = set()
while True: while True:
data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs}) data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs})
order = data_page["order"] order = data_page["order"]
@ -81,7 +83,9 @@ def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=
posts.extend( posts.extend(
data_page["posts"][post_id] data_page["posts"][post_id]
for post_id in order for post_id in order
if post_id not in post_ids
) )
post_ids |= set(order)
progress(len(posts)) progress(len(posts))
if len(order) < per_page: if len(order) < per_page:
break break