Make getting posts more robust
This commit is contained in:
parent
80cd8cc4c4
commit
456e10305b
1 changed file with 5 additions and 1 deletion
|
@@ -67,13 +67,15 @@ def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=
|
||||||
progress(total)
|
progress(total)
|
||||||
if len(order) < per_page:
|
if len(order) < per_page:
|
||||||
break
|
break
|
||||||
page += 1
|
after = order[-1]
|
||||||
sleep(0.1)
|
sleep(0.1)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# All posts in channel: API gives pages with NEWEST messages first, so reverse the order in
|
# All posts in channel: API gives pages with NEWEST messages first, so reverse the order in
|
||||||
# the end (and don't reverse the order of each page separately)
|
# the end (and don't reverse the order of each page separately)
|
||||||
posts = []
|
posts = []
|
||||||
|
# To avoid race conditions when new messages are posted while the channel is being fetched
|
||||||
|
post_ids = set()
|
||||||
while True:
|
while True:
|
||||||
data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs})
|
data_page = self._get(f"/v4/channels/{channel_id}/posts", params={"page": page, "per_page": per_page, **kwargs})
|
||||||
order = data_page["order"]
|
order = data_page["order"]
|
||||||
|
@@ -81,7 +83,9 @@ def get_posts_for_channel(self, channel_id: str, progress=lambda x: None, after=
|
||||||
posts.extend(
|
posts.extend(
|
||||||
data_page["posts"][post_id]
|
data_page["posts"][post_id]
|
||||||
for post_id in order
|
for post_id in order
|
||||||
|
if post_id not in post_ids
|
||||||
)
|
)
|
||||||
|
post_ids |= set(order)
|
||||||
progress(len(posts))
|
progress(len(posts))
|
||||||
if len(order) < per_page:
|
if len(order) < per_page:
|
||||||
break
|
break
|
||||||
|
|
Loading…
Reference in a new issue