Skip to content

Commit

Permalink
Catch rate limits in Telegram media downloads
Browse files (browse the repository at this point in the history)
  • Loading branch information
stijn-uva committed Oct 23, 2024
1 parent c479a85 commit 5d5a0e3
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 10 deletions.
35 changes: 26 additions & 9 deletions processors/visualisation/download-telegram-images.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@

from pathlib import Path

import telethon.errors
from telethon import TelegramClient
from telethon.errors import TimedOutError

from common.config_manager import config
from backend.lib.processor import BasicProcessor
from common.lib.exceptions import ProcessorInterruptedException
from common.lib.helpers import UserInput
from common.lib.helpers import UserInput, timify_long
from common.lib.dataset import DataSet
from processors.visualisation.download_images import ImageDownloader

Expand Down Expand Up @@ -194,6 +195,13 @@ async def get_images(self):
if self.interrupted:
raise ProcessorInterruptedException("Interrupted while downloading images")

if not message:
# message no longer exists
self.dataset.log(f"Could not download image for message {msg_id} - message is unavailable (it "
f"may have been deleted)")
self.flawless = False
continue

success = False
try:
# it's actually unclear if images are always jpegs, but this
Expand All @@ -215,14 +223,23 @@ async def get_images(self):
msg_id = str(message.id) if hasattr(message, "id") else f"with index {media_done:,}"
self.dataset.log(f"Could not download image for message {msg_id} ({e})")
self.flawless = False

media_done += 1
self.metadata[filename] = {
"filename": filename,
"success": success,
"from_dataset": self.source_dataset.key,
"post_ids": [msg_id]
}
finally:
media_done += 1
self.metadata[filename] = {
"filename": filename,
"success": success,
"from_dataset": self.source_dataset.key,
"post_ids": [msg_id]
}

except telethon.errors.FloodError as e:
later = "later"
if hasattr(e, "seconds"):
later = f"in {timify_long(e.seconds)}"
self.dataset.update_status(f"Rate-limited by Telegram after downloading {media_done-1:,} image(s); "
f"halting download process. Try again {later}.", is_final=True)
self.flawless = False
break

except ValueError as e:
self.dataset.log(f"Couldn't retrieve images for {entity}, it probably does not exist anymore ({e})")
Expand Down
12 changes: 11 additions & 1 deletion processors/visualisation/download-telegram-videos.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
from pathlib import Path

from telethon import TelegramClient
from telethon.errors import FloodError

from common.config_manager import config
from backend.lib.processor import BasicProcessor
from common.lib.exceptions import ProcessorInterruptedException
from processors.visualisation.download_videos import VideoDownloaderPlus
from common.lib.helpers import UserInput
from common.lib.helpers import UserInput, timify_long
from common.lib.dataset import DataSet

__author__ = "Stijn Peeters"
Expand Down Expand Up @@ -210,6 +211,15 @@ async def get_videos(self):
"from_dataset": self.source_dataset.key,
"post_ids": [msg_id]
}

except FloodError as e:
later = "later"
if hasattr(e, "seconds"):
later = f"in {timify_long(e.seconds)}"
self.dataset.update_status(f"Rate-limited by Telegram after downloading {media_done-1:,} image(s); "
f"halting download process. Try again {later}.", is_final=True)
self.flawless = False
break

except ValueError as e:
self.dataset.log(f"Couldn't retrieve video for {entity}, it probably does not exist anymore ({e})")
Expand Down

0 comments on commit 5d5a0e3

Please sign in to comment.