Skip to content

Commit

Permalink
Clean link references in Telegram crawler
Browse files (browse the repository at this point in the history)
  • Loading branch information
stijn-uva committed Sep 26, 2024
1 parent bfaf23b commit cb4b770
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion datasources/telegram/search_telegram.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -484,7 +484,7 @@ async def gather_posts(self, client, queries, max_items, min_date, max_date):
# invite links
continue

- entity_name = link.split("?")[0].split("#")[0]
+ entity_name = link.split("/")[0].split("?")[0].split("#")[0]
linked_entities.add(entity_name)

# @references
Expand All @@ -493,6 +493,8 @@ async def gather_posts(self, client, queries, max_items, min_date, max_date):
if reference.startswith("@"):
reference = reference[1:]

+ reference = reference.split("/")[0]

linked_entities.add(reference)

# Check if fwd_from or the resolved entity ID is already queued or has been queried
Expand Down

0 comments on commit cb4b770

Please sign in to comment.