Skip to content

Commit

Permalink
Look elsewhere for data to extract from TikTok post page
Browse files Browse the repository at this point in the history
  • Loading branch information
stijn-uva committed Dec 19, 2023
1 parent a4e6904 commit f24828b
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions datasources/tiktok_urls/search_tiktok_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@ async def request_metadata(self, urls):
soup = BeautifulSoup(response.text, "html.parser")
sigil = soup.select_one("script#SIGI_STATE")

if not sigil:
# fall back to the alternative script tag, which may hold the JSON instead
sigil = soup.select_one("script#__UNIVERSAL_DATA_FOR_REHYDRATION__")

if not sigil:
if url not in retries or retries[url] < 3:
if url not in retries:
Expand Down Expand Up @@ -399,6 +403,13 @@ def reformat_metadata(self, metadata):
:param dict metadata: Metadata extracted from the TikTok video page
:return: Yields one dictionary per video
"""
# the data may use the `__DEFAULT_SCOPE__` layout instead; if so, rewrap the
# single video as an `ItemModule` mapping so the loop below can process it
if "__DEFAULT_SCOPE__" in metadata and "webapp.video-detail" in metadata["__DEFAULT_SCOPE__"]:
video = metadata["__DEFAULT_SCOPE__"]["webapp.video-detail"]["itemInfo"]["itemStruct"]
metadata = {"ItemModule": {
video["id"]: video
}}

if "ItemModule" in metadata:
for video_id, item in metadata["ItemModule"].items():
if "CommentItem" in metadata:
Expand Down

0 comments on commit f24828b

Please sign in to comment.