Skip to content

Commit 1f8883f

Browse files
committed
April 2022 MP3 download issue and URL path fixes
1 parent caff617 commit 1f8883f

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ This script is ideal for:
88
- *Anyone* who wishes to study from the conference talks but doesn't have a reliable internet connection.
99

1010
## What can it do?
11-
This script will allow you to download the LDS General Conference talks in mp3 form that are available at https://www.churchofjesuschrist.org/general-conference.
11+
This script will allow you to download the LDS General Conference talks in mp3 form that are available at https://www.churchofjesuschrist.org/study/general-conference.
1212
It will create *playlists* as `*.m3u` files to allow you to play an *entire session*.
1313
It will also create playlists for *speakers* and *topics*.
1414
This will not only work with the default English versions, but also for *every other language* for which audio files are available.

gen_conf_downloader.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import argparse
6+
import base64
67
import datetime
78
import glob
89
import io
@@ -35,17 +36,20 @@
3536
TalkByTopic = namedtuple('TalkByTopic', 'link speaker title topic')
3637

3738
LDS_ORG_URL = 'https://www.churchofjesuschrist.org'
38-
ALL_CONFERENCES_URL = f'{LDS_ORG_URL}/general-conference/conferences'
39-
ALL_TOPICS_URL = f'{LDS_ORG_URL}/general-conference/topics'
39+
ALL_CONFERENCES_URL = f'{LDS_ORG_URL}/study/general-conference'
40+
ALL_TOPICS_URL = f'{LDS_ORG_URL}/study/general-conference/topics'
4041

4142
GET_LANGS_REGEX = 'data-lang=\".*?\" data-clang=\"(.*?)\">(.*?)</a>'
4243
CONFERENCES_REGEX = '<a[^>]*href="([^"]*)"[^>]*><div[^>]*><img[^>]*></div><span[^>]*>([A-Z][a-z]* \d{4})</span></a>'
4344
CONFERENCE_GROUPS_REGEX = '<a[^>]*href="([^"]*)"[^>]*><div[^>]*><img[^>]*></div><span[^>]*>(\d{4}.\d{4})</span></a>'
4445
CONFERENCE_GROUPS_RANGE_REGEX = '.*/(\d{4})(\d{4})\?lang=.*'
4546
CONFERENCE_LINK_YEAR_MONTH_REGEX = '.*(\d{4})/(\d{2})\?lang=.*'
4647

48+
SCRIPT_BASE64_REGEX = '<script>window.__INITIAL_STATE__[^"]*"([^"]*)";</script>'
4749
MP3_DOWNLOAD_REGEX = '<a[^>]*href="([^"]*)"[^>]*>This Page \(MP3\).*?</a>'
48-
MP3_FILENAME_REGEX = '.*/(.*\.mp3)\?lang=.*'
50+
MP3_DOWNLOAD_FILENAME_REGEX = '.*/(.*\.mp3)\?lang=.*'
51+
MP3_MEDIAURL_REGEX = '{"mediaUrl":"([^"]*)","variant":"audio"}'
52+
MP3_MEDIAURL_FILENAME_REGEX = '.*/(.*\.mp3)'
4953

5054
SESSIONS_REGEX = '<a[^>]*href="([^"]*)"[^>]*><div[^>]*><p><span[^>]*>([^<]*)</span></p></div></a><ul[^>]*>(.*?)</ul>'
5155
SESSION_TALKS_REGEX = '<a[^>]*href="([^"]*)"[^>]*><div[^>]*><p><span[^>]*>([^<]*)</span></p><p[^>]*>([^<]*)</p></div></a>'
@@ -302,10 +306,23 @@ def get_all_talks_by_topic(args):
302306
def get_audio(args, talk):
303307
link_html = get_html(args, f'{LDS_ORG_URL}{decode(talk.link)}')
304308
mp3_link = re.search(MP3_DOWNLOAD_REGEX, link_html)
305-
if not mp3_link:
309+
# In April 2022 the MP3 link became buried in a base64-encoded script section
310+
match = re.search(SCRIPT_BASE64_REGEX, link_html)
311+
if mp3_link:
312+
# Extract and reuse the filename from the MP3 URL (exclude language)
313+
mp3_file = re.match(MP3_DOWNLOAD_FILENAME_REGEX, mp3_link.group(1))
314+
elif not mp3_link and not match:
306315
return
316+
elif not mp3_link and match:
317+
# MP3 link is probably in the base64 encoded script section
318+
script_data = str(base64.b64decode(match.group(1)))
319+
# Search for JSON object containing mediaUrl key and value
320+
mp3_link = re.search(MP3_MEDIAURL_REGEX, script_data)
321+
if not mp3_link:
322+
return
323+
# Extract and reuse the filename from the MP3 URL
324+
mp3_file = re.match(MP3_MEDIAURL_FILENAME_REGEX, mp3_link.group(1))
307325

308-
mp3_file = re.match(MP3_FILENAME_REGEX, mp3_link.group(1))
309326
if not mp3_file:
310327
return
311328

0 commit comments

Comments
 (0)