1- from pytube .extract import playlist_id
1+ # from pytube.extract import playlist_id
2+
3+ # from yt_dlp import YoutubeDL
4+ import yt_dlp
5+
26import requests
3- from utils import encode , decode , getRandomString , download_file
7+ from utils import getRandomString
48import os
59import json
610from time import perf_counter
11+ import datetime
712
813#from pytube import YouTube
9- import pytube
14+ # import pytube
1015
1116from mediaprovider import MediaProvider , InvalidPlaylistInfoException
1217
@@ -42,7 +47,10 @@ def get_youtube_channel(self, identifier):
4247 print (f'get_youtube_channel({ identifier } )' )
4348
4449 url = YOUTUBE_CHANNEL_BASE_URL + identifier
45- channel = pytube .Channel (url )
50+ # Use yt_dlp to create a channel,
51+
52+ channel = yt_dlp .Youtube (url ).get_channel ()
53+ ## channel.playlist_id = channel.playlist_id.replace('UC', 'UU')
4654
4755 playlist_id = channel .playlist_id
4856 #according to one StackOverflow answer and one test, a channel id can also be converted to its uploads playlist id with a string replace of the UCXXXX prefix by UUXXXX
@@ -53,26 +61,33 @@ def get_youtube_playlist(self, identifier):
5361 try :
5462 start_time = perf_counter ()
5563
56- url = YOUTUBE_PLAYLIST_BASE_URL + identifier
64+ url = YOUTUBE_PLAYLIST_BASE_URL + identifier
5765 print (f"get_youtube_playlist(identifier): { url } " )
58- playlist = pytube .Playlist (url )
59-
66+
67+ ydl_opts = {
68+ 'quiet' : True ,
69+ 'extract_flat' : 'in_playlist' , # Ensure we are extracting playlist entries
70+ 'force_generic_extractor' : True ,
71+ }
6072 medias = []
61- for v in playlist .videos :
62-
63- published_at = v .publish_date .strftime ('%Y/%m/%d' )
64- media = {
65- #"channelTitle": channelTitle,
66- "channelId" : v .channel_id ,
67- "playlistId" : identifier ,
68- "title" : v .title ,
69- "description" : v .description ,
70- "publishedAt" : published_at ,
71- "videoUrl" : v .watch_url ,
72- "videoId" : v .video_id ,
73- "createdAt" : published_at
74- }
75- medias .append (media )
73+ # Current time in YYYYMMDD format
74+ now = datetime .datetime .now ().strftime ('%Y%m%d' )
75+ with yt_dlp .YoutubeDL (ydl_opts ) as ydl :
76+ info_dict = ydl .extract_info (url , download = False )
77+ for entry in info_dict .get ( 'entries' , []):
78+ print (entry )
79+ published_at = entry .get ('upload_date' , now )
80+ media = {
81+ "channelId" : entry ['channel_id' ],
82+ "playlistId" : identifier ,
83+ "title" : entry ['title' ],
84+ "description" : entry ['description' ],
85+ "publishedAt" : published_at ,
86+ "videoUrl" : "https://youtube.com/watch?v=" + entry ['id' ],
87+ "videoId" : entry ['id' ],
88+ "createdAt" : published_at
89+ }
90+ medias .append (media )
7691 end_time = perf_counter ()
7792 print (f'Youtube playlist { identifier } : Returning { len (medias )} items. Processing time { end_time - start_time :.2f} seconds' )
7893 return medias
@@ -86,7 +101,21 @@ def download_youtube_video(self, youtubeUrl):
86101 start_time = perf_counter ()
87102 extension = '.mp4'
88103 filename = getRandomString (8 )
89- filepath = pytube .YouTube (youtubeUrl ).streams .filter (subtype = 'mp4' ).get_highest_resolution ().download (output_path = DATA_DIRECTORY , filename = filename )
104+ filepath = f'{ DATA_DIRECTORY } /{ filename } '
105+ ydl_opts = {
106+ 'quiet' : True ,
107+ 'format' : 'best[ext=mp4]' ,
108+ 'outtmpl' : filepath ,
109+ 'cachedir' : False ,
110+ 'progress_hooks' : [],
111+ 'call_home' : False ,
112+ 'no_color' : True ,
113+ 'noprogress' : True ,
114+ }
115+ with yt_dlp .YoutubeDL (ydl_opts ) as ydl :
116+ x = ydl .download ([youtubeUrl ])
117+ print (x )
118+ #filepath = yt_dlp.YoutubeDL(ydl_opts).streams.filter(subtype='mp4').get_highest_resolution().download(output_path = DATA_DIRECTORY, filename = filename)
90119 end_time = perf_counter ()
91120 print (f"download_youtube_video({ youtubeUrl } ): Done. Downloaded in { end_time - start_time :.2f} seconds" )
92121 return filepath , extension
0 commit comments