From 64fca94210882dd830ffea75beef0316865e2d2a Mon Sep 17 00:00:00 2001 From: Baraa Al-Masri Date: Fri, 24 May 2024 20:49:17 +0300 Subject: [PATCH 1/2] fat-commit: replace yt_dlp with pytube and FastAPI (not so fast) with Flask --- ytdl/Dockerfile | 2 +- ytdl/main.py | 275 ++++++++++++++++++++---------------------- ytdl/requirements.txt | 45 +------ 3 files changed, 135 insertions(+), 187 deletions(-) diff --git a/ytdl/Dockerfile b/ytdl/Dockerfile index 0311dac..a8ff931 100644 --- a/ytdl/Dockerfile +++ b/ytdl/Dockerfile @@ -25,5 +25,5 @@ COPY --from=build /usr/local/bin/ /usr/local/bin/ COPY --from=build /app . EXPOSE 8000 -CMD [ "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000" ] +CMD [ "gunicorn", "-w", "2", "-b", "0.0.0.0:8000", "main:app" ] diff --git a/ytdl/main.py b/ytdl/main.py index 6acbc85..162c903 100644 --- a/ytdl/main.py +++ b/ytdl/main.py @@ -1,21 +1,39 @@ -from yt_dlp import YoutubeDL -from yt_dlp.utils import DownloadError -from fastapi import FastAPI, status, Response -from fastapi.requests import Request +from flask import Flask +import mariadb +import os import os.path -from threading import Lock, Thread -import time +from pytube import YouTube, exceptions as pytube_exceptions import signal -import mariadb import sys +import threading +import time + + +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## +## Envirnmental variables +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## -DOWNLOAD_PATH = os.environ.get("YOUTUBE_MUSIC_DOWNLOAD_PATH") -DB_NAME = os.environ.get("DB_NAME") -DB_HOST = os.environ.get("DB_HOST") -DB_USERNAME = os.environ.get("DB_USERNAME") -DB_PASSWORD = os.environ.get("DB_PASSWORD") +def get_env(key) -> str: + val = os.environ.get(key) + if val is None or val == "": + print(f"Missing {val} suka") + exit(1) + return val + +DB_NAME = get_env("DB_NAME") +DB_HOST = get_env("DB_HOST") +DB_USERNAME = get_env("DB_USERNAME") +DB_PASSWORD = get_env("DB_PASSWORD") +DOWNLOAD_PATH = get_env("YOUTUBE_MUSIC_DOWNLOAD_PATH") + +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## +## DB +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## -## DB stuff conn = None def open_db_conn(): @@ -56,59 +74,66 @@ def song_exists(id: str) -> bool: finally: cur.close() -## Download Video stuff +open_db_conn() -class MutexArray: - def __init__(self, initial_array: []): - self._lock = Lock() - self._array = initial_array.copy() +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## +## Downloader +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## - def exists(self, item) -> bool: - with self._lock: - return item in self._array +YT_ERROR = { + 0: "none", + 1: "age restiction", + 2: "video unavailble", + 3: "other youtube error", +} - def get(self, index): - with self._lock: - return self._array[index] +def download_yt_song(id) -> int: + audio_stream = None + try: + YouTube("https://www.youtube.com/watch?v="+id) \ + .streams.filter(only_audio=True).first() \ + .download(output_path=DOWNLOAD_PATH, filename=id+".mp3") + except pytube_exceptions.AgeRestrictedError: + return 1 + except pytube_exceptions.VideoUnavailable: + return 2 + except pytube_exceptions.RegexMatchError: + return 3 - def set(self, index, value): - with self._lock: - self._array[index] = value + return 0 - def append(self, value): - with self._lock: - self._array.append(value) +############################################################################################################################################################################################################################## - def remove(self, value): - with self._lock: - self._array.remove(value) +to_download_lock = threading.Lock() +to_download_stop_event = threading.Event() +to_download_queue = set([]) - def get_array_and_clear(self): - with self._lock: - clone = self._array.copy() - self._array.clear() - return clone +currently_downloading_lock = threading.Lock() +currently_downloading_stop_event = threading.Event() +currently_downloading_queue = set([]) - def length(self): - with self._lock: - return len(self._array) - def release(self): - self._lock.release() +def background_task(): + while not to_download_stop_event.is_set(): + with to_download_lock: + if to_download_queue: + id = to_download_queue.pop() + print(f"Downloading {id} from the queue.") + download_song(id) -background_download_list = MutexArray([]) -to_be_downloaded = MutexArray([]) + time.sleep(0.5) -ytdl = YoutubeDL({ - "format": "bestaudio/best", - "postprocessors": [{ - "key": "FFmpegExtractAudio", - "preferredcodec": "mp3", - "preferredquality": "192", - }], - "outtmpl": f"{DOWNLOAD_PATH}/%(id)s.%(ext)s" -}) +def add_song_to_queue(id: str) -> int: + """ + add_song_to_queue adds a song's id to the download queue. + """ + with to_download_lock: + to_download_queue.add(id) + print(f"Appended {id} to the array.") + return 0 def download_song(id: str) -> int: @@ -116,111 +141,67 @@ def download_song(id: str) -> int: download_song downloads the given song's ids using yt_dlp, and returns the operation's status code. """ - try: - if id is None or len(id) == 0: - return - - ## wait list - while to_be_downloaded.exists(id): - time.sleep(1) - pass - - ## download the stuff - if song_exists(id): - to_be_downloaded.remove(id) + if not currently_downloading_stop_event.is_set(): + with currently_downloading_lock: + print(f"Downloading song with id {id} ...") + with to_download_lock: + if song_exists(id): + to_download_queue.remove(id) + print(f"The song with id {id} is already downloaded 😬") + return 0 + + while id in currently_downloading_queue: + pass + + currently_downloading_queue.add(id) + res = download_yt_song(id) + currently_downloading_queue.remove(id) + if res != 0: + print(f"error: {YT_ERROR[res]} when downloading {id}") + return res + print("Successfully downloaded " + id) return 0 + return 3 - to_be_downloaded.append(id) - ytdl.download(f"https://www.youtube.com/watch?v={id}") - to_be_downloaded.remove(id) - update_song_status(id) +thread = threading.Thread(target=background_task) +thread.start() - return 0 - except DownloadError: - return 1 - except Exception: - return 2 +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## +############################################################################################################################################################################################################################## +app = Flask(__name__) -def download_songs_from_queue(): - """ - download_songs_from_queue fetches the current songs in the download queue, - and starts the download process. - """ - if background_download_list.length() == 0: - return - for id in background_download_list.get_array_and_clear(): - download_song(id) +@app.route("/download/queue/") +def handle_add_download_song_to_queue(id): + res = add_song_to_queue(id) + if res != 0: + return {"error": YT_ERROR[res]} + return {"msg": "woohoo"} -def add_song_to_queue(id: str): - """ - add_song_to_queue adds a song's id to the download queue. - """ - background_download_list.append(id) - - -## BG downloader thread - -def download_songs_in_background(interval=1): - """ - download_songs_in_background runs every given interval time in seconds (default is 1), - and downloads the songs in the queue in the background. - """ - while True: - download_songs_from_queue() - time.sleep(interval) - - -download_thread = Thread(target=download_songs_in_background, args=(1,)) -## FastAPI Stuff +@app.route("/download/") +def handle_download_song(id): + res = download_song(id) + if res != 0: + return {"error": YT_ERROR[res]} + return {"msg": "woohoo"} -app = FastAPI( - title="DankMuzikk's YouTube Downloader", - description="Apparently the CLI's overhead and limitation has got the best of me.", -) - -@app.on_event("startup") -def on_startup(): - open_db_conn() - global download_thread - download_thread.start() - - -@app.on_event("shutdown") -def on_shutdown(): +def close_server(arg1, arg2): + print("signal shit", arg1, arg2) print("Stopping background download thread...") - background_download_list.release() - to_be_downloaded.release() - download_thread.join() + to_download_stop_event.set() + currently_downloading_queue.set() + thread.join() print("Closing MariaDB's connection...") conn.close() + exit(0) +signal.signal(signal.SIGINT, close_server) +signal.signal(signal.SIGTERM, close_server) -@app.get("/download/queue/{id}", status_code=status.HTTP_200_OK) -def handle_add_download_song_to_queue(id: str, response: Response): - add_song_to_queue(id) - - -@app.get("/download/{id}", status_code=status.HTTP_200_OK) -def handle_download_song(id: str, response: Response): - err = download_song(id) - if err != 0: - response.status_code = status.HTTP_400_BAD_REQUEST - - -@app.get("/download/multi/{ids}", status_code=status.HTTP_200_OK) -def handle_download_songs(ids: str, response: Response): - for id in ids.split(","): - err = download_song(id) - if err != 0: - response.status_code = status.HTTP_400_BAD_REQUEST - - -@app.get("/download/queue/multi/{ids}", status_code=status.HTTP_200_OK) -def handle_add_download_songs_to_queue(ids: str, response: Response): - for id in ids.split(","): - add_song_to_queue(id) - +if __name__ == '__main__': + app.run(port=4321) diff --git a/ytdl/requirements.txt b/ytdl/requirements.txt index 12c3b63..57d0b39 100644 --- a/ytdl/requirements.txt +++ b/ytdl/requirements.txt @@ -1,44 +1,11 @@ -annotated-types==0.6.0 -anyio==4.3.0 -Brotli==1.1.0 -certifi==2024.2.2 -charset-normalizer==3.3.2 +blinker==1.8.2 click==8.1.7 -dnspython==2.6.1 -email_validator==2.1.1 -fastapi==0.111.0 -fastapi-cli==0.0.3 -h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -idna==3.7 +Flask==3.0.3 +gunicorn==22.0.0 +itsdangerous==2.2.0 Jinja2==3.1.4 mariadb==1.1.10 -markdown-it-py==3.0.0 MarkupSafe==2.1.5 -mdurl==0.1.2 -mutagen==1.47.0 -orjson==3.10.3 packaging==24.0 -pycryptodomex==3.20.0 -pydantic==2.7.1 -pydantic_core==2.18.2 -Pygments==2.18.0 -python-dotenv==1.0.1 -python-multipart==0.0.9 -PyYAML==6.0.1 -requests==2.31.0 -rich==13.7.1 -shellingham==1.5.4 -sniffio==1.3.1 -starlette==0.37.2 -typer==0.12.3 -typing_extensions==4.11.0 -ujson==5.9.0 -urllib3==2.2.1 -uvicorn==0.29.0 -uvloop==0.19.0 -watchfiles==0.21.0 -websockets==12.0 -yt-dlp==2024.4.9 +pytube==15.0.0 +Werkzeug==3.0.3 From cc74ecbbd15cf145d4a120fe6297b5b87f87ee9f Mon Sep 17 00:00:00 2001 From: Baraa Al-Masri Date: Fri, 24 May 2024 22:02:34 +0300 Subject: [PATCH 2/2] chore(ytdl): add yt_dlp as fallback :) --- ytdl/main.py | 54 +++++++++++++++++++++++++++++++++---------- ytdl/requirements.txt | 10 ++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/ytdl/main.py b/ytdl/main.py index 162c903..f63f67b 100644 --- a/ytdl/main.py +++ b/ytdl/main.py @@ -7,6 +7,8 @@ import sys import threading import time +from yt_dlp import YoutubeDL +from yt_dlp.utils import DownloadError ############################################################################################################################################################################################################################## @@ -71,8 +73,13 @@ def song_exists(id: str) -> bool: cur.execute("SELECT id FROM songs WHERE yt_id=? AND fully_downloaded=1", (id,)) result = cur.fetchone() return result[0] if result else False + except: + cur.close() + return False finally: cur.close() + return False + open_db_conn() @@ -90,17 +97,31 @@ def song_exists(id: str) -> bool: } def download_yt_song(id) -> int: - audio_stream = None try: YouTube("https://www.youtube.com/watch?v="+id) \ .streams.filter(only_audio=True).first() \ .download(output_path=DOWNLOAD_PATH, filename=id+".mp3") except pytube_exceptions.AgeRestrictedError: - return 1 + try: + print(f"song with id {id} is age resticted, trying yt_dlp...") + ytdl = YoutubeDL({ + "format": "bestaudio/mp3", + "postprocessors": [{ + "key": "FFmpegExtractAudio", + "preferredcodec": "mp3", + "preferredquality": "192", + }], + "outtmpl": f"{DOWNLOAD_PATH}/%(id)s.%(ext)s" + }) + ytdl.download("https://www.youtube.com/watch?v="+id) + except: + return 1 except pytube_exceptions.VideoUnavailable: return 2 except pytube_exceptions.RegexMatchError: return 3 + except: + return 3 return 0 @@ -121,8 +142,9 @@ def background_task(): if to_download_queue: id = to_download_queue.pop() print(f"Downloading {id} from the queue.") - download_song(id) - + res = download_song(id) + if res != 0: + print(f"Error downloading {id}, error: {YT_ERROR[res]}") time.sleep(0.5) @@ -131,8 +153,12 @@ def add_song_to_queue(id: str) -> int: add_song_to_queue adds a song's id to the download queue. """ with to_download_lock: + if song_exists(id): + print(f"The song with id {id} was already downloaded 😬") + return 0 + to_download_queue.add(id) - print(f"Appended {id} to the array.") + print(f"Added song {id} to the download queue.") return 0 @@ -141,16 +167,16 @@ def download_song(id: str) -> int: download_song downloads the given song's ids using yt_dlp, and returns the operation's status code. """ + if song_exists(id): + print(f"The song with id {id} was already downloaded 😬") + return 0 + if not currently_downloading_stop_event.is_set(): with currently_downloading_lock: print(f"Downloading song with id {id} ...") - with to_download_lock: - if song_exists(id): - to_download_queue.remove(id) - print(f"The song with id {id} is already downloaded 😬") - return 0 - while id in currently_downloading_queue: + print("waiting suka") + time.sleep(0.5) pass currently_downloading_queue.add(id) @@ -193,10 +219,14 @@ def handle_download_song(id): def close_server(arg1, arg2): print("signal shit", arg1, arg2) print("Stopping background download thread...") + global to_download_stop_event to_download_stop_event.set() - currently_downloading_queue.set() + global currently_downloading_stop_event + currently_downloading_stop_event.set() + global thread thread.join() print("Closing MariaDB's connection...") + global conn conn.close() exit(0) diff --git a/ytdl/requirements.txt b/ytdl/requirements.txt index 57d0b39..0511518 100644 --- a/ytdl/requirements.txt +++ b/ytdl/requirements.txt @@ -1,11 +1,21 @@ blinker==1.8.2 +Brotli==1.1.0 +certifi==2024.2.2 +charset-normalizer==3.3.2 click==8.1.7 Flask==3.0.3 gunicorn==22.0.0 +idna==3.7 itsdangerous==2.2.0 Jinja2==3.1.4 mariadb==1.1.10 MarkupSafe==2.1.5 +mutagen==1.47.0 packaging==24.0 +pycryptodomex==3.20.0 pytube==15.0.0 +requests==2.32.2 +urllib3==2.2.1 +websockets==12.0 Werkzeug==3.0.3 +yt-dlp==2024.4.9