Cleanup video decoder build stuff (#8602)

NicolasHug · web-flow · commit 0a0f34b4d2c5 · 2024-08-19T12:45:12.000+01:00
diff --git a/setup.py b/setup.py
@@ -21,8 +21,14 @@
 USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
 USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
 NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
-USE_FFMPEG = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1"
-USE_VIDEO_CODEC = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
+# Note: the GPU video decoding stuff used to be called "video codec", which
+# isn't an accurate or descriptive name considering there are at least 2 other
+# video decoding backends in torchvision. I'm renaming this to "gpu video
+# decoder" where possible, keeping user facing names (like the env var below) to
+# the old scheme for BC.
+USE_GPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
+# Same here: "use ffmpeg" was used to denote "use cpu video decoder".
+USE_CPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1"
 
 TORCHVISION_INCLUDE = os.environ.get("TORCHVISION_INCLUDE", "")
 TORCHVISION_LIBRARY = os.environ.get("TORCHVISION_LIBRARY", "")
@@ -45,8 +51,8 @@
 print(f"{USE_WEBP = }")
 print(f"{USE_NVJPEG = }")
 print(f"{NVCC_FLAGS = }")
-print(f"{USE_FFMPEG = }")
-print(f"{USE_VIDEO_CODEC = }")
+print(f"{USE_CPU_VIDEO_DECODER = }")
+print(f"{USE_GPU_VIDEO_DECODER = }")
 print(f"{TORCHVISION_INCLUDE = }")
 print(f"{TORCHVISION_LIBRARY = }")
 print(f"{IS_ROCM = }")
@@ -351,28 +357,21 @@ def make_image_extension():
 def make_video_decoders_extensions():
     print("Building video decoder extensions")
 
-    # Locating ffmpeg
-    ffmpeg_exe = shutil.which("ffmpeg")
-    has_ffmpeg = ffmpeg_exe is not None
-    ffmpeg_version = None
-    # FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9
-    # FIXME: causes crash. See the following GitHub issues for more details.
-    # FIXME: https://github.com/pytorch/pytorch/issues/65000
-    # FIXME: https://github.com/pytorch/vision/issues/3367
+    build_without_extensions_msg = "Building without video decoders extensions."
     if sys.platform != "linux" or (sys.version_info.major == 3 and sys.version_info.minor == 9):
-        has_ffmpeg = False
-    if has_ffmpeg:
-        try:
-            # This is to check if ffmpeg is installed properly.
-            ffmpeg_version = subprocess.check_output(["ffmpeg", "-version"])
-        except subprocess.CalledProcessError:
-            print("Building torchvision without ffmpeg support")
-            print("  Error fetching ffmpeg version, ignoring ffmpeg.")
-            has_ffmpeg = False
+        # FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9
+        # FIXME: causes crash. See the following GitHub issues for more details.
+        # FIXME: https://github.com/pytorch/pytorch/issues/65000
+        # FIXME: https://github.com/pytorch/vision/issues/3367
+        print("Can only build video decoder extensions on linux and Python != 3.9")
+        return []
 
-    use_ffmpeg = USE_FFMPEG and has_ffmpeg
+    ffmpeg_exe = shutil.which("ffmpeg")
+    if ffmpeg_exe is None:
+        print(f"{build_without_extensions_msg} Couldn't find ffmpeg binary.")
+        return []
 
-    if use_ffmpeg:
+    def find_ffmpeg_libraries():
         ffmpeg_libraries = {"libavcodec", "libavformat", "libavutil", "libswresample", "libswscale"}
 
         ffmpeg_bin = os.path.dirname(ffmpeg_exe)
@@ -399,18 +398,23 @@ def make_video_decoders_extensions():
                 library_found |= len(glob.glob(full_path)) > 0
 
             if not library_found:
-                print("Building torchvision without ffmpeg support")
-                print(f"  {library} header files were not found, disabling ffmpeg support")
-                use_ffmpeg = False
-    else:
-        print("Building torchvision without ffmpeg support")
+                print(f"{build_without_extensions_msg}")
+                print(f"{library} header files were not found.")
+                return None, None
+
+        return ffmpeg_include_dir, ffmpeg_library_dir
+
+    ffmpeg_include_dir, ffmpeg_library_dir = find_ffmpeg_libraries()
+    if ffmpeg_include_dir is None or ffmpeg_library_dir is None:
+        return []
+
+    print("Found ffmpeg:")
+    print(f"  ffmpeg include path: {ffmpeg_include_dir}")
+    print(f"  ffmpeg library_dir: {ffmpeg_library_dir}")
 
     extensions = []
-    if use_ffmpeg:
-        print("Building torchvision with ffmpeg support")
-        print(f"  ffmpeg version: {ffmpeg_version}")
-        print(f"  ffmpeg include path: {ffmpeg_include_dir}")
-        print(f"  ffmpeg library_dir: {ffmpeg_library_dir}")
+    if USE_CPU_VIDEO_DECODER:
+        print("Building with CPU video decoder support")
 
         # TorchVision base decoder + video reader
         video_reader_src_dir = os.path.join(ROOT_DIR, "torchvision", "csrc", "io", "video_reader")
@@ -427,6 +431,7 @@ def make_video_decoders_extensions():
 
         extensions.append(
             CppExtension(
+                # This is an awful name. It should be "cpu_video_decoder". Keeping for BC.
                 "torchvision.video_reader",
                 combined_src,
                 include_dirs=[
@@ -450,25 +455,24 @@ def make_video_decoders_extensions():
             )
         )
 
-    # Locating video codec
-    # CUDA_HOME should be set to the cuda root directory.
-    # TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the location to
-    # video codec header files and libraries respectively.
-    video_codec_found = (
-        BUILD_CUDA_SOURCES
-        and CUDA_HOME is not None
-        and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE])
-        and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE])
-        and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY])
-    )
+    if USE_GPU_VIDEO_DECODER:
+        # Locating GPU video decoder headers and libraries
+        # CUDA_HOME should be set to the cuda root directory.
+        # TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the locations
+        # to the headers and libraries below
+        if not (
+            BUILD_CUDA_SOURCES
+            and CUDA_HOME is not None
+            and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE])
+            and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE])
+            and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY])
+            and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
+        ):
+            print("Could not find necessary dependencies. Refer the setup.py to check which ones are needed.")
+            print("Building without GPU video decoder support")
+            return extensions
+        print("Building torchvision with GPU video decoder support")
 
-    use_video_codec = USE_VIDEO_CODEC and video_codec_found
-    if (
-        use_video_codec
-        and use_ffmpeg
-        and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
-    ):
-        print("Building torchvision with video codec support")
         gpu_decoder_path = os.path.join(CSRS_DIR, "io", "decoder", "gpu")
         gpu_decoder_src = glob.glob(os.path.join(gpu_decoder_path, "*.cpp"))
         cuda_libs = os.path.join(CUDA_HOME, "lib64")
@@ -477,7 +481,7 @@ def make_video_decoders_extensions():
         _, extra_compile_args = get_macros_and_flags()
         extensions.append(
             CUDAExtension(
-                "torchvision.Decoder",
+                "torchvision.gpu_decoder",
                 gpu_decoder_src,
                 include_dirs=[CSRS_DIR] + TORCHVISION_INCLUDE + [gpu_decoder_path] + [cuda_inc] + ffmpeg_include_dir,
                 library_dirs=ffmpeg_library_dir + TORCHVISION_LIBRARY + [cuda_libs],
@@ -498,18 +502,6 @@ def make_video_decoders_extensions():
                 extra_compile_args=extra_compile_args,
             )
         )
-    else:
-        print("Building torchvision without video codec support")
-        if (
-            use_video_codec
-            and use_ffmpeg
-            and not any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
-        ):
-            print(
-                "  The installed version of ffmpeg is missing the header file 'bsf.h' which is "
-                "  required for GPU video decoding. Please install the latest ffmpeg from conda-forge channel:"
-                "   `conda install -c conda-forge ffmpeg`."
-            )
 
     return extensions
 
diff --git a/test/test_io.py b/test/test_io.py
@@ -63,7 +63,7 @@ def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None,
 
 
 @pytest.mark.skipif(
-    get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, reason="video_reader backend not available"
+    get_video_backend() != "pyav" and not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend not available"
 )
 @pytest.mark.skipif(av is None, reason="PyAV unavailable")
 class TestVideo:
@@ -77,14 +77,14 @@ def test_write_read_video(self):
             assert_equal(data, lv)
             assert info["video_fps"] == 5
 
-    @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
+    @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
     def test_probe_video_from_file(self):
         with temp_video(10, 300, 300, 5) as (f_name, data):
             video_info = io._probe_video_from_file(f_name)
             assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
             assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps
 
-    @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
+    @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
     def test_probe_video_from_memory(self):
         with temp_video(10, 300, 300, 5) as (f_name, data):
             with open(f_name, "rb") as fp:
diff --git a/test/test_video_reader.py b/test/test_video_reader.py
@@ -11,7 +11,7 @@
 from numpy.random import randint
 from pytest import approx
 from torchvision import set_video_backend
-from torchvision.io import _HAS_VIDEO_OPT
+from torchvision.io import _HAS_CPU_VIDEO_DECODER
 
 
 try:
@@ -263,7 +263,7 @@ def _get_video_tensor(video_dir, video_file):
 
 
 @pytest.mark.skipif(av is None, reason="PyAV unavailable")
-@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg")
+@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg")
 class TestVideoReader:
     def check_separate_decoding_result(self, tv_result, config):
         """check the decoding results from TorchVision decoder"""
diff --git a/test/test_videoapi.py b/test/test_videoapi.py
@@ -7,7 +7,7 @@
 import torchvision
 from pytest import approx
 from torchvision.datasets.utils import download_url
-from torchvision.io import _HAS_VIDEO_OPT, VideoReader
+from torchvision.io import _HAS_CPU_VIDEO_DECODER, VideoReader
 
 
 # WARNING: these tests have been skipped forever on the CI because the video ops
@@ -62,7 +62,7 @@ def fate(name, path="."):
 }
 
 
-@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg")
+@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg")
 class TestVideoApi:
     @pytest.mark.skipif(av is None, reason="PyAV unavailable")
     @pytest.mark.parametrize("test_video", test_videos.keys())
diff --git a/torchvision/__init__.py b/torchvision/__init__.py
@@ -72,7 +72,7 @@ def set_video_backend(backend):
     global _video_backend
     if backend not in ["pyav", "video_reader", "cuda"]:
         raise ValueError("Invalid video backend '%s'. Options are 'pyav', 'video_reader' and 'cuda'" % backend)
-    if backend == "video_reader" and not io._HAS_VIDEO_OPT:
+    if backend == "video_reader" and not io._HAS_CPU_VIDEO_DECODER:
         # TODO: better messages
         message = "video_reader video backend is not available. Please compile torchvision from source and try again"
         raise RuntimeError(message)
diff --git a/torchvision/io/__init__.py b/torchvision/io/__init__.py
@@ -10,6 +10,7 @@
     _HAS_GPU_VIDEO_DECODER = False
 
 from ._video_opt import (
+    _HAS_CPU_VIDEO_DECODER,
     _HAS_VIDEO_OPT,
     _probe_video_from_file,
     _probe_video_from_memory,
@@ -49,6 +50,7 @@
     "_read_video_from_memory",
     "_read_video_timestamps_from_memory",
     "_probe_video_from_memory",
+    "_HAS_CPU_VIDEO_DECODER",
     "_HAS_VIDEO_OPT",
     "_HAS_GPU_VIDEO_DECODER",
     "_read_video_clip_from_memory",
diff --git a/torchvision/io/_load_gpu_decoder.py b/torchvision/io/_load_gpu_decoder.py
@@ -2,7 +2,7 @@
 
 
 try:
-    _load_library("Decoder")
+    _load_library("gpu_decoder")
     _HAS_GPU_VIDEO_DECODER = True
 except (ImportError, OSError):
     _HAS_GPU_VIDEO_DECODER = False
diff --git a/torchvision/io/_video_opt.py b/torchvision/io/_video_opt.py
@@ -10,10 +10,11 @@
 
 try:
     _load_library("video_reader")
-    _HAS_VIDEO_OPT = True
+    _HAS_CPU_VIDEO_DECODER = True
 except (ImportError, OSError):
-    _HAS_VIDEO_OPT = False
+    _HAS_CPU_VIDEO_DECODER = False
 
+_HAS_VIDEO_OPT = _HAS_CPU_VIDEO_DECODER  # For BC
 default_timebase = Fraction(0, 1)
 
 
diff --git a/torchvision/io/video_reader.py b/torchvision/io/video_reader.py
@@ -7,9 +7,9 @@
 
 from ..utils import _log_api_usage_once
 
-from ._video_opt import _HAS_VIDEO_OPT
+from ._video_opt import _HAS_CPU_VIDEO_DECODER
 
-if _HAS_VIDEO_OPT:
+if _HAS_CPU_VIDEO_DECODER:
 
     def _has_video_opt() -> bool:
         return True