Skip to content

Commit 0a0f34b

Browse files
authored
Cleanup video decoder build stuff (#8602)
1 parent be7cdf1 commit 0a0f34b

9 files changed

+72
-77
lines changed

setup.py

+56-64
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,14 @@
2121
USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
2222
USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
2323
NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
24-
USE_FFMPEG = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1"
25-
USE_VIDEO_CODEC = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
24+
# Note: the GPU video decoding stuff used to be called "video codec", which
25+
# isn't an accurate or descriptive name considering there are at least 2 other
26+
# video decoding backends in torchvision. I'm renaming this to "gpu video
27+
# decoder" where possible, keeping user facing names (like the env var below) to
28+
# the old scheme for BC.
29+
USE_GPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
30+
# Same here: "use ffmpeg" was used to denote "use cpu video decoder".
31+
USE_CPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1"
2632

2733
TORCHVISION_INCLUDE = os.environ.get("TORCHVISION_INCLUDE", "")
2834
TORCHVISION_LIBRARY = os.environ.get("TORCHVISION_LIBRARY", "")
@@ -45,8 +51,8 @@
4551
print(f"{USE_WEBP = }")
4652
print(f"{USE_NVJPEG = }")
4753
print(f"{NVCC_FLAGS = }")
48-
print(f"{USE_FFMPEG = }")
49-
print(f"{USE_VIDEO_CODEC = }")
54+
print(f"{USE_CPU_VIDEO_DECODER = }")
55+
print(f"{USE_GPU_VIDEO_DECODER = }")
5056
print(f"{TORCHVISION_INCLUDE = }")
5157
print(f"{TORCHVISION_LIBRARY = }")
5258
print(f"{IS_ROCM = }")
@@ -351,28 +357,21 @@ def make_image_extension():
351357
def make_video_decoders_extensions():
352358
print("Building video decoder extensions")
353359

354-
# Locating ffmpeg
355-
ffmpeg_exe = shutil.which("ffmpeg")
356-
has_ffmpeg = ffmpeg_exe is not None
357-
ffmpeg_version = None
358-
# FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9
359-
# FIXME: causes crash. See the following GitHub issues for more details.
360-
# FIXME: https://github.com/pytorch/pytorch/issues/65000
361-
# FIXME: https://github.com/pytorch/vision/issues/3367
360+
build_without_extensions_msg = "Building without video decoders extensions."
362361
if sys.platform != "linux" or (sys.version_info.major == 3 and sys.version_info.minor == 9):
363-
has_ffmpeg = False
364-
if has_ffmpeg:
365-
try:
366-
# This is to check if ffmpeg is installed properly.
367-
ffmpeg_version = subprocess.check_output(["ffmpeg", "-version"])
368-
except subprocess.CalledProcessError:
369-
print("Building torchvision without ffmpeg support")
370-
print(" Error fetching ffmpeg version, ignoring ffmpeg.")
371-
has_ffmpeg = False
362+
# FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9
363+
# FIXME: causes crash. See the following GitHub issues for more details.
364+
# FIXME: https://github.com/pytorch/pytorch/issues/65000
365+
# FIXME: https://github.com/pytorch/vision/issues/3367
366+
print("Can only build video decoder extensions on linux and Python != 3.9")
367+
return []
372368

373-
use_ffmpeg = USE_FFMPEG and has_ffmpeg
369+
ffmpeg_exe = shutil.which("ffmpeg")
370+
if ffmpeg_exe is None:
371+
print(f"{build_without_extensions_msg} Couldn't find ffmpeg binary.")
372+
return []
374373

375-
if use_ffmpeg:
374+
def find_ffmpeg_libraries():
376375
ffmpeg_libraries = {"libavcodec", "libavformat", "libavutil", "libswresample", "libswscale"}
377376

378377
ffmpeg_bin = os.path.dirname(ffmpeg_exe)
@@ -399,18 +398,23 @@ def make_video_decoders_extensions():
399398
library_found |= len(glob.glob(full_path)) > 0
400399

401400
if not library_found:
402-
print("Building torchvision without ffmpeg support")
403-
print(f" {library} header files were not found, disabling ffmpeg support")
404-
use_ffmpeg = False
405-
else:
406-
print("Building torchvision without ffmpeg support")
401+
print(f"{build_without_extensions_msg}")
402+
print(f"{library} header files were not found.")
403+
return None, None
404+
405+
return ffmpeg_include_dir, ffmpeg_library_dir
406+
407+
ffmpeg_include_dir, ffmpeg_library_dir = find_ffmpeg_libraries()
408+
if ffmpeg_include_dir is None or ffmpeg_library_dir is None:
409+
return []
410+
411+
print("Found ffmpeg:")
412+
print(f" ffmpeg include path: {ffmpeg_include_dir}")
413+
print(f" ffmpeg library_dir: {ffmpeg_library_dir}")
407414

408415
extensions = []
409-
if use_ffmpeg:
410-
print("Building torchvision with ffmpeg support")
411-
print(f" ffmpeg version: {ffmpeg_version}")
412-
print(f" ffmpeg include path: {ffmpeg_include_dir}")
413-
print(f" ffmpeg library_dir: {ffmpeg_library_dir}")
416+
if USE_CPU_VIDEO_DECODER:
417+
print("Building with CPU video decoder support")
414418

415419
# TorchVision base decoder + video reader
416420
video_reader_src_dir = os.path.join(ROOT_DIR, "torchvision", "csrc", "io", "video_reader")
@@ -427,6 +431,7 @@ def make_video_decoders_extensions():
427431

428432
extensions.append(
429433
CppExtension(
434+
# This is an awful name. It should be "cpu_video_decoder". Keeping for BC.
430435
"torchvision.video_reader",
431436
combined_src,
432437
include_dirs=[
@@ -450,25 +455,24 @@ def make_video_decoders_extensions():
450455
)
451456
)
452457

453-
# Locating video codec
454-
# CUDA_HOME should be set to the cuda root directory.
455-
# TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the location to
456-
# video codec header files and libraries respectively.
457-
video_codec_found = (
458-
BUILD_CUDA_SOURCES
459-
and CUDA_HOME is not None
460-
and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE])
461-
and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE])
462-
and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY])
463-
)
458+
if USE_GPU_VIDEO_DECODER:
459+
# Locating GPU video decoder headers and libraries
460+
# CUDA_HOME should be set to the cuda root directory.
461+
# TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the locations
462+
# to the headers and libraries below
463+
if not (
464+
BUILD_CUDA_SOURCES
465+
and CUDA_HOME is not None
466+
and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE])
467+
and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE])
468+
and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY])
469+
and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
470+
):
471+
print("Could not find necessary dependencies. Refer the setup.py to check which ones are needed.")
472+
print("Building without GPU video decoder support")
473+
return extensions
474+
print("Building torchvision with GPU video decoder support")
464475

465-
use_video_codec = USE_VIDEO_CODEC and video_codec_found
466-
if (
467-
use_video_codec
468-
and use_ffmpeg
469-
and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
470-
):
471-
print("Building torchvision with video codec support")
472476
gpu_decoder_path = os.path.join(CSRS_DIR, "io", "decoder", "gpu")
473477
gpu_decoder_src = glob.glob(os.path.join(gpu_decoder_path, "*.cpp"))
474478
cuda_libs = os.path.join(CUDA_HOME, "lib64")
@@ -477,7 +481,7 @@ def make_video_decoders_extensions():
477481
_, extra_compile_args = get_macros_and_flags()
478482
extensions.append(
479483
CUDAExtension(
480-
"torchvision.Decoder",
484+
"torchvision.gpu_decoder",
481485
gpu_decoder_src,
482486
include_dirs=[CSRS_DIR] + TORCHVISION_INCLUDE + [gpu_decoder_path] + [cuda_inc] + ffmpeg_include_dir,
483487
library_dirs=ffmpeg_library_dir + TORCHVISION_LIBRARY + [cuda_libs],
@@ -498,18 +502,6 @@ def make_video_decoders_extensions():
498502
extra_compile_args=extra_compile_args,
499503
)
500504
)
501-
else:
502-
print("Building torchvision without video codec support")
503-
if (
504-
use_video_codec
505-
and use_ffmpeg
506-
and not any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
507-
):
508-
print(
509-
" The installed version of ffmpeg is missing the header file 'bsf.h' which is "
510-
" required for GPU video decoding. Please install the latest ffmpeg from conda-forge channel:"
511-
" `conda install -c conda-forge ffmpeg`."
512-
)
513505

514506
return extensions
515507

test/test_io.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None,
6363

6464

6565
@pytest.mark.skipif(
66-
get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, reason="video_reader backend not available"
66+
get_video_backend() != "pyav" and not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend not available"
6767
)
6868
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
6969
class TestVideo:
@@ -77,14 +77,14 @@ def test_write_read_video(self):
7777
assert_equal(data, lv)
7878
assert info["video_fps"] == 5
7979

80-
@pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
80+
@pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
8181
def test_probe_video_from_file(self):
8282
with temp_video(10, 300, 300, 5) as (f_name, data):
8383
video_info = io._probe_video_from_file(f_name)
8484
assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
8585
assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps
8686

87-
@pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
87+
@pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
8888
def test_probe_video_from_memory(self):
8989
with temp_video(10, 300, 300, 5) as (f_name, data):
9090
with open(f_name, "rb") as fp:

test/test_video_reader.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from numpy.random import randint
1212
from pytest import approx
1313
from torchvision import set_video_backend
14-
from torchvision.io import _HAS_VIDEO_OPT
14+
from torchvision.io import _HAS_CPU_VIDEO_DECODER
1515

1616

1717
try:
@@ -263,7 +263,7 @@ def _get_video_tensor(video_dir, video_file):
263263

264264

265265
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
266-
@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg")
266+
@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg")
267267
class TestVideoReader:
268268
def check_separate_decoding_result(self, tv_result, config):
269269
"""check the decoding results from TorchVision decoder"""

test/test_videoapi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import torchvision
88
from pytest import approx
99
from torchvision.datasets.utils import download_url
10-
from torchvision.io import _HAS_VIDEO_OPT, VideoReader
10+
from torchvision.io import _HAS_CPU_VIDEO_DECODER, VideoReader
1111

1212

1313
# WARNING: these tests have been skipped forever on the CI because the video ops
@@ -62,7 +62,7 @@ def fate(name, path="."):
6262
}
6363

6464

65-
@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg")
65+
@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg")
6666
class TestVideoApi:
6767
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
6868
@pytest.mark.parametrize("test_video", test_videos.keys())

torchvision/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def set_video_backend(backend):
7272
global _video_backend
7373
if backend not in ["pyav", "video_reader", "cuda"]:
7474
raise ValueError("Invalid video backend '%s'. Options are 'pyav', 'video_reader' and 'cuda'" % backend)
75-
if backend == "video_reader" and not io._HAS_VIDEO_OPT:
75+
if backend == "video_reader" and not io._HAS_CPU_VIDEO_DECODER:
7676
# TODO: better messages
7777
message = "video_reader video backend is not available. Please compile torchvision from source and try again"
7878
raise RuntimeError(message)

torchvision/io/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
_HAS_GPU_VIDEO_DECODER = False
1111

1212
from ._video_opt import (
13+
_HAS_CPU_VIDEO_DECODER,
1314
_HAS_VIDEO_OPT,
1415
_probe_video_from_file,
1516
_probe_video_from_memory,
@@ -49,6 +50,7 @@
4950
"_read_video_from_memory",
5051
"_read_video_timestamps_from_memory",
5152
"_probe_video_from_memory",
53+
"_HAS_CPU_VIDEO_DECODER",
5254
"_HAS_VIDEO_OPT",
5355
"_HAS_GPU_VIDEO_DECODER",
5456
"_read_video_clip_from_memory",

torchvision/io/_load_gpu_decoder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
try:
5-
_load_library("Decoder")
5+
_load_library("gpu_decoder")
66
_HAS_GPU_VIDEO_DECODER = True
77
except (ImportError, OSError):
88
_HAS_GPU_VIDEO_DECODER = False

torchvision/io/_video_opt.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@
1010

1111
try:
1212
_load_library("video_reader")
13-
_HAS_VIDEO_OPT = True
13+
_HAS_CPU_VIDEO_DECODER = True
1414
except (ImportError, OSError):
15-
_HAS_VIDEO_OPT = False
15+
_HAS_CPU_VIDEO_DECODER = False
1616

17+
_HAS_VIDEO_OPT = _HAS_CPU_VIDEO_DECODER # For BC
1718
default_timebase = Fraction(0, 1)
1819

1920

torchvision/io/video_reader.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77

88
from ..utils import _log_api_usage_once
99

10-
from ._video_opt import _HAS_VIDEO_OPT
10+
from ._video_opt import _HAS_CPU_VIDEO_DECODER
1111

12-
if _HAS_VIDEO_OPT:
12+
if _HAS_CPU_VIDEO_DECODER:
1313

1414
def _has_video_opt() -> bool:
1515
return True

0 commit comments

Comments
 (0)