diff --git a/setup.py b/setup.py index fb3b503e6e6..406d8149174 100644 --- a/setup.py +++ b/setup.py @@ -21,8 +21,14 @@ USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1" USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1" NVCC_FLAGS = os.getenv("NVCC_FLAGS", None) -USE_FFMPEG = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1" -USE_VIDEO_CODEC = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1" +# Note: the GPU video decoding stuff used to be called "video codec", which +# isn't an accurate or descriptive name considering there are at least 2 other +# video decoding backends in torchvision. I'm renaming this to "gpu video +# decoder" where possible, keeping user facing names (like the env var below) to +# the old scheme for BC. +USE_GPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1" +# Same here: "use ffmpeg" was used to denote "use cpu video decoder". +USE_CPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1" TORCHVISION_INCLUDE = os.environ.get("TORCHVISION_INCLUDE", "") TORCHVISION_LIBRARY = os.environ.get("TORCHVISION_LIBRARY", "") @@ -45,8 +51,8 @@ print(f"{USE_WEBP = }") print(f"{USE_NVJPEG = }") print(f"{NVCC_FLAGS = }") -print(f"{USE_FFMPEG = }") -print(f"{USE_VIDEO_CODEC = }") +print(f"{USE_CPU_VIDEO_DECODER = }") +print(f"{USE_GPU_VIDEO_DECODER = }") print(f"{TORCHVISION_INCLUDE = }") print(f"{TORCHVISION_LIBRARY = }") print(f"{IS_ROCM = }") @@ -351,28 +357,21 @@ def make_image_extension(): def make_video_decoders_extensions(): print("Building video decoder extensions") - # Locating ffmpeg - ffmpeg_exe = shutil.which("ffmpeg") - has_ffmpeg = ffmpeg_exe is not None - ffmpeg_version = None - # FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9 - # FIXME: causes crash. See the following GitHub issues for more details.
- # FIXME: https://github.com/pytorch/pytorch/issues/65000 - # FIXME: https://github.com/pytorch/vision/issues/3367 + build_without_extensions_msg = "Building without video decoders extensions." if sys.platform != "linux" or (sys.version_info.major == 3 and sys.version_info.minor == 9): - has_ffmpeg = False - if has_ffmpeg: - try: - # This is to check if ffmpeg is installed properly. - ffmpeg_version = subprocess.check_output(["ffmpeg", "-version"]) - except subprocess.CalledProcessError: - print("Building torchvision without ffmpeg support") - print(" Error fetching ffmpeg version, ignoring ffmpeg.") - has_ffmpeg = False + # FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9 + # FIXME: causes crash. See the following GitHub issues for more details. + # FIXME: https://github.com/pytorch/pytorch/issues/65000 + # FIXME: https://github.com/pytorch/vision/issues/3367 + print("Can only build video decoder extensions on linux and Python != 3.9") + return [] - use_ffmpeg = USE_FFMPEG and has_ffmpeg + ffmpeg_exe = shutil.which("ffmpeg") + if ffmpeg_exe is None: + print(f"{build_without_extensions_msg} Couldn't find ffmpeg binary.") + return [] - if use_ffmpeg: + def find_ffmpeg_libraries(): ffmpeg_libraries = {"libavcodec", "libavformat", "libavutil", "libswresample", "libswscale"} ffmpeg_bin = os.path.dirname(ffmpeg_exe) @@ -399,18 +398,23 @@ def make_video_decoders_extensions(): library_found |= len(glob.glob(full_path)) > 0 if not library_found: - print("Building torchvision without ffmpeg support") - print(f" {library} header files were not found, disabling ffmpeg support") - use_ffmpeg = False - else: - print("Building torchvision without ffmpeg support") + print(f"{build_without_extensions_msg}") + print(f"{library} header files were not found.") + return None, None + + return ffmpeg_include_dir, ffmpeg_library_dir + + ffmpeg_include_dir, ffmpeg_library_dir = find_ffmpeg_libraries() + if ffmpeg_include_dir is None or ffmpeg_library_dir is None: + 
return [] + + print("Found ffmpeg:") + print(f" ffmpeg include path: {ffmpeg_include_dir}") + print(f" ffmpeg library_dir: {ffmpeg_library_dir}") extensions = [] - if use_ffmpeg: - print("Building torchvision with ffmpeg support") - print(f" ffmpeg version: {ffmpeg_version}") - print(f" ffmpeg include path: {ffmpeg_include_dir}") - print(f" ffmpeg library_dir: {ffmpeg_library_dir}") + if USE_CPU_VIDEO_DECODER: + print("Building with CPU video decoder support") # TorchVision base decoder + video reader video_reader_src_dir = os.path.join(ROOT_DIR, "torchvision", "csrc", "io", "video_reader") @@ -427,6 +431,7 @@ def make_video_decoders_extensions(): extensions.append( CppExtension( + # This is an awful name. It should be "cpu_video_decoder". Keeping for BC. "torchvision.video_reader", combined_src, include_dirs=[ @@ -450,25 +455,24 @@ def make_video_decoders_extensions(): ) ) - # Locating video codec - # CUDA_HOME should be set to the cuda root directory. - # TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the location to - # video codec header files and libraries respectively. - video_codec_found = ( - BUILD_CUDA_SOURCES - and CUDA_HOME is not None - and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE]) - and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE]) - and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY]) - ) + if USE_GPU_VIDEO_DECODER: + # Locating GPU video decoder headers and libraries + # CUDA_HOME should be set to the cuda root directory.
+ # TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the locations + # to the headers and libraries below + if not ( + BUILD_CUDA_SOURCES + and CUDA_HOME is not None + and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE]) + and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE]) + and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY]) + and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir]) + ): + print("Could not find necessary dependencies. Refer the setup.py to check which ones are needed.") + print("Building without GPU video decoder support") + return extensions + print("Building torchvision with GPU video decoder support") - use_video_codec = USE_VIDEO_CODEC and video_codec_found - if ( - use_video_codec - and use_ffmpeg - and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir]) - ): - print("Building torchvision with video codec support") gpu_decoder_path = os.path.join(CSRS_DIR, "io", "decoder", "gpu") gpu_decoder_src = glob.glob(os.path.join(gpu_decoder_path, "*.cpp")) cuda_libs = os.path.join(CUDA_HOME, "lib64") @@ -477,7 +481,7 @@ def make_video_decoders_extensions(): _, extra_compile_args = get_macros_and_flags() extensions.append( CUDAExtension( - "torchvision.Decoder", + "torchvision.gpu_decoder", gpu_decoder_src, include_dirs=[CSRS_DIR] + TORCHVISION_INCLUDE + [gpu_decoder_path] + [cuda_inc] + ffmpeg_include_dir, library_dirs=ffmpeg_library_dir + TORCHVISION_LIBRARY + [cuda_libs], @@ -498,18 +502,6 @@ def make_video_decoders_extensions(): extra_compile_args=extra_compile_args, ) ) - else: - print("Building torchvision without video codec support") - if ( - use_video_codec - and use_ffmpeg - and not any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir]) - ): - print( - " The 
installed version of ffmpeg is missing the header file 'bsf.h' which is " - " required for GPU video decoding. Please install the latest ffmpeg from conda-forge channel:" - " `conda install -c conda-forge ffmpeg`." - ) return extensions diff --git a/test/test_io.py b/test/test_io.py index 1b7b7eb15a1..d2950ac9595 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -63,7 +63,7 @@ def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, @pytest.mark.skipif( - get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, reason="video_reader backend not available" + get_video_backend() != "pyav" and not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend not available" ) @pytest.mark.skipif(av is None, reason="PyAV unavailable") class TestVideo: @@ -77,14 +77,14 @@ def test_write_read_video(self): assert_equal(data, lv) assert info["video_fps"] == 5 - @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen") + @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen") def test_probe_video_from_file(self): with temp_video(10, 300, 300, 5) as (f_name, data): video_info = io._probe_video_from_file(f_name) assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps - @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen") + @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen") def test_probe_video_from_memory(self): with temp_video(10, 300, 300, 5) as (f_name, data): with open(f_name, "rb") as fp: diff --git a/test/test_video_reader.py b/test/test_video_reader.py index 243aa12fc12..10995424982 100644 --- a/test/test_video_reader.py +++ b/test/test_video_reader.py @@ -11,7 +11,7 @@ from numpy.random import randint from pytest import approx from torchvision import set_video_backend -from torchvision.io import _HAS_VIDEO_OPT +from 
torchvision.io import _HAS_CPU_VIDEO_DECODER try: @@ -263,7 +263,7 @@ def _get_video_tensor(video_dir, video_file): @pytest.mark.skipif(av is None, reason="PyAV unavailable") -@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg") +@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg") class TestVideoReader: def check_separate_decoding_result(self, tv_result, config): """check the decoding results from TorchVision decoder""" diff --git a/test/test_videoapi.py b/test/test_videoapi.py index dc878ca9f8c..aabcf6407f7 100644 --- a/test/test_videoapi.py +++ b/test/test_videoapi.py @@ -7,7 +7,7 @@ import torchvision from pytest import approx from torchvision.datasets.utils import download_url -from torchvision.io import _HAS_VIDEO_OPT, VideoReader +from torchvision.io import _HAS_CPU_VIDEO_DECODER, VideoReader # WARNING: these tests have been skipped forever on the CI because the video ops @@ -62,7 +62,7 @@ def fate(name, path="."): } -@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg") +@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg") class TestVideoApi: @pytest.mark.skipif(av is None, reason="PyAV unavailable") @pytest.mark.parametrize("test_video", test_videos.keys()) diff --git a/torchvision/__init__.py b/torchvision/__init__.py index d7365f17c9c..dd1e4ea6e94 100644 --- a/torchvision/__init__.py +++ b/torchvision/__init__.py @@ -72,7 +72,7 @@ def set_video_backend(backend): global _video_backend if backend not in ["pyav", "video_reader", "cuda"]: raise ValueError("Invalid video backend '%s'. Options are 'pyav', 'video_reader' and 'cuda'" % backend) - if backend == "video_reader" and not io._HAS_VIDEO_OPT: + if backend == "video_reader" and not io._HAS_CPU_VIDEO_DECODER: # TODO: better messages message = "video_reader video backend is not available. 
Please compile torchvision from source and try again" raise RuntimeError(message) diff --git a/torchvision/io/__init__.py b/torchvision/io/__init__.py index ad1a1482585..31af6c5a0b1 100644 --- a/torchvision/io/__init__.py +++ b/torchvision/io/__init__.py @@ -10,6 +10,7 @@ _HAS_GPU_VIDEO_DECODER = False from ._video_opt import ( + _HAS_CPU_VIDEO_DECODER, _HAS_VIDEO_OPT, _probe_video_from_file, _probe_video_from_memory, @@ -49,6 +50,7 @@ "_read_video_from_memory", "_read_video_timestamps_from_memory", "_probe_video_from_memory", + "_HAS_CPU_VIDEO_DECODER", "_HAS_VIDEO_OPT", "_HAS_GPU_VIDEO_DECODER", "_read_video_clip_from_memory", diff --git a/torchvision/io/_load_gpu_decoder.py b/torchvision/io/_load_gpu_decoder.py index f7869f0a9d1..cfd40c545d8 100644 --- a/torchvision/io/_load_gpu_decoder.py +++ b/torchvision/io/_load_gpu_decoder.py @@ -2,7 +2,7 @@ try: - _load_library("Decoder") + _load_library("gpu_decoder") _HAS_GPU_VIDEO_DECODER = True except (ImportError, OSError): _HAS_GPU_VIDEO_DECODER = False diff --git a/torchvision/io/_video_opt.py b/torchvision/io/_video_opt.py index 2bd7d11929e..69af045e773 100644 --- a/torchvision/io/_video_opt.py +++ b/torchvision/io/_video_opt.py @@ -10,10 +10,11 @@ try: _load_library("video_reader") - _HAS_VIDEO_OPT = True + _HAS_CPU_VIDEO_DECODER = True except (ImportError, OSError): - _HAS_VIDEO_OPT = False + _HAS_CPU_VIDEO_DECODER = False +_HAS_VIDEO_OPT = _HAS_CPU_VIDEO_DECODER # For BC default_timebase = Fraction(0, 1) diff --git a/torchvision/io/video_reader.py b/torchvision/io/video_reader.py index c00723a4534..505909fd984 100644 --- a/torchvision/io/video_reader.py +++ b/torchvision/io/video_reader.py @@ -7,9 +7,9 @@ from ..utils import _log_api_usage_once -from ._video_opt import _HAS_VIDEO_OPT +from ._video_opt import _HAS_CPU_VIDEO_DECODER -if _HAS_VIDEO_OPT: +if _HAS_CPU_VIDEO_DECODER: def _has_video_opt() -> bool: return True