Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup video decoder build stuff #8602

Merged
merged 2 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 56 additions & 64 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,14 @@
USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
USE_FFMPEG = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1"
USE_VIDEO_CODEC = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
# Note: the GPU video decoding stuff used to be called "video codec", which
# isn't an accurate or descriptive name considering there are at least 2 other
# video decoding backends in torchvision. I'm renaming this to "gpu video
# decoder" where possible, keeping user-facing names (like the env var below) to
# the old scheme for BC.
USE_GPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_VIDEO_CODEC", "1") == "1"
# Same here: "use ffmpeg" was used to denote "use cpu video decoder".
USE_CPU_VIDEO_DECODER = os.getenv("TORCHVISION_USE_FFMPEG", "1") == "1"

TORCHVISION_INCLUDE = os.environ.get("TORCHVISION_INCLUDE", "")
TORCHVISION_LIBRARY = os.environ.get("TORCHVISION_LIBRARY", "")
Expand All @@ -45,8 +51,8 @@
print(f"{USE_WEBP = }")
print(f"{USE_NVJPEG = }")
print(f"{NVCC_FLAGS = }")
print(f"{USE_FFMPEG = }")
print(f"{USE_VIDEO_CODEC = }")
print(f"{USE_CPU_VIDEO_DECODER = }")
print(f"{USE_GPU_VIDEO_DECODER = }")
print(f"{TORCHVISION_INCLUDE = }")
print(f"{TORCHVISION_LIBRARY = }")
print(f"{IS_ROCM = }")
Expand Down Expand Up @@ -351,28 +357,21 @@ def make_image_extension():
def make_video_decoders_extensions():
print("Building video decoder extensions")

# Locating ffmpeg
ffmpeg_exe = shutil.which("ffmpeg")
has_ffmpeg = ffmpeg_exe is not None
ffmpeg_version = None
# FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9
# FIXME: causes crash. See the following GitHub issues for more details.
# FIXME: https://github.com/pytorch/pytorch/issues/65000
# FIXME: https://github.com/pytorch/vision/issues/3367
build_without_extensions_msg = "Building without video decoders extensions."
if sys.platform != "linux" or (sys.version_info.major == 3 and sys.version_info.minor == 9):
has_ffmpeg = False
if has_ffmpeg:
try:
# This is to check if ffmpeg is installed properly.
ffmpeg_version = subprocess.check_output(["ffmpeg", "-version"])
except subprocess.CalledProcessError:
print("Building torchvision without ffmpeg support")
print(" Error fetching ffmpeg version, ignoring ffmpeg.")
has_ffmpeg = False
# FIXME: Building torchvision with ffmpeg on MacOS or with Python 3.9
# FIXME: causes crash. See the following GitHub issues for more details.
# FIXME: https://github.com/pytorch/pytorch/issues/65000
# FIXME: https://github.com/pytorch/vision/issues/3367
print("Can only build video decoder extensions on linux and Python != 3.9")
return []

use_ffmpeg = USE_FFMPEG and has_ffmpeg
ffmpeg_exe = shutil.which("ffmpeg")
if ffmpeg_exe is None:
print(f"{build_without_extensions_msg} Couldn't find ffmpeg binary.")
return []

if use_ffmpeg:
def find_ffmpeg_libraries():
ffmpeg_libraries = {"libavcodec", "libavformat", "libavutil", "libswresample", "libswscale"}

ffmpeg_bin = os.path.dirname(ffmpeg_exe)
Expand All @@ -399,18 +398,23 @@ def make_video_decoders_extensions():
library_found |= len(glob.glob(full_path)) > 0

if not library_found:
print("Building torchvision without ffmpeg support")
print(f" {library} header files were not found, disabling ffmpeg support")
use_ffmpeg = False
else:
print("Building torchvision without ffmpeg support")
print(f"{build_without_extensions_msg}")
print(f"{library} header files were not found.")
return None, None

return ffmpeg_include_dir, ffmpeg_library_dir

ffmpeg_include_dir, ffmpeg_library_dir = find_ffmpeg_libraries()
if ffmpeg_include_dir is None or ffmpeg_library_dir is None:
return []

print("Found ffmpeg:")
print(f" ffmpeg include path: {ffmpeg_include_dir}")
print(f" ffmpeg library_dir: {ffmpeg_library_dir}")

extensions = []
if use_ffmpeg:
print("Building torchvision with ffmpeg support")
print(f" ffmpeg version: {ffmpeg_version}")
print(f" ffmpeg include path: {ffmpeg_include_dir}")
print(f" ffmpeg library_dir: {ffmpeg_library_dir}")
if USE_CPU_VIDEO_DECODER:
print("Building with CPU video decoder support")

# TorchVision base decoder + video reader
video_reader_src_dir = os.path.join(ROOT_DIR, "torchvision", "csrc", "io", "video_reader")
Expand All @@ -427,6 +431,7 @@ def make_video_decoders_extensions():

extensions.append(
CppExtension(
# This is an awful name. It should be "cpu_video_decoder". Keeping for BC.
"torchvision.video_reader",
combined_src,
include_dirs=[
Expand All @@ -450,25 +455,24 @@ def make_video_decoders_extensions():
)
)

# Locating video codec
# CUDA_HOME should be set to the cuda root directory.
# TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the location to
# video codec header files and libraries respectively.
video_codec_found = (
BUILD_CUDA_SOURCES
and CUDA_HOME is not None
and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE])
and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE])
and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY])
)
if USE_GPU_VIDEO_DECODER:
# Locating GPU video decoder headers and libraries
# CUDA_HOME should be set to the cuda root directory.
# TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the locations
# to the headers and libraries below
if not (
BUILD_CUDA_SOURCES
and CUDA_HOME is not None
and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in TORCHVISION_INCLUDE])
and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in TORCHVISION_INCLUDE])
and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in TORCHVISION_LIBRARY])
and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
):
print("Could not find necessary dependencies. Refer the setup.py to check which ones are needed.")
print("Building without GPU video decoder support")
return extensions
print("Building torchvision with GPU video decoder support")

use_video_codec = USE_VIDEO_CODEC and video_codec_found
if (
use_video_codec
and use_ffmpeg
and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
):
print("Building torchvision with video codec support")
gpu_decoder_path = os.path.join(CSRS_DIR, "io", "decoder", "gpu")
gpu_decoder_src = glob.glob(os.path.join(gpu_decoder_path, "*.cpp"))
cuda_libs = os.path.join(CUDA_HOME, "lib64")
Expand All @@ -477,7 +481,7 @@ def make_video_decoders_extensions():
_, extra_compile_args = get_macros_and_flags()
extensions.append(
CUDAExtension(
"torchvision.Decoder",
"torchvision.gpu_decoder",
gpu_decoder_src,
include_dirs=[CSRS_DIR] + TORCHVISION_INCLUDE + [gpu_decoder_path] + [cuda_inc] + ffmpeg_include_dir,
library_dirs=ffmpeg_library_dir + TORCHVISION_LIBRARY + [cuda_libs],
Expand All @@ -498,18 +502,6 @@ def make_video_decoders_extensions():
extra_compile_args=extra_compile_args,
)
)
else:
print("Building torchvision without video codec support")
if (
use_video_codec
and use_ffmpeg
and not any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
):
print(
" The installed version of ffmpeg is missing the header file 'bsf.h' which is "
" required for GPU video decoding. Please install the latest ffmpeg from conda-forge channel:"
" `conda install -c conda-forge ffmpeg`."
)

return extensions

Expand Down
6 changes: 3 additions & 3 deletions test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None,


@pytest.mark.skipif(
get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, reason="video_reader backend not available"
get_video_backend() != "pyav" and not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend not available"
)
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
class TestVideo:
Expand All @@ -77,14 +77,14 @@ def test_write_read_video(self):
assert_equal(data, lv)
assert info["video_fps"] == 5

@pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
@pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
def test_probe_video_from_file(self):
with temp_video(10, 300, 300, 5) as (f_name, data):
video_info = io._probe_video_from_file(f_name)
assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps

@pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
@pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
def test_probe_video_from_memory(self):
with temp_video(10, 300, 300, 5) as (f_name, data):
with open(f_name, "rb") as fp:
Expand Down
4 changes: 2 additions & 2 deletions test/test_video_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from numpy.random import randint
from pytest import approx
from torchvision import set_video_backend
from torchvision.io import _HAS_VIDEO_OPT
from torchvision.io import _HAS_CPU_VIDEO_DECODER


try:
Expand Down Expand Up @@ -263,7 +263,7 @@ def _get_video_tensor(video_dir, video_file):


@pytest.mark.skipif(av is None, reason="PyAV unavailable")
@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg")
@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg")
class TestVideoReader:
def check_separate_decoding_result(self, tv_result, config):
"""check the decoding results from TorchVision decoder"""
Expand Down
4 changes: 2 additions & 2 deletions test/test_videoapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import torchvision
from pytest import approx
from torchvision.datasets.utils import download_url
from torchvision.io import _HAS_VIDEO_OPT, VideoReader
from torchvision.io import _HAS_CPU_VIDEO_DECODER, VideoReader


# WARNING: these tests have been skipped forever on the CI because the video ops
Expand Down Expand Up @@ -62,7 +62,7 @@ def fate(name, path="."):
}


@pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg")
@pytest.mark.skipif(_HAS_CPU_VIDEO_DECODER is False, reason="Didn't compile with ffmpeg")
class TestVideoApi:
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
@pytest.mark.parametrize("test_video", test_videos.keys())
Expand Down
2 changes: 1 addition & 1 deletion torchvision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def set_video_backend(backend):
global _video_backend
if backend not in ["pyav", "video_reader", "cuda"]:
raise ValueError("Invalid video backend '%s'. Options are 'pyav', 'video_reader' and 'cuda'" % backend)
if backend == "video_reader" and not io._HAS_VIDEO_OPT:
if backend == "video_reader" and not io._HAS_CPU_VIDEO_DECODER:
# TODO: better messages
message = "video_reader video backend is not available. Please compile torchvision from source and try again"
raise RuntimeError(message)
Expand Down
2 changes: 2 additions & 0 deletions torchvision/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
_HAS_GPU_VIDEO_DECODER = False

from ._video_opt import (
_HAS_CPU_VIDEO_DECODER,
_HAS_VIDEO_OPT,
_probe_video_from_file,
_probe_video_from_memory,
Expand Down Expand Up @@ -49,6 +50,7 @@
"_read_video_from_memory",
"_read_video_timestamps_from_memory",
"_probe_video_from_memory",
"_HAS_CPU_VIDEO_DECODER",
"_HAS_VIDEO_OPT",
"_HAS_GPU_VIDEO_DECODER",
"_read_video_clip_from_memory",
Expand Down
2 changes: 1 addition & 1 deletion torchvision/io/_load_gpu_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


try:
_load_library("Decoder")
_load_library("gpu_decoder")
_HAS_GPU_VIDEO_DECODER = True
except (ImportError, OSError):
_HAS_GPU_VIDEO_DECODER = False
5 changes: 3 additions & 2 deletions torchvision/io/_video_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@

try:
_load_library("video_reader")
_HAS_VIDEO_OPT = True
_HAS_CPU_VIDEO_DECODER = True
except (ImportError, OSError):
_HAS_VIDEO_OPT = False
_HAS_CPU_VIDEO_DECODER = False

_HAS_VIDEO_OPT = _HAS_CPU_VIDEO_DECODER # For BC
default_timebase = Fraction(0, 1)


Expand Down
4 changes: 2 additions & 2 deletions torchvision/io/video_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

from ..utils import _log_api_usage_once

from ._video_opt import _HAS_VIDEO_OPT
from ._video_opt import _HAS_CPU_VIDEO_DECODER

if _HAS_VIDEO_OPT:
if _HAS_CPU_VIDEO_DECODER:

def _has_video_opt() -> bool:
return True
Expand Down
Loading