diff --git a/tools/scripts/generate_binary_build_matrix.py b/tools/scripts/generate_binary_build_matrix.py index 960544accd..1061a8394c 100644 --- a/tools/scripts/generate_binary_build_matrix.py +++ b/tools/scripts/generate_binary_build_matrix.py @@ -58,6 +58,7 @@ # Accelerator architectures CPU = "cpu" CPU_AARCH64 = "cpu-aarch64" +CUDA_AARCH64 = "cuda-aarch64" CUDA = "cuda" ROCM = "rocm" @@ -80,6 +81,7 @@ LINUX_GPU_RUNNER = "linux.g5.4xlarge.nvidia.gpu" LINUX_CPU_RUNNER = "linux.2xlarge" LINUX_AARCH64_RUNNER = "linux.arm64.2xlarge" +LINUX_AARCH64_GPU_RUNNER = "linux.arm64.m7g.4xlarge" WIN_GPU_RUNNER = "windows.8xlarge.nvidia.gpu" WIN_CPU_RUNNER = "windows.4xlarge" MACOS_M1_RUNNER = "macos-m1-stable" @@ -103,6 +105,8 @@ def arch_type(arch_version: str) -> str: return ROCM elif arch_version == CPU_AARCH64: return CPU_AARCH64 + elif arch_version == CUDA_AARCH64: + return CUDA_AARCH64 else: # arch_version should always be CPU in this case return CPU @@ -114,7 +118,10 @@ def validation_runner(arch_type: str, os: str) -> str: else: return LINUX_CPU_RUNNER elif os == LINUX_AARCH64: - return LINUX_AARCH64_RUNNER + if arch_type == CUDA_AARCH64: + return LINUX_AARCH64_GPU_RUNNER + else: + return LINUX_AARCH64_RUNNER elif os == WINDOWS: if arch_type == CUDA: return WIN_GPU_RUNNER @@ -154,6 +161,7 @@ def initialize_globals(channel: str, build_python_only: bool) -> None: }, CPU: "pytorch/manylinux-builder:cpu", CPU_AARCH64: "pytorch/manylinuxaarch64-builder:cpu-aarch64", + CUDA_AARCH64: "pytorch/manylinuxaarch64-builder:cuda12.4", } CONDA_CONTAINER_IMAGES = { **{ @@ -188,6 +196,7 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str: return { CPU: "cpu", CPU_AARCH64: CPU, + CUDA_AARCH64: "cu124", CUDA: f"cu{gpu_arch_version.replace('.', '')}", ROCM: f"rocm{gpu_arch_version}", }.get(gpu_arch_type, gpu_arch_version) @@ -490,7 +499,7 @@ def generate_wheels_matrix( if os == LINUX_AARCH64: # Only want the one arch as the CPU type is different and # uses different build/test scripts - arches = [CPU_AARCH64] + arches = [CPU_AARCH64, CUDA_AARCH64] if with_cuda == ENABLE: upload_to_base_bucket = "no"