Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add presets for nvCOMP #1434

Merged
merged 14 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/actions/deploy-centos/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ runs:
fi

if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda-repo-rhel7-12-1-local-12.1.1_530.30.02-1.x86_64.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/libcudnn8-8.9.1.23-1.cuda12.1.x86_64.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/libcudnn8-devel-8.9.1.23-1.cuda12.1.x86_64.rpm
Expand All @@ -86,6 +86,11 @@ runs:
mv /usr/include/cudnn* /usr/include/nccl* /usr/local/cuda/include/
mv /usr/lib64/libcudnn* /usr/lib64/libnccl* /usr/local/cuda/lib64/

curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/nvcomp_3.0.4_x86_64_12.x.tgz
tar -xvf nvcomp_3.0.4_x86_64_12.x.tgz -C /usr/local/cuda/lib64/ --strip-components=1 lib/
tar -xvf nvcomp_3.0.4_x86_64_12.x.tgz -C /usr/local/cuda/include/ --strip-components=1 include/
rm -f nvcomp_3.0.4_x86_64_12.x.tgz

# Work around issues with CUDA 10.2/11.x
mv /usr/include/cublas* /usr/include/nvblas* /usr/local/cuda/include/ || true
mv /usr/lib64/libcublas* /usr/lib64/libnvblas* /usr/local/cuda/lib64/ || true
Expand All @@ -112,7 +117,7 @@ runs:
sed -i /warp_merge_sort.cuh/d /usr/local/cuda/include/cub/cub.cuh

# Remove downloaded archives and unused libraries to avoid running out of disk space
rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a)
rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a -and -not -name libnvcomp_device.a)
fi

if [[ "$CI_DEPLOY_MODULE" == "nvcodec" ]]; then
Expand Down
13 changes: 11 additions & 2 deletions .github/actions/deploy-ubuntu/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-3-local-12.3.0_545.23.06-1.aarch64.rpm
export CUDNN=8.9.5.29-1.cuda12.2.aarch64
export NCCL=2.18.5-1+cuda12.2.aarch64
export NVCOMP=nvcomp_3.0.4_SBSA_12.x
export USERLAND_BUILDME="buildme --aarch64"
elif [[ "$CI_DEPLOY_PLATFORM" == "linux-ppc64le" ]]; then
export ARCH=ppc64el
Expand All @@ -52,6 +53,7 @@ runs:
export CUDA=cuda-repo-rhel8-12-3-local-12.3.0_545.23.06-1.x86_64.rpm
export CUDNN=8.9.5.29-1.cuda12.2.x86_64
export NCCL=2.18.5-1+cuda12.2.x86_64
export NVCOMP=nvcomp_3.0.4_x86_64_12.x
fi
echo "ARCH=$ARCH" >> $GITHUB_ENV
echo "PREFIX=$PREFIX" >> $GITHUB_ENV
Expand Down Expand Up @@ -137,7 +139,7 @@ runs:
fi

if [[ -n ${ARCH_CUDA:-} ]] && [[ -n ${CI_DEPLOY_NEED_CUDA:-} ]]; then
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/12.3.0/local_installers/$CUDA
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn8-$CUDNN.rpm
curl -LO https://developer.download.nvidia.com/compute/cuda/repos/rhel8/$ARCH_CUDA/libcudnn8-devel-$CUDNN.rpm
Expand All @@ -153,6 +155,13 @@ runs:
$SUDO mv /usr/include/cudnn* /usr/include/nccl* /usr/local/cuda/include/
$SUDO mv /usr/lib64/libcudnn* /usr/lib64/libnccl* /usr/local/cuda/lib64/

if [[ -n ${NVCOMP:-} ]]; then
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/$NVCOMP.tgz
$SUDO tar -xvf nvcomp_*.tgz -C /usr/local/cuda/lib64/ --strip-components=1 lib/
$SUDO tar -xvf nvcomp_*.tgz -C /usr/local/cuda/include/ --strip-components=1 include/
rm -f $NVCOMP.tgz
fi

# Work around issues with CUDA 10.2/11.x
$SUDO mv /usr/include/cublas* /usr/include/nvblas* /usr/local/cuda/include/ || true
$SUDO mv /usr/lib64/libcublas* /usr/lib64/libnvblas* /usr/local/cuda/lib64/ || true
Expand All @@ -179,7 +188,7 @@ runs:
$SUDO sed -i /warp_merge_sort.cuh/d /usr/local/cuda/include/cub/cub.cuh

# Remove downloaded archives and unused libraries to avoid running out of disk space
$SUDO rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a)
$SUDO rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a -and -not -name libnvcomp_device.a)
fi

if [[ "$CI_DEPLOY_MODULE" == "nvcodec" ]]; then
Expand Down
10 changes: 9 additions & 1 deletion .github/actions/deploy-windows/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,13 @@ runs:
)

if "%CI_DEPLOY_PLATFORM%"=="windows-x86_64" if not "%CI_DEPLOY_NEED_CUDA%"=="" (
echo Installing CUDA, cuDNN, etc
echo Installing CUDA, cuDNN, nvCOMP, etc
curl -LO https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
curl -LO https://developer.download.nvidia.com/compute/cuda/12.3.0/local_installers/cuda_12.3.0_545.84_windows.exe
rem curl -LO https://developer.download.nvidia.com/compute/redist/cudnn/v8.8.0/local_installers/12.0/cudnn_8.8.0.121_windows.exe
python -m gdown.cli https://drive.google.com/uc?id=1-5QHvwDZC_1rhn5W6fRHNWicXRPtqt31
curl -LO http://www.winimage.com/zLibDll/zlib123dllx64.zip
curl -LO https://developer.download.nvidia.com/compute/nvcomp/3.0.4/local_installers/nvcomp_3.0.4_windows_12.x.zip
cuda_11.8.0_522.06_windows.exe -s
bash -c "rm -Rf 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.8'"
bash -c "mv 'C:/Program Files/NVIDIA Corporation/NvToolsExt' 'C:/Program Files/NVIDIA Corporation/NvToolsExt_old'"
Expand All @@ -105,13 +106,20 @@ runs:
rem cudnn_8.8.0.121_windows.exe -s
unzip cudnn-windows-x86_64-8.9.5.29_cuda12-archive.zip
unzip zlib123dllx64.zip
unzip nvcomp_3.0.4_windows_12.x.zip
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\bin\*.dll" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\include\*.h" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
rem move "%ProgramFiles%\NVIDIA\CUDNN\v8.8\lib\x64\*.lib" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\bin\*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\include\*.h "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move cudnn-windows-x86_64-8.9.5.29_cuda12-archive\lib\x64\*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"
move dll_x64\zlibwapi.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move include\* "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\gdeflate "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\native "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move include\nvcomp "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\include"
move lib\nvcomp*.dll "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\bin"
move lib\nvcomp*.lib "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.3\lib\x64"

rem echo Applying hotfix to Visual Studio 2019 for CUDA
rem curl -LO https://github.com/raw/microsoft/STL/main/stl/inc/cmath
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* CUDA 12.3.x https://developer.nvidia.com/cuda-downloads
* cuDNN 8.9.x https://developer.nvidia.com/cudnn
* NCCL 2.18.x https://developer.nvidia.com/nccl
* nvCOMP 3.0.x https://developer.nvidia.com/nvcomp
* NVIDIA Video Codec SDK 12.1.x https://developer.nvidia.com/nvidia-video-codec-sdk
* OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader
* MXNet 1.9.x https://github.com/apache/incubator-mxnet
Expand Down
3 changes: 3 additions & 0 deletions cuda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ To view the license for cuDNN included in these archives, click [here](https://d
### NVIDIA Collective Communications Library (NCCL)
To view the license for NCCL included in these archives, click [here](https://github.com/NVIDIA/nccl/blob/master/LICENSE.txt)

### NVIDIA nvCOMP
To view the license for nvCOMP included in these archives, click [here](https://github.com/NVIDIA/nvcomp/blob/main/LICENSE)

Introduction
------------
Expand All @@ -26,6 +28,7 @@ This directory contains the JavaCPP Presets module for:
* CUDA 12.3.0 https://developer.nvidia.com/cuda-zone
* cuDNN 8.9.5 https://developer.nvidia.com/cudnn
* NCCL 2.18.5 https://developer.nvidia.com/nccl
* nvCOMP 3.0.4 https://developer.nvidia.com/nvcomp

Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.

Expand Down
95 changes: 95 additions & 0 deletions cuda/samples/nvcompLZ4Example.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

import org.bytedeco.cuda.cudart.CUstream_st;
import org.bytedeco.cuda.global.nvcomp;
import org.bytedeco.cuda.nvcomp.*;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;

import static org.bytedeco.cuda.global.cudart.*;
import static org.bytedeco.cuda.global.nvcomp.*;

// https://github.com/NVIDIA/nvcomp/blob/main/examples/high_level_quickstart_example.cpp
public class nvcompLZ4Example {
private static void decomp_compressed_with_manager_factory_example(BytePointer device_input_ptrs, long input_buffer_len) {
CUstream_st stream = new CUstream_st();
int cuda_error = cudaStreamCreate(stream);

long chunk_size = 1 << 16;

nvcompBatchedLZ4Opts_t format_opts = new nvcompBatchedLZ4Opts_t();
format_opts.data_type(NVCOMP_TYPE_CHAR);
LZ4Manager nvcomp_manager = new LZ4Manager(chunk_size, format_opts, stream, 0, nvcomp.NoComputeNoVerify);
CompressionConfig comp_config = nvcomp_manager.configure_compression(input_buffer_len);

BytePointer comp_buffer = new BytePointer();
cuda_error = cudaMalloc(comp_buffer, comp_config.max_compressed_buffer_size());

nvcomp_manager.compress(device_input_ptrs, comp_buffer, comp_config);

// Construct a new nvcomp manager from the compressed buffer.
// Note we could use the nvcomp_manager from above, but here we demonstrate how to create a manager
// for the use case where a buffer is received and the user doesn't know how it was compressed
// Also note, creating the manager in this way synchronizes the stream, as the compressed buffer must be read to
// construct the manager
nvcompManagerBase decomp_nvcomp_manager = create_manager(comp_buffer, stream, 0, NoComputeNoVerify);

DecompressionConfig decomp_config = decomp_nvcomp_manager.configure_decompression(comp_buffer);
BytePointer res_decomp_buffer = new BytePointer();
cuda_error = cudaMalloc(res_decomp_buffer, decomp_config.decomp_data_size());

decomp_nvcomp_manager.decompress(res_decomp_buffer, comp_buffer, decomp_config);

cuda_error = cudaFree(comp_buffer);
cuda_error = cudaFree(res_decomp_buffer);
cuda_error = cudaStreamSynchronize(stream);
cuda_error = cudaStreamDestroy(stream);
}

public static void main(String[] args) {
Loader.load(nvcomp.class);

// Initialize a random array of chars
int input_buffer_len = 1000000;
byte[] uncompressed_data = new byte[input_buffer_len];

for (int i = 0; i < input_buffer_len; i++) {
uncompressed_data[i] = (byte) (Math.random() * 26 + 'a');
}

BytePointer uncompressed_data_ptr = new BytePointer(uncompressed_data);

BytePointer device_input_ptrs = new BytePointer();

int cuda_error = cudaMalloc(device_input_ptrs, input_buffer_len);
cuda_error = cudaMemcpy(device_input_ptrs, uncompressed_data_ptr, input_buffer_len, cudaMemcpyDefault);

decomp_compressed_with_manager_factory_example(device_input_ptrs, input_buffer_len);
}
}
Loading