Base device_memory_resource on cuda::stream_ordered_memory_resource #883

Closed

Changes from all commits · 33 commits
2bfb5f9
Make libcudacxx available for cuda::memory_resource
harrism Oct 6, 2021
bffc9d4
re-enable warnings as errors for .cu compilation units.
harrism Oct 7, 2021
9380636
Make thrust_allocator_tests actually use the different MRs.
harrism Oct 7, 2021
ae26177
Add missing header
harrism Oct 7, 2021
7c8b515
Make `device_memory_resource` inherit `cuda::stream_ordered_memory_re…
harrism Oct 7, 2021
8343e10
cmake style
harrism Oct 7, 2021
10ff7a3
cmake style iter 2
harrism Oct 7, 2021
4eafee8
Update cmake/thirdparty/get_libcudacxx.cmake
harrism Oct 7, 2021
c586b53
Update cmake/thirdparty/get_libcudacxx.cmake
harrism Oct 7, 2021
f7dccee
Add simple resource_view test
harrism Oct 13, 2021
d54620c
Convert RMM containers to use `stream_ordered_resource_view`
harrism Oct 13, 2021
391375a
cmake style
harrism Oct 13, 2021
0a6e9c6
cmake formatting
harrism Oct 13, 2021
4e37232
cmake: set LIBCUDACXX_INCLUDE_DIR
harrism Oct 19, 2021
7e00ac4
Use views in multi_stream_allocations_bench
harrism Oct 19, 2021
2ce1677
Convert polymorphic_allocator and fix do_is_equal in all MRs
harrism Oct 20, 2021
226e11b
Make sure Cython can include `cuda/memory_resource`
harrism Oct 20, 2021
af3dc39
cmake style
harrism Oct 20, 2021
bcd8336
Eliminate Upstream pointers
harrism Oct 21, 2021
c0a6cee
Add missing stream argument
harrism Oct 21, 2021
4326cea
Switch device_scalar_tests to use resource_view
harrism Oct 21, 2021
5f7d6f5
Fix do_is_equal after latest cuda/memory_resource upstream changes
harrism Oct 27, 2021
5e3efad
Merge branch 'branch-21.12' into fea-libcudacxx-mr-stage-1
harrism Oct 27, 2021
1b6fdee
Convert MRs with upstreams back to taking template parameters.
harrism Oct 28, 2021
c735dd6
DOC v22.02 Updates
ajschmidt8 Nov 4, 2021
75ea357
Merge branch 'branch-21.12' into fea-libcudacxx-mr-stage-1
harrism Nov 11, 2021
7b6658e
Merge pull request #915 from rapidsai/branch-21.12
GPUtester Nov 11, 2021
68de77e
Merge pull request #922 from rapidsai/branch-21.12
GPUtester Nov 18, 2021
27715b1
Simplify CMake linting with cmake-format (#913)
vyasr Nov 18, 2021
91dd10e
Merge pull request #925 from rapidsai/branch-21.12
GPUtester Nov 19, 2021
224a45a
Merge branch 'branch-22.02' into fea-libcudacxx-mr-stage-1
harrism Nov 30, 2021
a50e09f
Update to latest name changes in cuda::pmr
harrism Dec 14, 2021
ee7c6cf
Make `device_memory_resource` inherit from a device-accessible `strea…
harrism Dec 14, 2021
30 changes: 19 additions & 11 deletions .pre-commit-config.yaml
@@ -25,18 +25,26 @@ repos:
name: flake8-cython
args: ["--config=python/.flake8.cython"]
types: [cython]
- repo: https://github.com/cheshirekow/cmake-format-precommit
rev: v0.6.11
- repo: local
hooks:
- id: cmake-format
name: cmake-format
args: ["--config-files", "cmake/config.json", "--in-place", "--"]
types: [file] # override `types: [cmake]`
files: \.(cmake(\.in)?)$|CMakeLists\.txt
- id: cmake-lint
args: ["--config-files", "cmake/config.json", "--"]
types: [file] # override `types: [cmake]`
files: \.(cmake(\.in)?)$|CMakeLists\.txt
- id: cmake-format
name: cmake-format
entry: ./scripts/run-cmake-format.sh cmake-format
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmake-format==0.6.11
- id: cmake-lint
name: cmake-lint
entry: ./scripts/run-cmake-format.sh cmake-lint
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmake-format==0.6.11

default_language_version:
python: python3
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,7 @@
# RMM 22.02.00 (Date TBD)

Please see https://github.com/rapidsai/rmm/releases/tag/v22.02.00a for the latest changes to this development branch.

# RMM 21.12.00 (Date TBD)

Please see https://github.com/rapidsai/rmm/releases/tag/v21.12.00a for the latest changes to this development branch.
53 changes: 48 additions & 5 deletions CMakeLists.txt
@@ -14,7 +14,7 @@

cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)

file(DOWNLOAD https://github.com/raw/rapidsai/rapids-cmake/branch-21.12/RAPIDS.cmake
file(DOWNLOAD https://github.com/raw/rapidsai/rapids-cmake/branch-22.02/RAPIDS.cmake
${CMAKE_BINARY_DIR}/RAPIDS.cmake)
include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)

@@ -25,7 +25,7 @@ include(rapids-find)

project(
RMM
VERSION 21.12.00
VERSION 22.02.00
LANGUAGES CXX)

# Write the version header
@@ -50,6 +50,10 @@ message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
# cudart can be statically linked or dynamically linked the python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)

if(NOT RMM_GENERATED_INCLUDE_DIR)
set(RMM_GENERATED_INCLUDE_DIR ${RMM_BINARY_DIR})
endif()

# find packages we depend on
rapids_find_package(
CUDAToolkit REQUIRED
@@ -58,13 +62,15 @@ rapids_find_package(
rapids_cpm_init()
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_thrust.cmake)
include(cmake/thirdparty/get_libcudacxx.cmake)

# library targets
add_library(rmm INTERFACE)
add_library(rmm::rmm ALIAS rmm)

target_include_directories(rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>")
target_include_directories(
rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${LIBCUDACXX_INCLUDE_DIR}>" "$<INSTALL_INTERFACE:include>")

if(CUDA_STATIC_RUNTIME)
message(STATUS "RMM: Enabling static linking of cudart")
@@ -107,7 +113,10 @@ include(CPack)

# install export targets
install(TARGETS rmm EXPORT rmm-exports)
install(DIRECTORY include/rmm/ DESTINATION include/rmm)
install(
DIRECTORY ${RMM_GENERATED_INCLUDE_DIR}/include/libcxx
${RMM_GENERATED_INCLUDE_DIR}/include/libcudacxx
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rmm)
install(FILES ${RMM_BINARY_DIR}/include/rmm/version_config.hpp DESTINATION include/rmm)

set(doc_string
@@ -126,8 +135,42 @@ set(code_string
if(NOT TARGET rmm::Thrust)
thrust_create_target(rmm::Thrust FROM_OPTIONS)
endif()

# nvcc automatically adds the CUDA Toolkit system include paths before any
# system include paths that CMake adds.
#
# CMake implicitly treats all includes on import targets as 'SYSTEM' includes.
#
# To get the cudacxx shipped with RMM to be picked up by consumers instead of the
# version shipped with the CUDA Toolkit we need to make sure it is a non-SYSTEM
# include on the CMake side.
#
# To do this currently, we move the includes from the rmm::rmm target to a
# non-import target to ensure they are `-I` instead of `-isystem`

add_library(rmm_non_system_includes INTERFACE)
target_link_libraries(rmm::rmm INTERFACE rmm_non_system_includes)

get_target_property(all_includes rmm::rmm INTERFACE_INCLUDE_DIRECTORIES)
set(system_includes )
set(normal_includes )
foreach(include IN LISTS all_includes)
if(include MATCHES "/libcudacxx")
list(APPEND normal_includes "${include}")
else()
list(APPEND system_includes "${include}")
endif()
endforeach()
set_target_properties(rmm::rmm PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${system_includes}")
set_target_properties(rmm_non_system_includes
PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${normal_includes}")
]=])

# TODO temporary: force dependents to find RMM's version of libcudacxx
string(APPEND code_string
"set(LIBCUDACXX_INCLUDE_DIR \"${LIBCUDACXX_INCLUDE_DIR}\" CACHE PATH \"\")\n")
string(APPEND code_string "set(LIBCXX_INCLUDE_DIR \"${LIBCXX_INCLUDE_DIR}\" CACHE PATH \"\")\n")

rapids_export(
INSTALL rmm
EXPORT_SET rmm-exports
6 changes: 4 additions & 2 deletions benchmarks/device_uvector/device_uvector_bench.cu
@@ -24,10 +24,12 @@

#include <cuda_runtime_api.h>

using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>;

static void BM_UvectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
pool_mr mr{&cuda_mr};
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
@@ -48,7 +50,7 @@ BENCHMARK(BM_UvectorSizeConstruction)
static void BM_ThrustVectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
pool_mr mr{&cuda_mr};
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
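The `pool_mr` alias above reflects a change that recurs throughout this diff: resource adaptors with upstreams now name the upstream as a pointer type (`pool_memory_resource<cuda_memory_resource*>`) rather than as a bare class type. A usage sketch under that assumption, following the benchmark above:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cstdint>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr{};
  // On this branch the upstream is spelled as a pointer type (see the alias above).
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*> pool_mr{&cuda_mr};
  rmm::mr::set_current_device_resource(&pool_mr);

  // Allocate 1000 elements from the pool on the default stream.
  rmm::device_uvector<std::int64_t> vec(1000, rmm::cuda_stream_default);
  return 0;
}
```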
benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
@@ -27,6 +27,8 @@
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cuda/memory_resource>

#include <cuda_runtime_api.h>

#include <benchmark/benchmark.h>
@@ -50,7 +52,8 @@ __global__ void compute_bound_kernel(int64_t* out)

using MRFactoryFunc = std::function<std::shared_ptr<rmm::mr::device_memory_resource>()>;

static void run_prewarm(rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memory_resource* mr)
static void run_prewarm(rmm::cuda_stream_pool& stream_pool,
cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device> mr)
{
auto buffers = std::vector<rmm::device_uvector<int64_t>>();
for (int32_t i = 0; i < stream_pool.get_pool_size(); i++) {
@@ -61,7 +64,7 @@ static void run_prewarm(rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memo

static void run_test(std::size_t num_kernels,
rmm::cuda_stream_pool& stream_pool,
rmm::mr::device_memory_resource* mr)
cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device> mr)
{
for (int32_t i = 0; i < num_kernels; i++) {
auto stream = stream_pool.get_stream(i);
@@ -100,12 +103,14 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memo

inline auto make_pool()
{
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_arena()
{
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_binning()
@@ -115,8 +120,9 @@ inline auto make_binning()
// Larger allocations will use the pool resource
constexpr auto min_bin_pow2{18};
constexpr auto max_bin_pow2{22};
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(
pool, min_bin_pow2, max_bin_pow2);
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource<
rmm::mr::owning_wrapper<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>,
rmm::mr::cuda_memory_resource>*>>(pool, min_bin_pow2, max_bin_pow2);
return mr;
}

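As `run_prewarm` and `run_test` above show, functions that previously took `rmm::mr::device_memory_resource*` now take a non-owning `cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device>`. A minimal sketch of that calling convention, assuming (as the benchmark does) that the branch's containers accept the view directly:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cuda/memory_resource>

#include <cstdint>

// Device-accessible, stream-ordered view type used throughout this PR
// (experimental cuda::pmr API; treat the spelling as illustrative).
using device_resource_view =
  cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device>;

// Callers pass the view; no ownership or concrete resource type is implied.
void make_buffer(rmm::cuda_stream_view stream, device_resource_view mr)
{
  rmm::device_uvector<std::int64_t> buf(1024, stream, mr);
}
```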
11 changes: 7 additions & 4 deletions benchmarks/random_allocations/random_allocations.cpp
@@ -165,12 +165,14 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memo

inline auto make_pool()
{
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_arena()
{
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_binning()
@@ -180,8 +182,9 @@
// Larger allocations will use the pool resource
constexpr auto min_bin_pow2{18};
constexpr auto max_bin_pow2{22};
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(
pool, min_bin_pow2, max_bin_pow2);
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource<
rmm::mr::owning_wrapper<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>,
rmm::mr::cuda_memory_resource>*>>(pool, min_bin_pow2, max_bin_pow2);
return mr;
}

25 changes: 15 additions & 10 deletions benchmarks/replay/replay.cpp
@@ -58,30 +58,35 @@ std::shared_ptr<rmm::mr::device_memory_resource> make_simulated(std::size_t simu
inline auto make_pool(std::size_t simulated_size)
{
if (simulated_size > 0) {
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource*>>(
make_simulated(simulated_size), simulated_size, simulated_size);
}
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource*>>(make_cuda());
}

inline auto make_arena(std::size_t simulated_size)
{
if (simulated_size > 0) {
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::device_memory_resource*>>(
make_simulated(simulated_size), simulated_size, simulated_size);
}
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::device_memory_resource*>>(make_cuda());
}

inline auto make_binning(std::size_t simulated_size)
{
auto pool = make_pool(simulated_size);
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(pool);
const auto min_size_exp{18};
const auto max_size_exp{22};
for (std::size_t i = min_size_exp; i <= max_size_exp; i++) {
mr->wrapped().add_bin(1 << i);
}
// Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB
// Larger allocations will use the pool resource
constexpr auto min_bin_pow2{18};
constexpr auto max_bin_pow2{22};
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource<
rmm::mr::owning_wrapper<rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource*>,
rmm::mr::device_memory_resource>*>>(pool, min_bin_pow2, max_bin_pow2);
return mr;
}

34 changes: 34 additions & 0 deletions cmake/thirdparty/get_libcudacxx.cmake
@@ -0,0 +1,34 @@
# ==================================================================================================
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# ==================================================================================================

# Get the specified version of libcudacxx
function(find_and_configure_libcudacxx VERSION)
rapids_cpm_find(
libcudacxx ${VERSION}
GIT_REPOSITORY https://github.com/mzient/libcudacxx.git
GIT_TAG memres_view # ${VERSION}
GIT_SHALLOW TRUE
DOWNLOAD_ONLY TRUE)

set(LIBCUDACXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/include"
PARENT_SCOPE)
set(LIBCXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/libcxx/include"
PARENT_SCOPE)
endfunction()

set(RMM_MIN_VERSION_libcudacxx 1.5.0)

find_and_configure_libcudacxx(${RMM_MIN_VERSION_libcudacxx})
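With `LIBCUDACXX_INCLUDE_DIR` exported this way, consumers pick up RMM's pinned libcudacxx rather than the copy shipped with the CUDA Toolkit. A compile-only smoke check, assuming the `memres_view` branch exposes the names used elsewhere in this PR:

```cpp
#include <cuda/memory_resource>

// If this alias compiles, the vendored libcudacxx is on the include path:
// released CUDA Toolkit headers do not provide these experimental names.
using device_resource_view =
  cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device>;

int main() { return 0; }
```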
2 changes: 1 addition & 1 deletion doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME = "RMM"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 21.12
PROJECT_NUMBER = 22.02

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a