Base device_memory_resource on cuda::stream_ordered_memory_resource #883

Closed

Changes from all commits · 33 commits
2bfb5f9
Make libcudacxx available for cuda::memory_resource
harrism Oct 6, 2021
bffc9d4
re-enable warnings as errors for .cu compilation units.
harrism Oct 7, 2021
9380636
Make thrust_allocator_tests actually use the different MRs.
harrism Oct 7, 2021
ae26177
Add missing header
harrism Oct 7, 2021
7c8b515
Make `device_memory_resource` inherit `cuda::stream_ordered_memory_re…
harrism Oct 7, 2021
8343e10
cmake style
harrism Oct 7, 2021
10ff7a3
cmake style iter 2
harrism Oct 7, 2021
4eafee8
Update cmake/thirdparty/get_libcudacxx.cmake
harrism Oct 7, 2021
c586b53
Update cmake/thirdparty/get_libcudacxx.cmake
harrism Oct 7, 2021
f7dccee
Add simple resource_view test
harrism Oct 13, 2021
d54620c
Convert RMM containers to use `stream_ordered_resource_view`
harrism Oct 13, 2021
391375a
cmake style
harrism Oct 13, 2021
0a6e9c6
cmake formatting
harrism Oct 13, 2021
4e37232
cmake: set LIBCUDACXX_INCLUDE_DIR
harrism Oct 19, 2021
7e00ac4
Use views in multi_stream_allocations_bench
harrism Oct 19, 2021
2ce1677
Convert polymorphic_allocator and fix do_is_equal in all MRs
harrism Oct 20, 2021
226e11b
Make sure Cython can include `cuda/memory_resource`
harrism Oct 20, 2021
af3dc39
cmake style
harrism Oct 20, 2021
bcd8336
Eliminate Upstream pointers
harrism Oct 21, 2021
c0a6cee
Add missing stream argument
harrism Oct 21, 2021
4326cea
Switch device_scalar_tests to use resource_view
harrism Oct 21, 2021
5f7d6f5
Fix do_is_equal after latest cuda/memory_resource upstream changes
harrism Oct 27, 2021
5e3efad
Merge branch 'branch-21.12' into fea-libcudacxx-mr-stage-1
harrism Oct 27, 2021
1b6fdee
Convert MRs with upstreams back to taking template parameters.
harrism Oct 28, 2021
c735dd6
DOC v22.02 Updates
ajschmidt8 Nov 4, 2021
75ea357
Merge branch 'branch-21.12' into fea-libcudacxx-mr-stage-1
harrism Nov 11, 2021
7b6658e
Merge pull request #915 from rapidsai/branch-21.12
GPUtester Nov 11, 2021
68de77e
Merge pull request #922 from rapidsai/branch-21.12
GPUtester Nov 18, 2021
27715b1
Simplify CMake linting with cmake-format (#913)
vyasr Nov 18, 2021
91dd10e
Merge pull request #925 from rapidsai/branch-21.12
GPUtester Nov 19, 2021
224a45a
Merge branch 'branch-22.02' into fea-libcudacxx-mr-stage-1
harrism Nov 30, 2021
a50e09f
Update to latest name changes in cuda::pmr
harrism Dec 14, 2021
ee7c6cf
Make `device_memory_resource` inherit from a device-accessible `strea…
harrism Dec 14, 2021
30 changes: 19 additions & 11 deletions .pre-commit-config.yaml
@@ -25,18 +25,26 @@ repos:
name: flake8-cython
args: ["--config=python/.flake8.cython"]
types: [cython]
- repo: https://github.com/cheshirekow/cmake-format-precommit
rev: v0.6.11
- repo: local
hooks:
- id: cmake-format
name: cmake-format
args: ["--config-files", "cmake/config.json", "--in-place", "--"]
types: [file] # override `types: [cmake]`
files: \.(cmake(\.in)?)$|CMakeLists\.txt
- id: cmake-lint
args: ["--config-files", "cmake/config.json", "--"]
types: [file] # override `types: [cmake]`
files: \.(cmake(\.in)?)$|CMakeLists\.txt
- id: cmake-format
name: cmake-format
entry: ./scripts/run-cmake-format.sh cmake-format
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmake-format==0.6.11
- id: cmake-lint
name: cmake-lint
entry: ./scripts/run-cmake-format.sh cmake-lint
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmake-format==0.6.11

default_language_version:
python: python3
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,7 @@
# RMM 22.02.00 (Date TBD)

Please see https://github.com/rapidsai/rmm/releases/tag/v22.02.00a for the latest changes to this development branch.

# RMM 21.12.00 (Date TBD)

Please see https://github.com/rapidsai/rmm/releases/tag/v21.12.00a for the latest changes to this development branch.
53 changes: 48 additions & 5 deletions CMakeLists.txt
@@ -14,7 +14,7 @@

cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)

file(DOWNLOAD https://github.com/raw/rapidsai/rapids-cmake/branch-21.12/RAPIDS.cmake
file(DOWNLOAD https://github.com/raw/rapidsai/rapids-cmake/branch-22.02/RAPIDS.cmake
${CMAKE_BINARY_DIR}/RAPIDS.cmake)
include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)

@@ -25,7 +25,7 @@ include(rapids-find)

project(
RMM
VERSION 21.12.00
VERSION 22.02.00
LANGUAGES CXX)

# Write the version header
@@ -50,6 +50,10 @@ message(STATUS "RMM: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'")
# cudart can be statically linked or dynamically linked the python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)

if(NOT RMM_GENERATED_INCLUDE_DIR)
set(RMM_GENERATED_INCLUDE_DIR ${RMM_BINARY_DIR})
endif()

# find packages we depend on
rapids_find_package(
CUDAToolkit REQUIRED
@@ -58,13 +62,15 @@ rapids_find_package(
rapids_cpm_init()
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_thrust.cmake)
include(cmake/thirdparty/get_libcudacxx.cmake)

# library targets
add_library(rmm INTERFACE)
add_library(rmm::rmm ALIAS rmm)

target_include_directories(rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>")
target_include_directories(
rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${LIBCUDACXX_INCLUDE_DIR}>" "$<INSTALL_INTERFACE:include>")

if(CUDA_STATIC_RUNTIME)
message(STATUS "RMM: Enabling static linking of cudart")
@@ -107,7 +113,10 @@ include(CPack)

# install export targets
install(TARGETS rmm EXPORT rmm-exports)
install(DIRECTORY include/rmm/ DESTINATION include/rmm)
install(
DIRECTORY ${RMM_GENERATED_INCLUDE_DIR}/include/libcxx
${RMM_GENERATED_INCLUDE_DIR}/include/libcudacxx
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rmm)
install(FILES ${RMM_BINARY_DIR}/include/rmm/version_config.hpp DESTINATION include/rmm)

set(doc_string
@@ -126,8 +135,42 @@ set(code_string
if(NOT TARGET rmm::Thrust)
thrust_create_target(rmm::Thrust FROM_OPTIONS)
endif()

# nvcc automatically adds the CUDA Toolkit system include paths before any
# system include paths that CMake adds.
#
# CMake implicitly treats all includes on import targets as 'SYSTEM' includes.
#
# To get the cudacxx shipped with RMM to be picked up by consumers instead of the
# version shipped with the CUDA Toolkit we need to make sure it is a non-SYSTEM
# include on the CMake side.
#
# To do this currently, we move the includes from the rmm::rmm target to a
# non-import target to ensure they are `-I` instead of `-isystem`

add_library(rmm_non_system_includes INTERFACE)
target_link_libraries(rmm::rmm INTERFACE rmm_non_system_includes)

get_target_property(all_includes rmm::rmm INTERFACE_INCLUDE_DIRECTORIES)
set(system_includes )
set(normal_includes )
foreach(include IN LISTS all_includes)
if(include MATCHES "/libcudacxx")
list(APPEND normal_includes "${include}")
else()
list(APPEND system_includes "${include}")
endif()
endforeach()
set_target_properties(rmm::rmm PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${system_includes}")
set_target_properties(rmm_non_system_includes
PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${normal_includes}")
]=])

# TODO temporary: force dependents to find RMM's version of libcudacxx
string(APPEND code_string
"set(LIBCUDACXX_INCLUDE_DIR \"${LIBCUDACXX_INCLUDE_DIR}\" CACHE PATH \"\")\n")
string(APPEND code_string "set(LIBCXX_INCLUDE_DIR \"${LIBCXX_INCLUDE_DIR}\" CACHE PATH \"\")\n")

rapids_export(
INSTALL rmm
EXPORT_SET rmm-exports
6 changes: 4 additions & 2 deletions benchmarks/device_uvector/device_uvector_bench.cu
@@ -24,10 +24,12 @@

#include <cuda_runtime_api.h>

using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>;

static void BM_UvectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
pool_mr mr{&cuda_mr};
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
@@ -48,7 +50,7 @@ BENCHMARK(BM_UvectorSizeConstruction)
static void BM_ThrustVectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
pool_mr mr{&cuda_mr};
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
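The `pool_mr` alias above reflects a change that recurs throughout this diff: resource adaptors with upstreams now name the upstream as a pointer type (`pool_memory_resource<cuda_memory_resource*>`) rather than as a bare class type. A usage sketch under that assumption, following the benchmark above:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cstdint>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr{};
  // On this branch the upstream is spelled as a pointer type (see the alias above).
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*> pool_mr{&cuda_mr};
  rmm::mr::set_current_device_resource(&pool_mr);

  // Allocate 1000 elements from the pool on the default stream.
  rmm::device_uvector<std::int64_t> vec(1000, rmm::cuda_stream_default);
  return 0;
}
```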
benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
@@ -27,6 +27,8 @@
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cuda/memory_resource>

#include <cuda_runtime_api.h>

#include <benchmark/benchmark.h>
@@ -50,7 +52,8 @@ __global__ void compute_bound_kernel(int64_t* out)

using MRFactoryFunc = std::function<std::shared_ptr<rmm::mr::device_memory_resource>()>;

static void run_prewarm(rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memory_resource* mr)
static void run_prewarm(rmm::cuda_stream_pool& stream_pool,
cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device> mr)
{
auto buffers = std::vector<rmm::device_uvector<int64_t>>();
for (int32_t i = 0; i < stream_pool.get_pool_size(); i++) {
@@ -61,7 +64,7 @@ static void run_prewarm(rmm::cuda_stream_pool& stream_pool, rmm::mr::device_memo

static void run_test(std::size_t num_kernels,
rmm::cuda_stream_pool& stream_pool,
rmm::mr::device_memory_resource* mr)
cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device> mr)
{
for (int32_t i = 0; i < num_kernels; i++) {
auto stream = stream_pool.get_stream(i);
@@ -100,12 +103,14 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memo

inline auto make_pool()
{
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_arena()
{
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_binning()
@@ -115,8 +120,9 @@ inline auto make_binning()
// Larger allocations will use the pool resource
constexpr auto min_bin_pow2{18};
constexpr auto max_bin_pow2{22};
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(
pool, min_bin_pow2, max_bin_pow2);
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource<
rmm::mr::owning_wrapper<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>,
rmm::mr::cuda_memory_resource>*>>(pool, min_bin_pow2, max_bin_pow2);
return mr;
}

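As `run_prewarm` and `run_test` above show, functions that previously took `rmm::mr::device_memory_resource*` now take a non-owning `cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device>`. A minimal sketch of that calling convention, assuming (as the benchmark does) that the branch's containers accept the view directly:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cuda/memory_resource>

#include <cstdint>

// Device-accessible, stream-ordered view type used throughout this PR
// (experimental cuda::pmr API; treat the spelling as illustrative).
using device_resource_view =
  cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device>;

// Callers pass the view; no ownership or concrete resource type is implied.
void make_buffer(rmm::cuda_stream_view stream, device_resource_view mr)
{
  rmm::device_uvector<std::int64_t> buf(1024, stream, mr);
}
```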
11 changes: 7 additions & 4 deletions benchmarks/random_allocations/random_allocations.cpp
@@ -165,12 +165,14 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memo

inline auto make_pool()
{
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_arena()
{
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource*>>(make_cuda());
}

inline auto make_binning()
@@ -180,8 +182,9 @@
// Larger allocations will use the pool resource
constexpr auto min_bin_pow2{18};
constexpr auto max_bin_pow2{22};
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(
pool, min_bin_pow2, max_bin_pow2);
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource<
rmm::mr::owning_wrapper<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource*>,
rmm::mr::cuda_memory_resource>*>>(pool, min_bin_pow2, max_bin_pow2);
return mr;
}

25 changes: 15 additions & 10 deletions benchmarks/replay/replay.cpp
@@ -58,30 +58,35 @@ std::shared_ptr<rmm::mr::device_memory_resource> make_simulated(std::size_t simu
inline auto make_pool(std::size_t simulated_size)
{
if (simulated_size > 0) {
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource*>>(
make_simulated(simulated_size), simulated_size, simulated_size);
}
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource*>>(make_cuda());
}

inline auto make_arena(std::size_t simulated_size)
{
if (simulated_size > 0) {
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::device_memory_resource*>>(
make_simulated(simulated_size), simulated_size, simulated_size);
}
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<
rmm::mr::arena_memory_resource<rmm::mr::device_memory_resource*>>(make_cuda());
}

inline auto make_binning(std::size_t simulated_size)
{
auto pool = make_pool(simulated_size);
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(pool);
const auto min_size_exp{18};
const auto max_size_exp{22};
for (std::size_t i = min_size_exp; i <= max_size_exp; i++) {
mr->wrapped().add_bin(1 << i);
}
// Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB
// Larger allocations will use the pool resource
constexpr auto min_bin_pow2{18};
constexpr auto max_bin_pow2{22};
auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource<
rmm::mr::owning_wrapper<rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource*>,
rmm::mr::device_memory_resource>*>>(pool, min_bin_pow2, max_bin_pow2);
return mr;
}

34 changes: 34 additions & 0 deletions cmake/thirdparty/get_libcudacxx.cmake
@@ -0,0 +1,34 @@
# ==================================================================================================
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# ==================================================================================================

# Get the specified version of libcudacxx
function(find_and_configure_libcudacxx VERSION)
rapids_cpm_find(
libcudacxx ${VERSION}
GIT_REPOSITORY https://github.com/mzient/libcudacxx.git
GIT_TAG memres_view # ${VERSION}
GIT_SHALLOW TRUE
DOWNLOAD_ONLY TRUE)

set(LIBCUDACXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/include"
PARENT_SCOPE)
set(LIBCXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/libcxx/include"
PARENT_SCOPE)
endfunction()

set(RMM_MIN_VERSION_libcudacxx 1.5.0)

find_and_configure_libcudacxx(${RMM_MIN_VERSION_libcudacxx})
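With `LIBCUDACXX_INCLUDE_DIR` exported this way, consumers pick up RMM's pinned libcudacxx rather than the copy shipped with the CUDA Toolkit. A compile-only smoke check, assuming the `memres_view` branch exposes the names used elsewhere in this PR:

```cpp
#include <cuda/memory_resource>

// If this alias compiles, the vendored libcudacxx is on the include path:
// released CUDA Toolkit headers do not provide these experimental names.
using device_resource_view =
  cuda::pmr::stream_ordered_resource_ptr<cuda::pmr::memory_access::device>;

int main() { return 0; }
```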
2 changes: 1 addition & 1 deletion doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME = "RMM"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 21.12
PROJECT_NUMBER = 22.02

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a