diff --git a/README.md b/README.md index e90398c2b..1250d094b 100644 --- a/README.md +++ b/README.md @@ -771,8 +771,8 @@ of 1 GiB and a maximum size of 4 GiB. The pool uses >>> import rmm >>> pool = rmm.mr.PoolMemoryResource( ... rmm.mr.CudaMemoryResource(), -... initial_pool_size=2**30, -... maximum_pool_size=2**32 +... initial_pool_size="1GiB", # equivalent to initial_pool_size=2**30 +... maximum_pool_size="4GiB" ... ) >>> rmm.mr.set_current_device_resource(pool) ``` diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 36c9183f9..0eddb1d92 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -40,7 +40,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state) rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ &cuda_mr, rmm::percent_of_free_device_memory(50)}; - rmm::mr::set_current_device_resource(&mr); + rmm::mr::set_current_device_resource_ref(mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_uvector vec(state.range(0), rmm::cuda_stream_view{}); @@ -49,7 +49,7 @@ void BM_UvectorSizeConstruction(benchmark::State& state) state.SetItemsProcessed(static_cast(state.iterations())); - rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } BENCHMARK(BM_UvectorSizeConstruction) @@ -62,7 +62,7 @@ void BM_ThrustVectorSizeConstruction(benchmark::State& state) rmm::mr::cuda_memory_resource cuda_mr{}; rmm::mr::pool_memory_resource mr{ &cuda_mr, rmm::percent_of_free_device_memory(50)}; - rmm::mr::set_current_device_resource(&mr); + rmm::mr::set_current_device_resource_ref(mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) rmm::device_vector vec(state.range(0)); @@ -71,7 +71,7 @@ void BM_ThrustVectorSizeConstruction(benchmark::State& state) state.SetItemsProcessed(static_cast(state.iterations())); - rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } BENCHMARK(BM_ThrustVectorSizeConstruction) @@ -140,7 +140,7 @@ template void BM_VectorWorkflow(benchmark::State& state) { rmm::mr::cuda_async_memory_resource cuda_async_mr{}; - rmm::mr::set_current_device_resource(&cuda_async_mr); + rmm::mr::set_current_device_resource_ref(cuda_async_mr); rmm::cuda_stream input_stream; std::vector streams(4); @@ -158,7 +158,7 @@ void BM_VectorWorkflow(benchmark::State& state) auto const bytes = num_elements * sizeof(std::int32_t) * num_accesses; state.SetBytesProcessed(static_cast(state.iterations() * bytes)); - rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } BENCHMARK_TEMPLATE(BM_VectorWorkflow, thrust_vector) // NOLINT @@ -167,9 +167,9 @@ BENCHMARK_TEMPLATE(BM_VectorWorkflow, thrust_vector) // NOLINT ->Unit(benchmark::kMicrosecond) ->UseManualTime(); -// The only difference here is that `rmm::device_vector` uses `rmm::current_device_resource()` -// for allocation while `thrust::device_vector` uses cudaMalloc/cudaFree. In the benchmarks we use -// `cuda_async_memory_resource`, which is faster. +// The only difference here is that `rmm::device_vector` uses +// `rmm::get_current_device_resource_ref()` for allocation while `thrust::device_vector` uses +// cudaMalloc/cudaFree. In the benchmarks we use `cuda_async_memory_resource`, which is faster. 
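Taken together, the benchmark hunks above migrate from the pointer-based setter to the new reference-based API. A minimal sketch of the new pattern (illustrative only; assumes a CUDA-capable device, and reuses the pool-sizing helper from the benchmarks):

```cpp
#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr{};
  rmm::mr::pool_memory_resource pool_mr{&cuda_mr, rmm::percent_of_free_device_memory(50)};

  // New style: pass the resource itself, not a pointer
  rmm::mr::set_current_device_resource_ref(pool_mr);
  {
    rmm::device_uvector<int> vec(100, rmm::cuda_stream_view{});  // draws from pool_mr
  }
  // New style: restore the initial resource instead of set_current_device_resource(nullptr)
  rmm::mr::reset_current_device_resource_ref();
  return 0;
}
```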
BENCHMARK_TEMPLATE(BM_VectorWorkflow, rmm_vector) // NOLINT ->RangeMultiplier(10) // NOLINT ->Range(100'000, 100'000'000) // NOLINT diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index b73ef54f8..86e761c80 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -75,7 +75,7 @@ static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc con { auto mr = factory(); - rmm::mr::set_current_device_resource(mr.get()); + rmm::mr::set_current_device_resource_ref(mr.get()); auto num_streams = state.range(0); auto num_kernels = state.range(1); @@ -92,7 +92,7 @@ static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc con state.SetItemsProcessed(static_cast<int64_t>(state.iterations() * num_kernels)); - rmm::mr::set_current_device_resource(nullptr); + rmm::mr::reset_current_device_resource_ref(); } inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); } diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index 7e13fa95b..dbd4194ab 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -2176,7 +2176,11 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = RMM_NAMESPACE=rmm RMM_EXPORT +# These need to be kept in sync with those set in rmm/detail/export.hpp +# Since we are excluding detail files in EXCLUDE_PATTERNS there +# appears to be no way of getting doxygen to still parse that file and +# make the definitions available via the preprocessor :( +PREDEFINED = RMM_EXPORT RMM_HIDDEN RMM_NAMESPACE=rmm # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2185,7 +2189,7 @@ PREDEFINED = RMM_NAMESPACE=rmm RMM_EXPORT # definition found in the source code. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -EXPAND_AS_DEFINED = RMM_NAMESPACE +EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will # remove all references to function-like macros that are alone on a line, have diff --git a/include/rmm/detail/export.hpp b/include/rmm/detail/export.hpp index 90fa8d173..9587a6009 100644 --- a/include/rmm/detail/export.hpp +++ b/include/rmm/detail/export.hpp @@ -24,4 +24,5 @@ #else #define RMM_EXPORT #define RMM_HIDDEN +#define RMM_NAMESPACE rmm #endif diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index e6664e0f6..3ddd37415 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -41,7 +41,7 @@ namespace RMM_NAMESPACE { * * This class allocates untyped and *uninitialized* device memory using a * `device_async_resource_ref`. If not explicitly specified, the memory resource - * returned from `get_current_device_resource()` is used. + * returned from `get_current_device_resource_ref()` is used. * * @note Unlike `std::vector` or `thrust::device_vector`, the device memory * allocated by a `device_buffer` is uninitialized. Therefore, it is undefined @@ -95,7 +95,7 @@ class device_buffer { // `__host__ __device__` specifiers to the defaulted constructor when it is called within the // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host- // device function.
This causes warnings/errors because this ctor invokes host-only functions. - device_buffer() : _mr{rmm::mr::get_current_device_resource()} {} + device_buffer() : _mr{rmm::mr::get_current_device_resource_ref()} {} /** * @brief Constructs a new device buffer of `size` uninitialized bytes @@ -109,7 +109,7 @@ class device_buffer { */ explicit device_buffer(std::size_t size, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _stream{stream}, _mr{mr} { cuda_set_device_raii dev{_device}; @@ -138,7 +138,7 @@ class device_buffer { device_buffer(void const* source_data, std::size_t size, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _stream{stream}, _mr{mr} { cuda_set_device_raii dev{_device}; @@ -169,7 +169,7 @@ class device_buffer { */ device_buffer(device_buffer const& other, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : device_buffer{other.data(), other.size(), stream, mr} { } @@ -419,8 +419,8 @@ class device_buffer { cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation rmm::device_async_resource_ref _mr{ - rmm::mr::get_current_device_resource()}; ///< The memory resource used to - ///< allocate/deallocate device memory + rmm::mr::get_current_device_resource_ref()}; ///< The memory resource used to + ///< allocate/deallocate device memory cuda_device_id _device{get_current_cuda_device()}; /** diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index 95388eca9..62b004e2f 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -95,7 +95,7 @@ class device_scalar { * @param mr Optional, resource with which to allocate. 
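 * A usage sketch (illustrative only; not from the original header):
 * @code{.cpp}
 * rmm::cuda_stream stream{};
 * // Uninitialized device storage for one int, allocated from the current device resource
 * rmm::device_scalar<int> scalar{stream.view()};
 * @endcode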
*/ explicit device_scalar(cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{1, stream, mr} { } @@ -118,7 +118,7 @@ class device_scalar { */ explicit device_scalar(value_type const& initial_value, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{1, stream, mr} { set_value_async(initial_value, stream); @@ -138,7 +138,7 @@ class device_scalar { */ device_scalar(device_scalar const& other, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{other._storage, stream, mr} { } diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index e1610a73a..13f566150 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -48,7 +48,7 @@ namespace RMM_NAMESPACE { * * Example: * @code{.cpp} - * rmm::mr::device_memory_resource * mr = new my_custom_resource(); + * auto mr = new my_custom_resource(); * rmm::cuda_stream_view s{}; * * // Allocates *uninitialized* device memory on stream `s` sufficient for 100 ints using the @@ -126,7 +126,7 @@ class device_uvector { */ explicit device_uvector(std::size_t size, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{elements_to_bytes(size), stream, mr} { } @@ -142,7 +142,7 @@ class device_uvector { */ explicit device_uvector(device_uvector const& other, cuda_stream_view stream, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : _storage{other._storage, stream, mr} { } diff --git a/include/rmm/exec_policy.hpp b/include/rmm/exec_policy.hpp index 1c9a07abd..019a8245a 100644 --- a/include/rmm/exec_policy.hpp +++ b/include/rmm/exec_policy.hpp @@ -57,7 +57,7 @@ class exec_policy : public thrust_exec_policy_t { * @param mr The resource to use for allocating temporary memory */ explicit exec_policy(cuda_stream_view stream = cuda_stream_default, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : thrust_exec_policy_t( thrust::cuda::par(mr::thrust_allocator(stream, mr)).on(stream.value())) { @@ -81,7 +81,7 @@ using thrust_exec_policy_nosync_t = class exec_policy_nosync : public thrust_exec_policy_nosync_t { public: explicit exec_policy_nosync(cuda_stream_view stream = cuda_stream_default, - device_async_resource_ref mr = mr::get_current_device_resource()) + device_async_resource_ref mr = mr::get_current_device_resource_ref()) : thrust_exec_policy_nosync_t( thrust::cuda::par_nosync(mr::thrust_allocator(stream, mr)).on(stream.value())) { diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp index e60501d2b..326385f16 100644 --- a/include/rmm/logger.hpp +++ b/include/rmm/logger.hpp @@ -107,7 +107,7 @@ struct bytes { * * @return spdlog::logger& The logger. 
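 * An illustrative call (assumes spdlog's standard logger interface):
 * @code{.cpp}
 * rmm::logger().info("memory pool created");
 * @endcode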
*/ -inline spdlog::logger& logger() +RMM_EXPORT inline spdlog::logger& logger() { static detail::logger_wrapper wrapped{}; return wrapped.logger_; diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 85eddb427..4df2c4d2d 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,6 @@ class aligned_resource_adaptor final : public device_memory_resource { /** * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests. * - * @throws rmm::logic_error if `upstream == nullptr` * @throws rmm::logic_error if `allocation_alignment` is not a power of 2 * * @param upstream The resource used for allocating/deallocating device memory. @@ -67,12 +67,33 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param alignment_threshold Only allocations with a size larger than or equal to this threshold * are aligned. */ - explicit aligned_resource_adaptor(Upstream* upstream, + explicit aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + RMM_EXPECTS(rmm::is_supported_alignment(alignment), + "Allocation alignment is not a power of 2."); + } + + /** + * @brief Construct an aligned resource adaptor using `upstream` to satisfy allocation requests. + * + * @throws rmm::logic_error if `upstream == nullptr` + * @throws rmm::logic_error if `alignment` is not a power of 2 + * + * @param upstream The resource used for allocating/deallocating device memory. + * @param alignment The size used for allocation alignment. + * @param alignment_threshold Only allocations with a size larger than or equal to this threshold + * are aligned. + */ + explicit aligned_resource_adaptor(Upstream* upstream, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) + : upstream_{to_device_async_resource_ref_checked(upstream)}, + alignment_{alignment}, + alignment_threshold_{alignment_threshold} + { RMM_EXPECTS(rmm::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -92,11 +113,6 @@ class aligned_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief The default alignment used by the adaptor. */ @@ -106,8 +122,8 @@ class aligned_resource_adaptor final : public device_memory_resource { using lock_guard = std::lock_guard; /** - * @brief Allocates memory of size at least `bytes` using the upstream resource with the specified - * alignment. + * @brief Allocates memory of size at least `bytes` using the upstream resource with the + * specified alignment. * * @throws rmm::bad_alloc if the requested allocation could not be fulfilled * by the upstream resource. 
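 * For context, constructing the adaptor might look like this (an illustrative
 * sketch; the sizes are assumptions, not from the original header):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * // Align allocations of 64 KiB or more to 4 KiB boundaries
 * rmm::mr::aligned_resource_adaptor<rmm::mr::cuda_memory_resource> mr{&cuda_mr, 4096, 65536};
 * @endcode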
@@ -119,10 +135,10 @@ class aligned_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, 1, stream); } auto const size = upstream_allocation_size(bytes); - void* pointer = upstream_->allocate(size, stream); + void* pointer = get_upstream_resource().allocate_async(size, 1, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); auto const aligned_address = rmm::align_up(address, alignment_); @@ -145,7 +161,7 @@ class aligned_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, 1, stream); } else { { lock_guard lock(mtx_); @@ -155,7 +171,7 @@ class aligned_resource_adaptor final : public device_memory_resource { pointers_.erase(iter); } } - upstream_->deallocate(ptr, upstream_allocation_size(bytes), stream); + get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream); } } @@ -176,8 +192,8 @@ class aligned_resource_adaptor final : public device_memory_resource { } /** - * @brief Calculate the allocation size needed from upstream to account for alignments of both the - * size and the base pointer. + * @brief Calculate the allocation size needed from upstream to account for alignments of both + * the size and the base pointer. * * @param bytes The requested allocation size. * @return Allocation size needed from upstream to align both the size and the base pointer. @@ -188,7 +204,8 @@ class aligned_resource_adaptor final : public device_memory_resource { return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT; } - Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests + /// The upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; std::unordered_map pointers_; ///< Map of aligned pointers to upstream pointers. std::size_t alignment_; ///< The size used for allocation alignment std::size_t alignment_threshold_; ///< The size above which allocations should be aligned diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 388182e6a..417b7d2b4 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -82,6 +83,26 @@ namespace mr { template class arena_memory_resource final : public device_memory_resource { public: + /** + * @brief Construct an `arena_memory_resource`. + * + * @param upstream_mr The memory resource from which to allocate blocks for the global arena. + * @param arena_size Size in bytes of the global arena. Defaults to half of the available + * memory on the current device. + * @param dump_log_on_failure If true, dump memory log when running out of memory. 
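 * A construction sketch (illustrative size; not part of the original documentation):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * // 1 GiB global arena; pass std::nullopt to default to half of free device memory
 * rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr, 1ULL << 30};
 * @endcode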
+ */ + explicit arena_memory_resource(device_async_resource_ref upstream_mr, + std::optional arena_size = std::nullopt, + bool dump_log_on_failure = false) + : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} + { + if (dump_log_on_failure_) { + logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); + // Set the level to `debug` for more detailed output. + logger_->set_level(spdlog::level::info); + } + } + /** * @brief Construct an `arena_memory_resource`. * @@ -95,7 +116,8 @@ class arena_memory_resource final : public device_memory_resource { explicit arena_memory_resource(Upstream* upstream_mr, std::optional arena_size = std::nullopt, bool dump_log_on_failure = false) - : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} + : global_arena_{to_device_async_resource_ref_checked(upstream_mr), arena_size}, + dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { logger_ = spdlog::basic_logger_mt("arena_memory_dump", "rmm_arena_memory_dump.log"); @@ -113,8 +135,8 @@ class arena_memory_resource final : public device_memory_resource { arena_memory_resource& operator=(arena_memory_resource&&) noexcept = delete; private: - using global_arena = rmm::mr::detail::arena::global_arena; - using arena = rmm::mr::detail::arena::arena; + using global_arena = rmm::mr::detail::arena::global_arena; + using arena = rmm::mr::detail::arena::arena; /** * @brief Allocates memory of size at least `bytes`. @@ -274,7 +296,7 @@ class arena_memory_resource final : public device_memory_resource { std::unique_lock lock(map_mtx_); auto thread_arena = std::make_shared(global_arena_); thread_arenas_.emplace(thread_id, thread_arena); - thread_local detail::arena::arena_cleaner cleaner{thread_arena}; + thread_local detail::arena::arena_cleaner cleaner{thread_arena}; return *thread_arena; } } diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 773035231..a5ef64665 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace RMM_NAMESPACE { @@ -52,16 +53,48 @@ class binning_memory_resource final : public device_memory_resource { * Initially has no bins, so simply uses the upstream_resource until bin resources are added * with `add_bin`. * - * @throws rmm::logic_error if size_base is not a power of two. + * @param upstream_resource The upstream memory resource used to allocate bin pools. + */ + explicit binning_memory_resource(device_async_resource_ref upstream_resource) + : upstream_mr_{upstream_resource} + { + } + + /** + * @brief Construct a new binning memory resource object. + * + * Initially has no bins, so simply uses the upstream_resource until bin resources are added + * with `add_bin`. + * + * @throws rmm::logic_error if upstream_resource is nullptr * * @param upstream_resource The upstream memory resource used to allocate bin pools. */ explicit binning_memory_resource(Upstream* upstream_resource) - : upstream_mr_{[upstream_resource]() { - RMM_EXPECTS(nullptr != upstream_resource, "Unexpected null upstream pointer."); - return upstream_resource; - }()} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_resource)} + { + } + + /** + * @brief Construct a new binning memory resource object with a range of initial bins. 
+ * + * Constructs a new binning memory resource and adds bins backed by `fixed_size_memory_resource` + * in the range [2^min_size_exponent, 2^max_size_exponent]. For example if `min_size_exponent==18` + * and `max_size_exponent==22`, creates bins of sizes 256KiB, 512KiB, 1024KiB, 2048KiB and + * 4096KiB. + * + * @param upstream_resource The upstream memory resource used to allocate bin pools. + * @param min_size_exponent The minimum base-2 exponent bin size. + * @param max_size_exponent The maximum base-2 exponent bin size. + */ + binning_memory_resource(device_async_resource_ref upstream_resource, + int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) + int8_t max_size_exponent) + : upstream_mr_{upstream_resource} { + for (auto i = min_size_exponent; i <= max_size_exponent; i++) { + add_bin(1 << i); + } } /** @@ -72,6 +105,8 @@ class binning_memory_resource final : public device_memory_resource { * and `max_size_exponent==22`, creates bins of sizes 256KiB, 512KiB, 1024KiB, 2048KiB and * 4096KiB. * + * @throws rmm::logic_error if upstream_resource is nullptr + * * @param upstream_resource The upstream memory resource used to allocate bin pools. * @param min_size_exponent The minimum base-2 exponent bin size. * @param max_size_exponent The maximum base-2 exponent bin size. @@ -79,10 +114,7 @@ class binning_memory_resource final : public device_memory_resource { binning_memory_resource(Upstream* upstream_resource, int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) int8_t max_size_exponent) - : upstream_mr_{[upstream_resource]() { - RMM_EXPECTS(nullptr != upstream_resource, "Unexpected null upstream pointer."); - return upstream_resource; - }()} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_resource)} { for (auto i = min_size_exponent; i <= max_size_exponent; i++) { add_bin(1 << i); @@ -102,23 +134,17 @@ class binning_memory_resource final : public device_memory_resource { binning_memory_resource& operator=(binning_memory_resource&&) = delete; /** - * @briefreturn{rmm::device_async_resource_ref to the upstream resource} + * @briefreturn{device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_mr_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } - /** * @brief Add a bin allocator to this resource * - * Adds `bin_resource` if it is not null; otherwise constructs and adds a - * fixed_size_memory_resource. + * Adds `bin_resource` if provided; otherwise constructs and adds a fixed_size_memory_resource. * * This bin will be used for any allocation smaller than `allocation_size` that is larger than * the next smaller bin's allocation size. 
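 * An illustrative sketch of the bin layout described above (names assumed):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * // Fixed-size bins for 256 KiB (2^18) through 4 MiB (2^22); larger requests go upstream
 * rmm::mr::binning_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr, 18, 22};
 * // An additional explicit 8 MiB bin
 * mr.add_bin(8ULL << 20);
 * @endcode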
@@ -130,14 +156,14 @@ class binning_memory_resource final : public device_memory_resource { * @param allocation_size The maximum size that this bin allocates * @param bin_resource The memory resource for the bin */ - void add_bin(std::size_t allocation_size, device_memory_resource* bin_resource = nullptr) + void add_bin(std::size_t allocation_size, + std::optional bin_resource = std::nullopt) { - allocation_size = rmm::align_up(allocation_size, rmm::CUDA_ALLOCATION_ALIGNMENT); + allocation_size = align_up(allocation_size, CUDA_ALLOCATION_ALIGNMENT); - if (nullptr != bin_resource) { - resource_bins_.insert({allocation_size, bin_resource}); + if (bin_resource.has_value()) { + resource_bins_.insert({allocation_size, bin_resource.value()}); } else if (resource_bins_.count(allocation_size) == 0) { // do nothing if bin already exists - owned_bin_resources_.push_back( std::make_unique>(upstream_mr_, allocation_size)); resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); @@ -153,11 +179,10 @@ class binning_memory_resource final : public device_memory_resource { * @param bytes Requested allocation size in bytes * @return Get the resource reference for the requested size. */ - rmm::device_async_resource_ref get_resource_ref(std::size_t bytes) + device_async_resource_ref get_resource_ref(std::size_t bytes) { auto iter = resource_bins_.lower_bound(bytes); - return (iter != resource_bins_.cend()) ? rmm::device_async_resource_ref{iter->second} - : get_upstream_resource(); + return (iter != resource_bins_.cend()) ? iter->second : get_upstream_resource(); } /** @@ -188,11 +213,12 @@ class binning_memory_resource final : public device_memory_resource { get_resource_ref(bytes).deallocate_async(ptr, bytes, stream); } - Upstream* upstream_mr_; // The upstream memory_resource from which to allocate blocks. + device_async_resource_ref + upstream_mr_; // The upstream memory_resource from which to allocate blocks. std::vector>> owned_bin_resources_; - std::map resource_bins_; + std::map resource_bins_; }; /** @} */ // end of group diff --git a/include/rmm/mr/device/callback_memory_resource.hpp b/include/rmm/mr/device/callback_memory_resource.hpp index c569f7dd6..fa2d8056d 100644 --- a/include/rmm/mr/device/callback_memory_resource.hpp +++ b/include/rmm/mr/device/callback_memory_resource.hpp @@ -86,12 +86,13 @@ class callback_memory_resource final : public device_memory_resource { * It is the caller's responsibility to maintain the lifetime of the pointed-to data * for the duration of the lifetime of the `callback_memory_resource`. 
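 * A minimal pass-through sketch (the callbacks and names are illustrative, not
 * from the original documentation):
 * @code{.cpp}
 * auto alloc_cb = [](std::size_t bytes, rmm::cuda_stream_view stream, void*) {
 *   return rmm::mr::get_current_device_resource_ref().allocate_async(bytes, stream);
 * };
 * auto dealloc_cb = [](void* ptr, std::size_t bytes, rmm::cuda_stream_view stream, void*) {
 *   rmm::mr::get_current_device_resource_ref().deallocate_async(ptr, bytes, stream);
 * };
 * rmm::mr::callback_memory_resource mr{alloc_cb, dealloc_cb};
 * @endcode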
*/ - callback_memory_resource(allocate_callback_t allocate_callback, - deallocate_callback_t deallocate_callback, - void* allocate_callback_arg = nullptr, - void* deallocate_callback_arg = nullptr) noexcept - : allocate_callback_(allocate_callback), - deallocate_callback_(deallocate_callback), + callback_memory_resource( + allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg = nullptr, // NOLINT(bugprone-easily-swappable-parameters) + void* deallocate_callback_arg = nullptr) noexcept + : allocate_callback_(std::move(allocate_callback)), + deallocate_callback_(std::move(deallocate_callback)), allocate_callback_arg_(allocate_callback_arg), deallocate_callback_arg_(deallocate_callback_arg) { diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 3d24cfebf..6f8303c83 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -494,22 +495,18 @@ inline auto max_free_size(std::set const& superblocks) * @tparam Upstream Memory resource to use for allocating the arena. Implements * rmm::mr::device_memory_resource interface. */ -template class global_arena final { public: /** * @brief Construct a global arena. * - * @throws rmm::logic_error if `upstream_mr == nullptr`. - * * @param upstream_mr The memory resource from which to allocate blocks for the pool * @param arena_size Size in bytes of the global arena. Defaults to half of the available memory * on the current device. */ - global_arena(Upstream* upstream_mr, std::optional arena_size) + global_arena(device_async_resource_ref upstream_mr, std::optional arena_size) : upstream_mr_{upstream_mr} { - RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); auto const size = rmm::align_down(arena_size.value_or(default_size()), rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size >= superblock::minimum_size, @@ -530,7 +527,7 @@ class global_arena final { ~global_arena() { std::lock_guard lock(mtx_); - upstream_mr_->deallocate(upstream_block_.pointer(), upstream_block_.size()); + upstream_mr_.deallocate(upstream_block_.pointer(), upstream_block_.size()); } /** @@ -539,7 +536,7 @@ class global_arena final { * @param size The size in bytes of the allocation. * @return bool True if the allocation should be handled by the global arena. */ - bool handles(std::size_t size) const { return size > superblock::minimum_size; } + static bool handles(std::size_t size) { return size > superblock::minimum_size; } /** * @brief Acquire a superblock that can fit a block of the given size. @@ -610,7 +607,7 @@ class global_arena final { * @param stream Stream on which to perform deallocation. * @return bool true if the allocation is found, false otherwise. */ - bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) + bool deallocate_async(void* ptr, std::size_t size, cuda_stream_view stream) { RMM_LOGGING_ASSERT(handles(size)); stream.synchronize_no_throw(); @@ -692,7 +689,7 @@ class global_arena final { * @brief Default size of the global arena if unspecified. * @return the default global arena size. 
*/ - constexpr std::size_t default_size() const + static std::size_t default_size() { auto const [free, total] = rmm::available_device_memory(); return free / 2; } @@ -705,7 +702,7 @@ */ void initialize(std::size_t size) { - upstream_block_ = {upstream_mr_->allocate(size), size}; + upstream_block_ = {upstream_mr_.allocate(size), size}; superblocks_.emplace(upstream_block_.pointer(), size); } @@ -777,7 +774,7 @@ } /// The upstream resource to allocate memory from. - Upstream* upstream_mr_; + device_async_resource_ref upstream_mr_; /// Block allocated from upstream so that it can be quickly freed. block upstream_block_; /// Address-ordered set of superblocks. @@ -795,7 +792,6 @@ * @tparam Upstream Memory resource to use for allocating the global arena. Implements * rmm::mr::device_memory_resource interface. */ -template <typename Upstream> class arena { public: /** * @brief Construct an `arena`. * * @param global_arena The global arena from which to allocate superblocks. */ - explicit arena(global_arena<Upstream>& global_arena) : global_arena_{global_arena} {} + explicit arena(global_arena& global_arena) : global_arena_{global_arena} {} // Disable copy (and move) semantics. arena(arena const&) = delete; @@ -837,7 +833,9 @@ */ bool deallocate(void* ptr, std::size_t size, cuda_stream_view stream) { - if (global_arena_.handles(size) && global_arena_.deallocate(ptr, size, stream)) { return true; } + if (global_arena::handles(size) && global_arena_.deallocate_async(ptr, size, stream)) { + return true; + } return deallocate(ptr, size); } @@ -961,7 +959,7 @@ } /// The global arena to allocate superblocks from. - global_arena<Upstream>& global_arena_; + global_arena& global_arena_; /// Acquired superblocks. std::set<superblock> superblocks_; /// Mutex for exclusive lock. @@ -976,10 +974,9 @@ * @tparam Upstream Memory resource to use for allocating the global arena. Implements * rmm::mr::device_memory_resource interface. */ -template <typename Upstream> class arena_cleaner { public: - explicit arena_cleaner(std::shared_ptr<arena<Upstream>> const& arena) : arena_(arena) {} + explicit arena_cleaner(std::shared_ptr<arena> const& arena) : arena_(arena) {} // Disable copy (and move) semantics. arena_cleaner(arena_cleaner const&) = delete; @@ -997,7 +994,7 @@ private: /// A non-owning pointer to the arena that may need cleaning.
- std::weak_ptr<arena<Upstream>> arena_; + std::weak_ptr<arena> arena_; }; } // namespace mr::detail::arena diff --git a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp index eeebaac3b..fdb385748 100644 --- a/include/rmm/mr/device/failure_callback_resource_adaptor.hpp +++ b/include/rmm/mr/device/failure_callback_resource_adaptor.hpp @@ -18,6 +18,7 @@ #include #include #include +#include <rmm/resource_ref.hpp> #include #include @@ -81,9 +82,9 @@ using failure_callback_t = std::function<bool(std::size_t, void*)>; * { * bool retried{false}; * failure_callback_adaptor mr{ - * rmm::mr::get_current_device_resource(), failure_handler, &retried + * rmm::mr::get_current_device_resource_ref(), failure_handler, &retried * }; - * rmm::mr::set_current_device_resource(&mr); + * rmm::mr::set_current_device_resource_ref(mr); * } * @endcode * @@ -95,6 +96,21 @@ class failure_callback_resource_adaptor final : public device_memory_resource { public: using exception_type = ExceptionType; ///< The type of exception this object catches/throws + /** + * @brief Construct a new `failure_callback_resource_adaptor` using `upstream` to satisfy + * allocation requests. + * + * @param upstream The resource used for allocating/deallocating device memory + * @param callback Callback function @see failure_callback_t + * @param callback_arg Extra argument passed to `callback` + */ + failure_callback_resource_adaptor(device_async_resource_ref upstream, + failure_callback_t callback, + void* callback_arg) + : upstream_{upstream}, callback_{std::move(callback)}, callback_arg_{callback_arg} + { + } + /** * @brief Construct a new `failure_callback_resource_adaptor` using `upstream` to satisfy * allocation requests. * @@ -108,9 +124,10 @@ failure_callback_resource_adaptor(Upstream* upstream, failure_callback_t callback, void* callback_arg) - : upstream_{upstream}, callback_{std::move(callback)}, callback_arg_{callback_arg} + : upstream_{to_device_async_resource_ref_checked(upstream)}, + callback_{std::move(callback)}, + callback_arg_{callback_arg} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } failure_callback_resource_adaptor() = delete; @@ -130,11 +147,6 @@ class failure_callback_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream * @@ -153,7 +165,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { while (true) { try { - ret = upstream_->allocate(bytes, stream); + ret = get_upstream_resource().allocate_async(bytes, stream); break; } catch (exception_type const& e) { if (!callback_(bytes, callback_arg_)) { throw; } @@ -171,7 +183,7 @@ class failure_callback_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -185,11 +197,12 @@ { if (this == &other) { return true; } auto cast = dynamic_cast<failure_callback_resource_adaptor<Upstream, ExceptionType> const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() ==
cast->get_upstream_resource(); } - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; failure_callback_t callback_; void* callback_arg_; }; diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 84cb3b0c6..249af77dd 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -65,7 +65,31 @@ class fixed_size_memory_resource /** * @brief Construct a new `fixed_size_memory_resource` that allocates memory from - * `upstream_resource`. + * `upstream_mr`. + * + * When the pool of blocks is all allocated, grows the pool by allocating + * `blocks_to_preallocate` more blocks from `upstream_mr`. + * + * @param upstream_mr The device_async_resource_ref from which to allocate blocks for the pool. + * @param block_size The size of blocks to allocate. + * @param blocks_to_preallocate The number of blocks to allocate to initialize the pool. + */ + explicit fixed_size_memory_resource( + device_async_resource_ref upstream_mr, + // NOLINTNEXTLINE bugprone-easily-swappable-parameters + std::size_t block_size = default_block_size, + std::size_t blocks_to_preallocate = default_blocks_to_preallocate) + : upstream_mr_{upstream_mr}, + block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)}, + upstream_chunk_size_{block_size_ * blocks_to_preallocate} + { + // allocate initial blocks and insert into free list + this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy); + } + + /** + * @brief Construct a new `fixed_size_memory_resource` that allocates memory from + * `upstream_mr`. * * When the pool of blocks is all allocated, grows the pool by allocating * `blocks_to_preallocate` more blocks from `upstream_mr`. @@ -76,11 +100,12 @@ class fixed_size_memory_resource */ explicit fixed_size_memory_resource( Upstream* upstream_mr, + // NOLINTNEXTLINE bugprone-easily-swappable-parameters std::size_t block_size = default_block_size, std::size_t blocks_to_preallocate = default_blocks_to_preallocate) - : upstream_mr_{upstream_mr}, - block_size_{rmm::align_up(block_size, rmm::CUDA_ALLOCATION_ALIGNMENT)}, - upstream_chunk_size_{block_size * blocks_to_preallocate} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_mr)}, + block_size_{align_up(block_size, CUDA_ALLOCATION_ALIGNMENT)}, + upstream_chunk_size_{block_size_ * blocks_to_preallocate} { // allocate initial blocks and insert into free list this->insert_blocks(std::move(blocks_from_upstream(cuda_stream_legacy)), cuda_stream_legacy); @@ -99,18 +124,13 @@ class fixed_size_memory_resource fixed_size_memory_resource& operator=(fixed_size_memory_resource&&) = delete; /** - * @briefreturn{rmm::device_async_resource_ref to the upstream resource} + * @briefreturn{device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_mr_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } - /** * @brief Get the size of blocks allocated by this memory resource. 
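 * For reference, constructing this resource might look like the following
 * (illustrative parameters, not from the original header):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * // 1 MiB blocks, preallocating 128 of them
 * rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr, 1ULL << 20, 128};
 * @endcode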
* @@ -200,7 +220,7 @@ class fixed_size_memory_resource { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class - RMM_LOGGING_ASSERT(rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); + RMM_LOGGING_ASSERT(align_up(size, CUDA_ALLOCATION_ALIGNMENT) <= block_size_); return block_type{ptr}; } @@ -254,10 +274,10 @@ class fixed_size_memory_resource } private: - Upstream* upstream_mr_; // The resource from which to allocate new blocks + device_async_resource_ref upstream_mr_; // The resource from which to allocate new blocks - std::size_t const block_size_; // size of blocks this MR allocates - std::size_t const upstream_chunk_size_; // size of chunks allocated from heap MR + std::size_t block_size_; // size of blocks this MR allocates + std::size_t upstream_chunk_size_; // size of chunks allocated from heap MR // blocks allocated from heap: so they can be easily freed std::vector upstream_blocks_; diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index c3ef72e09..d19fa3a0a 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,24 @@ namespace mr { template class limiting_resource_adaptor final : public device_memory_resource { public: + /** + * @brief Construct a new limiting resource adaptor using `upstream` to satisfy + * allocation requests and limiting the total allocation amount possible. + * + * @param upstream The resource used for allocating/deallocating device memory + * @param allocation_limit Maximum memory allowed for this allocator + * @param alignment Alignment in bytes for the start of each allocated buffer + */ + limiting_resource_adaptor(device_async_resource_ref upstream, + std::size_t allocation_limit, + std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) + : upstream_{upstream}, + allocation_limit_{allocation_limit}, + allocated_bytes_(0), + alignment_(alignment) + { + } + /** * @brief Construct a new limiting resource adaptor using `upstream` to satisfy * allocation requests and limiting the total allocation amount possible. 
@@ -57,13 +76,12 @@ class limiting_resource_adaptor final : public device_memory_resource { */ limiting_resource_adaptor(Upstream* upstream, std::size_t allocation_limit, - std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) - : allocation_limit_{allocation_limit}, + std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) + : upstream_{to_device_async_resource_ref_checked(upstream)}, + allocation_limit_{allocation_limit}, allocated_bytes_(0), - alignment_(alignment), - upstream_{upstream} + alignment_(alignment) { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } limiting_resource_adaptor() = delete; @@ -76,18 +94,13 @@ class limiting_resource_adaptor final : public device_memory_resource { default; ///< @default_move_assignment{limiting_resource_adaptor} /** - * @briefreturn{rmm::device_async_resource_ref to the upstream resource} + * @briefreturn{device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Query the number of bytes that have been allocated. Note that * this can not be used to know how large of an allocation is possible due @@ -124,11 +137,11 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const proposed_size = rmm::align_up(bytes, alignment_); + auto const proposed_size = align_up(bytes, alignment_); auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, stream); } catch (...) { allocated_bytes_ -= proposed_size; throw; @@ -148,8 +161,8 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::align_up(bytes, alignment_); - upstream_->deallocate(ptr, bytes, stream); + std::size_t allocated_size = align_up(bytes, alignment_); + get_upstream_resource().deallocate_async(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } @@ -164,10 +177,13 @@ class limiting_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto const* cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } + // The upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; + // maximum bytes this allocator is allowed to allocate. std::size_t allocation_limit_; @@ -176,9 +192,6 @@ class limiting_resource_adaptor final : public device_memory_resource { // todo: should be some way to ask the upstream... 
std::size_t alignment_; - - Upstream* upstream_; ///< The upstream resource used for satisfying - ///< allocation requests }; /** diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 6f6cd816a..595ab2e4e 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -77,10 +77,8 @@ class logging_resource_adaptor final : public device_memory_resource { logging_resource_adaptor(Upstream* upstream, std::string const& filename = get_default_filename(), bool auto_flush = false) - : logger_{make_logger(filename)}, upstream_{upstream} + : logger_{make_logger(filename)}, upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - init_logger(auto_flush); } @@ -99,10 +97,8 @@ class logging_resource_adaptor final : public device_memory_resource { * performance. */ logging_resource_adaptor(Upstream* upstream, std::ostream& stream, bool auto_flush = false) - : logger_{make_logger(stream)}, upstream_{upstream} + : logger_{make_logger(stream)}, upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - init_logger(auto_flush); } @@ -123,10 +119,76 @@ class logging_resource_adaptor final : public device_memory_resource { logging_resource_adaptor(Upstream* upstream, spdlog::sinks_init_list sinks, bool auto_flush = false) - : logger_{make_logger(sinks)}, upstream_{upstream} + : logger_{make_logger(sinks)}, upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); + init_logger(auto_flush); + } + /** + * @brief Construct a new logging resource adaptor using `upstream` to satisfy + * allocation requests and logging information about each allocation/free to + * the file specified by `filename`. + * + * The logfile will be written using CSV formatting. + * + * Clears the contents of `filename` if it already exists. + * + * Creating multiple `logging_resource_adaptor`s with the same `filename` will + * result in undefined behavior. + * + * @throws spdlog::spdlog_ex if opening `filename` failed + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + * @param filename Name of file to write log info. If not specified, retrieves + * the file name from the environment variable "RMM_LOG_FILE". + * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade + * performance. + */ + logging_resource_adaptor(device_async_resource_ref upstream, + std::string const& filename = get_default_filename(), + bool auto_flush = false) + : logger_{make_logger(filename)}, upstream_{upstream} + { + init_logger(auto_flush); + } + + /** + * @brief Construct a new logging resource adaptor using `upstream` to satisfy + * allocation requests and logging information about each allocation/free to + * the ostream specified by `stream`. + * + * The logfile will be written using CSV formatting. + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + * @param stream The ostream to write log info. + * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade + * performance. 
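 * A usage sketch (illustrative; writes CSV records to std::cerr):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * rmm::mr::logging_resource_adaptor<rmm::mr::cuda_memory_resource> log_mr{&cuda_mr, std::cerr};
 * @endcode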
+ */ + logging_resource_adaptor(device_async_resource_ref upstream, + std::ostream& stream, + bool auto_flush = false) + : logger_{make_logger(stream)}, upstream_{upstream} + { + init_logger(auto_flush); + } + + /** + * @brief Construct a new logging resource adaptor using `upstream` to satisfy + * allocation requests and logging information about each allocation/free to + * the ostream specified by `stream`. + * + * The logfile will be written using CSV formatting. + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + * @param sinks A list of logging sinks to which log output will be written. + * @param auto_flush If true, flushes the log for every (de)allocation. Warning, this will degrade + * performance. + */ + logging_resource_adaptor(device_async_resource_ref upstream, + spdlog::sinks_init_list sinks, + bool auto_flush = false) + : logger_{make_logger(sinks)}, upstream_{upstream} + { init_logger(auto_flush); } @@ -147,11 +209,6 @@ class logging_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Flush logger contents. */ @@ -239,7 +296,7 @@ class logging_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { try { - auto const ptr = upstream_->allocate(bytes, stream); + auto const ptr = get_upstream_resource().allocate_async(bytes, stream); logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); return ptr; } catch (...) { @@ -265,7 +322,7 @@ class logging_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { logger_->info("free,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -279,24 +336,14 @@ class logging_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto const* cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } - // make_logging_adaptor needs access to private get_default_filename - template - // NOLINTNEXTLINE(readability-redundant-declaration) - [[deprecated( - "make_logging_adaptor is deprecated in RMM 24.10. 
Use the logging_resource_adaptor constructor " - "instead.")]] - friend logging_resource_adaptor make_logging_adaptor(T* upstream, - std::string const& filename, - bool auto_flush); - std::shared_ptr logger_; ///< spdlog logger object - Upstream* upstream_; ///< The upstream resource used for satisfying - ///< allocation requests + device_async_resource_ref upstream_; ///< The upstream resource used for satisfying + ///< allocation requests }; /** diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index b8291c237..855d4c0bf 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -100,6 +100,15 @@ namespace mr { namespace detail { +// These symbols must have default visibility so that when they are +// referenced in multiple different DSOs the linker correctly +// determines that there is only a single unique reference to the +// function symbols (and hence they return unique static references +// across different DSOs). See also +// https://github.com/rapidsai/rmm/issues/826 +// Although currently the entire RMM namespace is RMM_EXPORT, we +// explicitly mark these functions as exported in case the namespace +// export changes. /** * @brief Returns a pointer to the initial resource. * @@ -122,7 +131,6 @@ RMM_EXPORT inline std::mutex& map_lock() return map_lock; } -// This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826 /** * @briefreturn{Reference to the map from device id -> resource} */ @@ -185,6 +193,31 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id device_id) : found->second; } +namespace detail { + +// The non-thread-safe implementation of `set_per_device_resource_ref`. This exists because +// we need to call this function from two places: the thread-safe version of +// `set_per_device_resource_ref` and the thread-safe version of `set_per_device_resource`, +// both of which take the lock, so we need an implementation that doesn't take the lock. +/// @private +inline device_async_resource_ref set_per_device_resource_ref_unsafe( + cuda_device_id device_id, device_async_resource_ref new_resource_ref) +{ + auto& map = detail::get_ref_map(); + auto const old_itr = map.find(device_id.value()); + // If a resource didn't previously exist for `device_id`, return pointer to initial_resource + // Note: because resource_ref is not default-constructible, we can't use std::map::operator[] + if (old_itr == map.end()) { + map.insert({device_id.value(), new_resource_ref}); + return device_async_resource_ref{detail::initial_resource()}; + } + + auto old_resource_ref = old_itr->second; + old_itr->second = new_resource_ref; // update map directly via iterator + return old_resource_ref; +} +} // namespace detail + /** * @brief Set the `device_memory_resource` for the specified device. * @@ -216,6 +249,14 @@ inline device_memory_resource* set_per_device_resource(cuda_device_id device_id, device_memory_resource* new_mr) { std::lock_guard lock{detail::map_lock()}; + + // Note: even though set_per_device_resource() and set_per_device_resource_ref() are not + // interchangeable, we call the latter from the former to maintain resource_ref + // state consistent with the resource pointer state. This is necessary because the + // Python API still uses the raw pointer API. Once the Python API is updated to use + // resource_ref, this call can be removed. 
+ detail::set_per_device_resource_ref_unsafe(device_id, new_mr); + auto& map = detail::get_map(); auto const old_itr = map.find(device_id.value()); // If a resource didn't previously exist for `id`, return pointer to initial_resource @@ -342,18 +383,7 @@ inline device_async_resource_ref set_per_device_resource_ref( cuda_device_id device_id, device_async_resource_ref new_resource_ref) { std::lock_guard lock{detail::ref_map_lock()}; - auto& map = detail::get_ref_map(); - auto const old_itr = map.find(device_id.value()); - // If a resource didn't previously exist for `device_id`, return pointer to initial_resource - // Note: because resource_ref is not default-constructible, we can't use std::map::operator[] - if (old_itr == map.end()) { - map.insert({device_id.value(), new_resource_ref}); - return device_async_resource_ref{detail::initial_resource()}; - } - - auto old_resource_ref = old_itr->second; - old_itr->second = new_resource_ref; // update map directly via iterator - return old_resource_ref; + return detail::set_per_device_resource_ref_unsafe(device_id, new_resource_ref); } /** diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index 6fb068410..442632d4f 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -52,7 +52,7 @@ class polymorphic_allocator { using value_type = T; ///< T, the value type of objects allocated by this allocator /** * @brief Construct a `polymorphic_allocator` using the return value of - * `rmm::mr::get_current_device_resource()` as the underlying memory resource. + * `rmm::mr::get_current_device_resource_ref()` as the underlying memory resource. * */ polymorphic_allocator() = default; @@ -116,7 +116,7 @@ class polymorphic_allocator { private: rmm::device_async_resource_ref mr_{ - get_current_device_resource()}; ///< Underlying resource used for (de)allocation + get_current_device_resource_ref()}; ///< Underlying resource used for (de)allocation }; /** diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index d22b53404..f63de21ff 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -114,6 +115,33 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available from the upstream resource. 
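 * For example (a sketch mirroring the benchmark usage above):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * // Start the pool at 50% of currently free device memory
 * rmm::mr::pool_memory_resource mr{&cuda_mr, rmm::percent_of_free_device_memory(50)};
 * @endcode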
+ */ + explicit pool_memory_resource(device_async_resource_ref upstream_mr, + std::size_t initial_pool_size, + std::optional maximum_pool_size = std::nullopt) + : upstream_mr_{upstream_mr} + { + RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), + "Error, Initial pool size required to be a multiple of 256 bytes"); + RMM_EXPECTS(rmm::is_aligned(maximum_pool_size.value_or(0), rmm::CUDA_ALLOCATION_ALIGNMENT), + "Error, Maximum pool size required to be a multiple of 256 bytes"); + + initialize_pool(initial_pool_size, maximum_pool_size); + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. @@ -132,10 +160,7 @@ class pool_memory_resource final explicit pool_memory_resource(Upstream* upstream_mr, std::size_t initial_pool_size, std::optional maximum_pool_size = std::nullopt) - : upstream_mr_{[upstream_mr]() { - RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); - return upstream_mr; - }()} + : upstream_mr_{to_device_async_resource_ref_checked(upstream_mr)} { RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Initial pool size required to be a multiple of 256 bytes"); @@ -184,16 +209,11 @@ class pool_memory_resource final /** * @briefreturn{rmm::device_async_resource_ref to the upstream resource} */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept + [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept { return upstream_mr_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } - /** * @brief Computes the size of the current pool * @@ -466,7 +486,8 @@ class pool_memory_resource final } private: - Upstream* upstream_mr_; // The "heap" to allocate the pool from + // The "heap" to allocate the pool from + device_async_resource_ref upstream_mr_; std::size_t current_pool_size_{}; std::optional maximum_pool_size_{}; diff --git a/include/rmm/mr/device/prefetch_resource_adaptor.hpp b/include/rmm/mr/device/prefetch_resource_adaptor.hpp index 59ce8e036..d3a4c676a 100644 --- a/include/rmm/mr/device/prefetch_resource_adaptor.hpp +++ b/include/rmm/mr/device/prefetch_resource_adaptor.hpp @@ -41,6 +41,14 @@ namespace mr { template class prefetch_resource_adaptor final : public device_memory_resource { public: + /** + * @brief Construct a new prefetch resource adaptor using `upstream` to satisfy + * allocation requests. + * + * @param upstream The resource_ref used for allocating/deallocating device memory + */ + prefetch_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} + /** * @brief Construct a new prefetch resource adaptor using `upstream` to satisfy * allocation requests. 
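With this change, `pool_memory_resource` stores a type-erased `device_async_resource_ref` no matter which constructor is used; the raw pointer overload now funnels through `to_device_async_resource_ref_checked()` (added to `rmm/resource_ref.hpp` later in this diff), so a null upstream still throws `rmm::logic_error`. A rough usage sketch of the two forms; the sizes are illustrative, and the `<Upstream>` argument must be spelled out for the ref form because the ref erases the concrete upstream type:

```
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr{};

  // Pointer form: Upstream is deduced, and a null pointer now throws
  // rmm::logic_error via to_device_async_resource_ref_checked().
  rmm::mr::pool_memory_resource from_ptr{&cuda_mr, 1u << 22};  // 4 MiB, 256-byte aligned

  // resource_ref form: the concrete upstream type is erased, so the
  // template argument is named explicitly.
  rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> from_ref{
    rmm::device_async_resource_ref{cuda_mr}, 1u << 22};

  void* ptr = from_ref.allocate(256);
  from_ref.deallocate(ptr, 256);
  return 0;
}
```

The pointer overload keeps class template argument deduction working for existing callers, while new code can pass any resource_ref.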
@@ -49,9 +57,9 @@ class prefetch_resource_adaptor final : public device_memory_resource { * * @param upstream The resource used for allocating/deallocating device memory */ - prefetch_resource_adaptor(Upstream* upstream) : upstream_{upstream} + prefetch_resource_adaptor(Upstream* upstream) + : upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } prefetch_resource_adaptor() = delete; @@ -71,11 +79,6 @@ class prefetch_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream @@ -92,7 +95,7 @@ class prefetch_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, stream); + void* ptr = get_upstream_resource().allocate_async(bytes, stream); rmm::prefetch(ptr, bytes, rmm::get_current_cuda_device(), stream); return ptr; } @@ -106,7 +109,7 @@ class prefetch_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -120,11 +123,12 @@ class prefetch_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; }; /** @} */ // end of group diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index bf78c669a..025c51aa7 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -113,17 +114,25 @@ class statistics_resource_adaptor final : public device_memory_resource { } }; + /** + * @brief Construct a new statistics resource adaptor using `upstream` to satisfy + * allocation requests. + * + * @param upstream The resource_ref used for allocating/deallocating device memory. + */ + statistics_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} + /** * @brief Construct a new statistics resource adaptor using `upstream` to satisfy * allocation requests. * * @throws rmm::logic_error if `upstream == nullptr` * - * @param upstream The resource used for allocating/deallocating device memory + * @param upstream The resource used for allocating/deallocating device memory. 
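For the prefetch adaptor above, allocation now goes through `get_upstream_resource().allocate_async()` followed by `rmm::prefetch()` toward the current device on the same stream. A short usage sketch, assuming a managed memory upstream since prefetching only makes sense for migratable allocations:

```
#include <rmm/mr/device/managed_memory_resource.hpp>
#include <rmm/mr/device/prefetch_resource_adaptor.hpp>

int main()
{
  rmm::mr::managed_memory_resource managed_mr{};

  // Every do_allocate() allocates via the upstream resource_ref and then
  // prefetches the new range to the current device on the same stream.
  rmm::mr::prefetch_resource_adaptor<rmm::mr::managed_memory_resource> mr{&managed_mr};

  void* ptr = mr.allocate(4096);  // allocated, then prefetched
  mr.deallocate(ptr, 4096);
  return 0;
}
```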
*/ - statistics_resource_adaptor(Upstream* upstream) : upstream_{upstream} + statistics_resource_adaptor(Upstream* upstream) + : upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } statistics_resource_adaptor() = delete; @@ -143,11 +152,6 @@ class statistics_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Returns a `counter` struct for this adaptor containing the current, * peak, and total number of allocated bytes for this @@ -226,7 +230,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, stream); + void* ptr = get_upstream_resource().allocate_async(bytes, stream); // increment the stats { @@ -249,7 +253,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -271,7 +275,7 @@ class statistics_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -279,14 +283,14 @@ class statistics_resource_adaptor final : public device_memory_resource { // Invariant: the stack always contains at least one entry std::stack> counter_stack_{{std::make_pair(counter{}, counter{})}}; std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; }; /** * @brief Convenience factory to return a `statistics_resource_adaptor` around the * upstream resource `upstream`. * - * @tparam Upstream Type of the upstream `device_memory_resource`. * @param upstream Pointer to the upstream resource * @return The new statistics resource adaptor */ @@ -297,7 +301,7 @@ template "instead.")]] statistics_resource_adaptor make_statistics_adaptor(Upstream* upstream) { - return statistics_resource_adaptor{upstream}; + return statistics_resource_adaptor{upstream}; } /** @} */ // end of group diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp index 9979d1e08..6881aa19e 100644 --- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp +++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp @@ -45,6 +45,16 @@ class thread_safe_resource_adaptor final : public device_memory_resource { public: using lock_t = std::lock_guard; ///< Type of lock used to synchronize access + /** + * @brief Construct a new thread safe resource adaptor using `upstream` to satisfy + * allocation requests. + * + * All allocations and frees are protected by a mutex lock + * + * @param upstream The resource used for allocating/deallocating device memory. 
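The statistics adaptor follows the same migration: a `device_async_resource_ref` constructor, allocation routed through `get_upstream_resource().allocate_async()`, and a `do_is_equal` that now returns `false` on a type mismatch instead of forwarding the comparison to the upstream. A minimal sketch of the ref-based construction, mirroring how the tests later in this diff use it (the byte counter fields are `value`, `peak`, and `total`):

```
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

int main()
{
  using stats_mr =
    rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource>;
  // No raw pointer: construct straight from the current device resource_ref.
  stats_mr mr{rmm::mr::get_current_device_resource_ref()};

  void* ptr = mr.allocate(1 << 20);
  mr.deallocate(ptr, 1 << 20);

  // value drops back to zero on free; peak/total retain the high-water mark.
  auto const bytes = mr.get_bytes_counter();
  return (bytes.value == 0 && bytes.peak == (1 << 20)) ? 0 : 1;
}
```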
+ */ + thread_safe_resource_adaptor(device_async_resource_ref upstream) : upstream_{upstream} {} + /** * @brief Construct a new thread safe resource adaptor using `upstream` to satisfy * allocation requests. @@ -55,9 +65,9 @@ class thread_safe_resource_adaptor final : public device_memory_resource { * * @param upstream The resource used for allocating/deallocating device memory. */ - thread_safe_resource_adaptor(Upstream* upstream) : upstream_{upstream} + thread_safe_resource_adaptor(Upstream* upstream) + : upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } thread_safe_resource_adaptor() = delete; @@ -75,11 +85,6 @@ class thread_safe_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - private: /** * @brief Allocates memory of size at least `bytes` using the upstream @@ -95,7 +100,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { lock_t lock(mtx); - return upstream_->allocate(bytes, stream); + return get_upstream_resource().allocate_async(bytes, stream); } /** @@ -108,7 +113,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource { void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { lock_t lock(mtx); - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); } /** @@ -122,12 +127,13 @@ class thread_safe_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } std::mutex mutable mtx; // mutex for thread safe access to upstream - Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests + device_async_resource_ref + upstream_; ///< The upstream resource used for satisfying allocation requests }; /** @} */ // end of group diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp index 2055a0633..b7b990c3d 100644 --- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp +++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp @@ -150,7 +150,7 @@ class thrust_allocator : public thrust::device_malloc_allocator { private: cuda_stream_view _stream{}; - rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource_ref()}; cuda_device_id _device{get_current_cuda_device()}; }; /** @} */ // end of group diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 3d3188b23..6a5916e5c 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,18 @@ class tracking_resource_adaptor final : public device_memory_resource { allocation_size{size} {}; }; + /** + * @brief Construct a new tracking resource adaptor using `upstream` to satisfy + * allocation requests. 
+ * + * @param upstream The resource used for allocating/deallocating device memory + * @param capture_stacks If true, capture stacks for allocation calls + */ + tracking_resource_adaptor(device_async_resource_ref upstream, bool capture_stacks = false) + : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{upstream} + { + } + /** * @brief Construct a new tracking resource adaptor using `upstream` to satisfy * allocation requests. @@ -93,9 +106,10 @@ class tracking_resource_adaptor final : public device_memory_resource { * @param capture_stacks If true, capture stacks for allocation calls */ tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false) - : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{upstream} + : capture_stacks_{capture_stacks}, + allocated_bytes_{0}, + upstream_{to_device_async_resource_ref_checked(upstream)} { - RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); } tracking_resource_adaptor() = delete; @@ -115,11 +129,6 @@ class tracking_resource_adaptor final : public device_memory_resource { return upstream_; } - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } - /** * @brief Get the outstanding allocations map * @@ -199,8 +208,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* ptr = upstream_->allocate(bytes, stream); - + void* ptr = get_upstream_resource().allocate_async(bytes, stream); // track it. { write_lock_t lock(mtx_); @@ -220,7 +228,7 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(ptr, bytes, stream); + get_upstream_resource().deallocate_async(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -265,7 +273,7 @@ class tracking_resource_adaptor final : public device_memory_resource { { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } @@ -273,7 +281,8 @@ class tracking_resource_adaptor final : public device_memory_resource { std::map allocations_; // map of active allocations std::atomic allocated_bytes_; // number of bytes currently allocated std::shared_mutex mutable mtx_; // mutex for thread safe access to allocations_ - Upstream* upstream_; // the upstream resource used for satisfying allocation requests + device_async_resource_ref upstream_; // the upstream resource used for satisfying + // allocation requests }; /** diff --git a/include/rmm/resource_ref.hpp b/include/rmm/resource_ref.hpp index 08942a040..56049522f 100644 --- a/include/rmm/resource_ref.hpp +++ b/include/rmm/resource_ref.hpp @@ -65,5 +65,21 @@ using host_device_resource_ref = using host_device_async_resource_ref = cuda::mr::async_resource_ref; +/** + * @brief Convert pointer to memory resource into `device_async_resource_ref`, checking for + * `nullptr` + * + * @tparam Resource The type of the memory resource. + * @param res A pointer to the memory resource. + * @return A `device_async_resource_ref` to the memory resource. + * @throws std::logic_error if the memory resource pointer is null. 
+ */ +template +device_async_resource_ref to_device_async_resource_ref_checked(Resource* res) +{ + RMM_EXPECTS(res, "Unexpected null resource pointer."); + return device_async_resource_ref{*res}; +} + /** @} */ // end of group } // namespace RMM_NAMESPACE diff --git a/python/rmm/docs/guide.md b/python/rmm/docs/guide.md index 911073b5d..22c0dc023 100644 --- a/python/rmm/docs/guide.md +++ b/python/rmm/docs/guide.md @@ -139,8 +139,8 @@ of 1 GiB and a maximum size of 4 GiB. The pool uses >>> import rmm >>> pool = rmm.mr.PoolMemoryResource( ... rmm.mr.CudaMemoryResource(), -... initial_pool_size=2**30, -... maximum_pool_size=2**32 +... initial_pool_size="1GiB", # equivalent to initial_pool_size=2**30 +... maximum_pool_size="4GiB" ... ) >>> rmm.mr.set_current_device_resource(pool) ``` @@ -151,8 +151,8 @@ Similarly, to use a pool of managed memory: >>> import rmm >>> pool = rmm.mr.PoolMemoryResource( ... rmm.mr.ManagedMemoryResource(), -... initial_pool_size=2**30, -... maximum_pool_size=2**32 +... initial_pool_size="1GiB", +... maximum_pool_size="4GiB" ... ) >>> rmm.mr.set_current_device_resource(pool) ``` diff --git a/python/rmm/rmm/_lib/CMakeLists.txt b/python/rmm/rmm/_lib/CMakeLists.txt index 1e629a402..7cdfed971 100644 --- a/python/rmm/rmm/_lib/CMakeLists.txt +++ b/python/rmm/rmm/_lib/CMakeLists.txt @@ -12,7 +12,8 @@ # the License. # ============================================================================= -set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx) +set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx + helper.pyx) set(linked_libraries rmm::rmm) # Build all of the Cython targets diff --git a/python/rmm/rmm/_lib/_torch_allocator.cpp b/python/rmm/rmm/_lib/_torch_allocator.cpp index dc92e4639..bfe94c2d0 100644 --- a/python/rmm/rmm/_lib/_torch_allocator.cpp +++ b/python/rmm/rmm/_lib/_torch_allocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,8 +39,9 @@ extern "C" void* allocate(std::size_t size, int device, void* stream) { rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource(device_id); - return mr->allocate(size, rmm::cuda_stream_view{static_cast(stream)}); + auto mr = rmm::mr::get_per_device_resource_ref(device_id); + return mr.allocate_async( + size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast(stream)}); } /** @@ -55,6 +56,9 @@ extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream { rmm::cuda_device_id const device_id{device}; rmm::cuda_set_device_raii with_device{device_id}; - auto mr = rmm::mr::get_per_device_resource(device_id); - mr->deallocate(ptr, size, rmm::cuda_stream_view{static_cast(stream)}); + auto mr = rmm::mr::get_per_device_resource_ref(device_id); + mr.deallocate_async(ptr, + size, + rmm::CUDA_ALLOCATION_ALIGNMENT, + rmm::cuda_stream_view{static_cast(stream)}); } diff --git a/python/rmm/rmm/_lib/helper.pxd b/python/rmm/rmm/_lib/helper.pxd new file mode 100644 index 000000000..8ca151c00 --- /dev/null +++ b/python/rmm/rmm/_lib/helper.pxd @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+cdef object parse_bytes(object s) except *
diff --git a/python/rmm/rmm/_lib/helper.pyx b/python/rmm/rmm/_lib/helper.pyx
new file mode 100644
index 000000000..d442ee341
--- /dev/null
+++ b/python/rmm/rmm/_lib/helper.pyx
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Helper functions for rmm"""
+
+import re
+
+
+cdef dict BYTE_SIZES = {
+    'b': 1,
+    '': 1,
+    'kb': 1000,
+    'mb': 1000**2,
+    'gb': 1000**3,
+    'tb': 1000**4,
+    'pb': 1000**5,
+    'kib': 1024,
+    'mib': 1024**2,
+    'gib': 1024**3,
+    'tib': 1024**4,
+    'pib': 1024**5,
+}
+
+
+pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$")
+
+
+cdef object parse_bytes(object s):
+    """Parse a string or integer into a number of bytes.
+
+    Parameters
+    ----------
+    s : int | str
+        Size in bytes. If an integer is provided, it is returned as-is.
+        A string is parsed as a floating point number with an (optional,
+        case-insensitive) byte-specifier; both SI prefixes (kb, mb, ..., pb)
+        and binary prefixes (kib, mib, ..., pib) are supported.
+
+    Returns
+    -------
+    Requested size in bytes as an integer.
+
+    Raises
+    ------
+    ValueError
+        If it is not possible to parse the input as a byte specification.
+    """
+    cdef str suffix
+    cdef double n
+    cdef int multiplier
+
+    if isinstance(s, int):
+        return s
+
+    match = pattern.match(s)
+
+    if match is None:
+        raise ValueError(f"Could not parse {s} as a byte specification")
+
+    n = float(match.group(1))
+
+    suffix = match.group(2)
+    if suffix is None:
+        suffix = ""
+
+    multiplier = BYTE_SIZES[suffix.lower()]
+
+    return int(n*multiplier)
diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx
index 7cd8a05b7..843331215 100644
--- a/python/rmm/rmm/_lib/memory_resource.pyx
+++ b/python/rmm/rmm/_lib/memory_resource.pyx
@@ -38,6 +38,7 @@ from rmm._cuda.stream cimport Stream
 from rmm._cuda.stream import DEFAULT_STREAM

 from rmm._lib.cuda_stream_view cimport cuda_stream_view
+from rmm._lib.helper cimport parse_bytes
 from rmm._lib.memory_resource cimport (
     available_device_memory as c_available_device_memory,
     percent_of_free_device_memory as c_percent_of_free_device_memory,
@@ -332,9 +333,9 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource):

     Parameters
     ----------
-    initial_pool_size : int, optional
+    initial_pool_size : int | str, optional
         Initial pool size in bytes. By default, half the available memory
-        on the device is used.
+        on the device is used. A string argument is parsed using `parse_bytes`.
     release_threshold: int, optional
         Release threshold in bytes.
         If the pool size grows beyond this value, unused memory held by
         the pool will be released at the
@@ -352,7 +353,7 @@ cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
         cdef optional[size_t] c_initial_pool_size = (
             optional[size_t]()
             if initial_pool_size is None
-            else optional[size_t](<size_t> initial_pool_size)
+            else optional[size_t](<size_t> parse_bytes(initial_pool_size))
         )

         cdef optional[size_t] c_release_threshold = (
@@ -444,12 +445,12 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor):
         c_initial_pool_size = (
             c_percent_of_free_device_memory(50)
             if initial_pool_size is None
-            else initial_pool_size
+            else parse_bytes(initial_pool_size)
         )
         c_maximum_pool_size = (
             optional[size_t]()
             if maximum_pool_size is None
-            else optional[size_t](<size_t> maximum_pool_size)
+            else optional[size_t](<size_t> parse_bytes(maximum_pool_size))
         )
         self.c_obj.reset(
             new pool_memory_resource[device_memory_resource](
@@ -474,10 +475,10 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor):
         upstream_mr : DeviceMemoryResource
             The DeviceMemoryResource from which to allocate
             blocks for the pool.
-        initial_pool_size : int, optional
+        initial_pool_size : int | str, optional
             Initial pool size in bytes. By default, half the available memory
             on the device is used.
-        maximum_pool_size : int, optional
+        maximum_pool_size : int | str, optional
             Maximum size, in bytes, that the pool can grow to.
         """
         pass
@@ -1149,8 +1150,10 @@ cpdef void _initialize(
         typ = PoolMemoryResource
         args = (upstream(),)
         kwargs = dict(
-            initial_pool_size=initial_pool_size,
-            maximum_pool_size=maximum_pool_size
+            initial_pool_size=None if initial_pool_size is None
+            else parse_bytes(initial_pool_size),
+            maximum_pool_size=None if maximum_pool_size is None
+            else parse_bytes(maximum_pool_size)
         )
     else:
         typ = upstream
diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py
index e5290905c..bac04b477 100644
--- a/python/rmm/rmm/rmm.py
+++ b/python/rmm/rmm/rmm.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -45,14 +45,16 @@ def reinitialize(
         performance.
     managed_memory : bool, default False
         If True, use managed memory for device memory allocation
-    initial_pool_size : int, default None
+    initial_pool_size : int | str, default None
         When `pool_allocator` is True, this indicates the initial pool size
         in bytes. By default, 1/2 of the total GPU memory is used.
         When `pool_allocator` is False, this argument is ignored if provided.
-    maximum_pool_size : int, default None
+        A string argument is parsed using `parse_bytes`.
+    maximum_pool_size : int | str, default None
         When `pool_allocator` is True, this indicates the maximum pool size
         in bytes. By default, the total available memory on the GPU is used.
         When `pool_allocator` is False, this argument is ignored if provided.
+        A string argument is parsed using `parse_bytes`.
     devices : int or List[int], default 0
         GPU device IDs to register. By default registers only GPU 0.
logging : bool, default False diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 9379c71e8..8c978e6f1 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -432,8 +432,8 @@ def test_rmm_pool_cupy_allocator_stream_lifetime(): def test_pool_memory_resource(dtype, nelem, alloc): mr = rmm.mr.PoolMemoryResource( rmm.mr.CudaMemoryResource(), - initial_pool_size=1 << 22, - maximum_pool_size=1 << 23, + initial_pool_size="4MiB", + maximum_pool_size="8MiB", ) rmm.mr.set_current_device_resource(mr) assert rmm.mr.get_current_device_resource_type() is type(mr) @@ -507,7 +507,7 @@ def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): def test_reinitialize_max_pool_size(): rmm.reinitialize( - pool_allocator=True, initial_pool_size=0, maximum_pool_size=1 << 23 + pool_allocator=True, initial_pool_size=0, maximum_pool_size="8MiB" ) rmm.DeviceBuffer().resize((1 << 23) - 1) @@ -530,6 +530,24 @@ def test_reinitialize_initial_pool_size_gt_max(): assert "Initial pool size exceeds the maximum pool size" in str(e.value) +def test_reinitialize_with_valid_str_arg_pool_size(): + rmm.reinitialize( + pool_allocator=True, + initial_pool_size="2kib", + maximum_pool_size="8kib", + ) + + +def test_reinitialize_with_invalid_str_arg_pool_size(): + with pytest.raises(ValueError) as e: + rmm.reinitialize( + pool_allocator=True, + initial_pool_size="2k", # 2kb valid, not 2k + maximum_pool_size="8k", + ) + assert "Could not parse" in str(e.value) + + @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d1195a070..551727fe8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -40,6 +40,7 @@ function(ConfigureTestInternal TEST_NAME) PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") target_compile_options(${TEST_NAME} PUBLIC $<$:-Wall -Werror -Wno-error=deprecated-declarations>) + target_compile_options(${TEST_NAME} PUBLIC "$<$:-O0>") if(DISABLE_DEPRECATION_WARNING) target_compile_options( @@ -125,7 +126,7 @@ ConfigureTest(DEVICE_MR_REF_TEST mr/device/mr_ref_tests.cpp ConfigureTest(ADAPTOR_TEST mr/device/adaptor_tests.cpp) # pool mr tests -ConfigureTest(POOL_MR_TEST mr/device/pool_mr_tests.cpp GPUS 1 PERCENT 60) +ConfigureTest(POOL_MR_TEST mr/device/pool_mr_tests.cpp GPUS 1 PERCENT 100) # cuda_async mr tests ConfigureTest(CUDA_ASYNC_MR_TEST mr/device/cuda_async_mr_tests.cpp GPUS 1 PERCENT 60) @@ -140,10 +141,10 @@ ConfigureTest(POLYMORPHIC_ALLOCATOR_TEST mr/device/polymorphic_allocator_tests.c ConfigureTest(STREAM_ADAPTOR_TEST mr/device/stream_allocator_adaptor_tests.cpp) # statistics adaptor tests -ConfigureTest(STATISTICS_TEST mr/device/statistics_mr_tests.cpp GPUS 1 PERCENT 100) +ConfigureTest(STATISTICS_TEST mr/device/statistics_mr_tests.cpp) # tracking adaptor tests -ConfigureTest(TRACKING_TEST mr/device/tracking_mr_tests.cpp GPUS 1 PERCENT 100) +ConfigureTest(TRACKING_TEST mr/device/tracking_mr_tests.cpp) # failure callback adaptor tests ConfigureTest(FAILURE_CALLBACK_TEST mr/device/failure_callback_mr_tests.cpp) @@ -185,7 +186,7 @@ ConfigureTest(PREFETCH_TEST prefetch_tests.cpp) ConfigureTest(LOGGER_TEST logger_tests.cpp) # arena MR tests -ConfigureTest(ARENA_MR_TEST mr/device/arena_mr_tests.cpp GPUS 1 PERCENT 60) +ConfigureTest(ARENA_MR_TEST mr/device/arena_mr_tests.cpp GPUS 1 PERCENT 100) # binning MR tests ConfigureTest(BINNING_MR_TEST mr/device/binning_mr_tests.cpp) diff --git 
a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index e58ba53a2..55432feb0 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -42,9 +42,9 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateDestroyDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); { if constexpr (std::is_same_v>) { @@ -57,7 +57,7 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateDestroyDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } @@ -69,9 +69,9 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); { auto buf_1 = []() { @@ -97,7 +97,7 @@ TYPED_TEST(ContainerMultiDeviceTest, CreateMoveDestroyDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } @@ -109,9 +109,9 @@ TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); if constexpr (not std::is_same_v>) { auto buf = TypeParam(128, rmm::cuda_stream_view{}); @@ -120,7 +120,7 @@ TYPED_TEST(ContainerMultiDeviceTest, ResizeDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } @@ -132,9 +132,9 @@ TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice) // only run on multidevice systems if (num_devices >= 2) { rmm::cuda_set_device_raii dev{rmm::cuda_device_id{0}}; - auto* orig_mr = rmm::mr::get_current_device_resource(); + auto orig_mr = rmm::mr::get_current_device_resource_ref(); auto check_mr = device_check_resource_adaptor{orig_mr}; - rmm::mr::set_current_device_resource(&check_mr); + rmm::mr::set_current_device_resource_ref(check_mr); if constexpr (not std::is_same_v>) { auto buf = TypeParam(128, rmm::cuda_stream_view{}); @@ -144,6 +144,6 @@ TYPED_TEST(ContainerMultiDeviceTest, ShrinkDifferentActiveDevice) } RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(0)); - rmm::mr::set_current_device_resource(orig_mr); + rmm::mr::set_current_device_resource_ref(orig_mr); } } diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 1cc068434..ec7e6c3e9 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, 
NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -96,6 +96,6 @@ TEST_F(CudaStreamDeathTest, TestSyncNoThrow) // should assert here or in `~cuda_stream()` stream_a.synchronize_no_throw(); }; - EXPECT_DEATH(test(), "Assertion"); + EXPECT_DEATH(test(), ""); } #endif diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index c095eecf8..5e48504d6 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -75,7 +75,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResource) EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.ssize()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); } @@ -87,7 +87,7 @@ TYPED_TEST(DeviceBufferTest, DefaultMemoryResourceStream) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(this->stream, buff.stream()); } @@ -121,7 +121,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); @@ -138,7 +138,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer) EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); buff.stream().synchronize(); @@ -152,7 +152,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromNullptr) EXPECT_EQ(nullptr, buff.data()); EXPECT_EQ(0, buff.size()); EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}, + EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}, buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); } @@ -180,7 +180,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_EQ(buff.size(), buff_copy.size()); EXPECT_EQ(buff.capacity(), buff_copy.capacity()); EXPECT_EQ(buff_copy.memory_resource(), - rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); + rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), @@ -223,7 +223,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) // The capacity of the copy should be equal to the `size()` of the original EXPECT_EQ(new_size, buff_copy.capacity()); EXPECT_EQ(buff_copy.memory_resource(), - rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); + 
rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), diff --git a/tests/device_check_resource_adaptor.hpp b/tests/device_check_resource_adaptor.hpp index fcb578fdf..6780f56d7 100644 --- a/tests/device_check_resource_adaptor.hpp +++ b/tests/device_check_resource_adaptor.hpp @@ -17,13 +17,14 @@ #include #include #include +#include #include #include class device_check_resource_adaptor final : public rmm::mr::device_memory_resource { public: - device_check_resource_adaptor(rmm::mr::device_memory_resource* upstream) + device_check_resource_adaptor(rmm::device_async_resource_ref upstream) : device_id{rmm::get_current_cuda_device()}, upstream_(upstream) { } @@ -36,11 +37,6 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour return upstream_; } - /** - * @briefreturn{device_memory_resource* to the upstream memory resource} - */ - [[nodiscard]] device_memory_resource* get_upstream() const noexcept { return upstream_; } - private: [[nodiscard]] bool check_device_id() const { return device_id == rmm::get_current_cuda_device(); } @@ -48,7 +44,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { return upstream_->allocate(bytes, stream); } + if (is_correct_device) { return get_upstream_resource().allocate_async(bytes, stream); } return nullptr; } @@ -56,7 +52,7 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { bool const is_correct_device = check_device_id(); EXPECT_TRUE(is_correct_device); - if (is_correct_device) { upstream_->deallocate(ptr, bytes, stream); } + if (is_correct_device) { get_upstream_resource().deallocate_async(ptr, bytes, stream); } } [[nodiscard]] bool do_is_equal( @@ -64,10 +60,10 @@ class device_check_resource_adaptor final : public rmm::mr::device_memory_resour { if (this == &other) { return true; } auto const* cast = dynamic_cast(&other); - if (cast == nullptr) { return upstream_->is_equal(other); } + if (cast == nullptr) { return false; } return get_upstream_resource() == cast->get_upstream_resource(); } rmm::cuda_device_id device_id; - rmm::mr::device_memory_resource* upstream_{}; + rmm::device_async_resource_ref upstream_; }; diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index 6f80a5de1..323894a6a 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,7 @@ struct DeviceScalarTest : public ::testing::Test { std::default_random_engine generator{}; T value{}; rmm::cuda_stream stream{}; - rmm::device_async_resource_ref mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref mr{rmm::mr::get_current_device_resource_ref()}; DeviceScalarTest() : value{random_value()} {} diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index 1c93ef138..90955c24c 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -39,7 +39,7 @@ TYPED_TEST(TypedUVectorTest, MemoryResource) { rmm::device_uvector vec(128, this->stream()); EXPECT_EQ(vec.memory_resource(), - rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()}); + rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()}); } TYPED_TEST(TypedUVectorTest, ZeroSizeConstructor) diff --git a/tests/mock_resource.hpp b/tests/mock_resource.hpp index e06148d3a..555cf0d74 100644 --- a/tests/mock_resource.hpp +++ b/tests/mock_resource.hpp @@ -25,7 +25,12 @@ class mock_resource : public rmm::mr::device_memory_resource { public: MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override)); MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override)); + bool operator==(mock_resource const&) const noexcept { return true; } + bool operator!=(mock_resource const&) const { return false; } + friend void get_property(mock_resource const&, cuda::mr::device_accessible) noexcept {} using size_pair = std::pair; }; +static_assert(cuda::mr::async_resource_with); + } // namespace rmm::test diff --git a/tests/mr/device/adaptor_tests.cpp b/tests/mr/device/adaptor_tests.cpp index a757a78b0..286daa841 100644 --- a/tests/mr/device/adaptor_tests.cpp +++ b/tests/mr/device/adaptor_tests.cpp @@ -129,8 +129,7 @@ TYPED_TEST(AdaptorTest, Equality) } { - rmm::mr::device_memory_resource* device_mr = &this->cuda; - auto other_mr = aligned_resource_adaptor{device_mr}; + auto other_mr = aligned_resource_adaptor{&this->cuda}; EXPECT_FALSE(this->mr->is_equal(other_mr)); } } diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index b9ecbc8ca..9b90bf751 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -59,13 +59,13 @@ TEST(AlignedTest, ThrowOnInvalidAllocationAlignment) TEST(AlignedTest, SupportsGetMemInfo) { mock_resource mock; - aligned_mock mr{&mock}; + aligned_mock mr{mock}; } TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) { mock_resource mock; - aligned_mock mr{&mock}; + aligned_mock mr{mock}; cuda_stream_view stream; void* const pointer = int_to_address(123); @@ -204,7 +204,7 @@ TEST(AlignedTest, AlignRealPointer) { auto const alignment{4096}; auto const threshold{65536}; - aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; + aligned_real mr{rmm::mr::get_current_device_resource_ref(), alignment, threshold}; void* alloc = mr.allocate(threshold); EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 1eb38888e..bdc0f2438 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -23,6 +23,9 @@ #include #include #include +#include + +#include #include #include @@ -37,15 +40,22 @@ namespace { class mock_memory_resource { public: - MOCK_METHOD(void*, allocate, (std::size_t)); - MOCK_METHOD(void, deallocate, (void*, 
std::size_t)); + MOCK_METHOD(void*, allocate, (std::size_t, std::size_t)); + MOCK_METHOD(void, deallocate, (void*, std::size_t, std::size_t)); + MOCK_METHOD(void*, allocate_async, (std::size_t, std::size_t, cuda::stream_ref)); + MOCK_METHOD(void, deallocate_async, (void*, std::size_t, std::size_t, cuda::stream_ref)); + bool operator==(mock_memory_resource const&) const noexcept { return true; } + bool operator!=(mock_memory_resource const&) const { return false; } + friend void get_property(mock_memory_resource const&, cuda::mr::device_accessible) noexcept {} }; +static_assert(cuda::mr::async_resource_with); + using rmm::mr::detail::arena::block; using rmm::mr::detail::arena::byte_span; using rmm::mr::detail::arena::superblock; -using global_arena = rmm::mr::detail::arena::global_arena; -using arena = rmm::mr::detail::arena::arena; +using global_arena = rmm::mr::detail::arena::global_arena; +using arena = rmm::mr::detail::arena::arena; using arena_mr = rmm::mr::arena_memory_resource; using ::testing::Return; @@ -59,9 +69,10 @@ auto const fake_address4 = reinterpret_cast(superblock::minimum_size * 2) struct ArenaTest : public ::testing::Test { void SetUp() override { - EXPECT_CALL(mock_mr, allocate(arena_size)).WillOnce(Return(fake_address3)); - EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size)); - global = std::make_unique(&mock_mr, arena_size); + EXPECT_CALL(mock_mr, allocate(arena_size, ::testing::_)).WillOnce(Return(fake_address3)); + EXPECT_CALL(mock_mr, deallocate(fake_address3, arena_size, ::testing::_)); + + global = std::make_unique(mock_mr, arena_size); per_thread = std::make_unique(*global); } @@ -293,13 +304,6 @@ TEST_F(ArenaTest, SuperblockMaxFreeSizeWhenFull) // NOLINT /** * Test global_arena. */ - -TEST_F(ArenaTest, GlobalArenaNullUpstream) // NOLINT -{ - auto construct_nullptr = []() { global_arena global{nullptr, std::nullopt}; }; - EXPECT_THROW(construct_nullptr(), rmm::logic_error); // NOLINT(cppcoreguidelines-avoid-goto) -} - TEST_F(ArenaTest, GlobalArenaAcquire) // NOLINT { auto const sblk = global->acquire(256); @@ -378,7 +382,7 @@ TEST_F(ArenaTest, GlobalArenaDeallocate) // NOLINT { auto* ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); - global->deallocate(ptr, superblock::minimum_size * 2, {}); + global->deallocate_async(ptr, superblock::minimum_size * 2, {}); ptr = global->allocate(superblock::minimum_size * 2); EXPECT_EQ(ptr, fake_address3); } @@ -387,8 +391,8 @@ TEST_F(ArenaTest, GlobalArenaDeallocateAlignUp) // NOLINT { auto* ptr = global->allocate(superblock::minimum_size + 256); auto* ptr2 = global->allocate(superblock::minimum_size + 512); - global->deallocate(ptr, superblock::minimum_size + 256, {}); - global->deallocate(ptr2, superblock::minimum_size + 512, {}); + global->deallocate_async(ptr, superblock::minimum_size + 256, {}); + global->deallocate_async(ptr2, superblock::minimum_size + 512, {}); EXPECT_EQ(global->allocate(arena_size), fake_address3); } @@ -479,7 +483,7 @@ TEST_F(ArenaTest, ThrowOnNullUpstream) // NOLINT TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT { - auto construct_small = []() { arena_mr mr{rmm::mr::get_current_device_resource(), 256}; }; + auto construct_small = []() { arena_mr mr{rmm::mr::get_current_device_resource_ref(), 256}; }; // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto) EXPECT_THROW(construct_small(), rmm::logic_error); } @@ -490,14 +494,14 @@ TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT auto const free = rmm::available_device_memory().first; auto const 
ninety_percent = rmm::align_up( static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); - arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); + arena_mr mr(rmm::mr::get_current_device_resource_ref(), ninety_percent); }()); } TEST_F(ArenaTest, SmallMediumLarge) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) - arena_mr mr(rmm::mr::get_current_device_resource()); + arena_mr mr(rmm::mr::get_current_device_resource_ref()); auto* small = mr.allocate(256); auto* medium = mr.allocate(64_MiB); auto const free = rmm::available_device_memory().first; @@ -512,7 +516,7 @@ TEST_F(ArenaTest, Defragment) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) auto const arena_size = superblock::minimum_size * 4; - arena_mr mr(rmm::mr::get_current_device_resource(), arena_size); + arena_mr mr(rmm::mr::get_current_device_resource_ref(), arena_size); std::vector threads; std::size_t num_threads{4}; threads.reserve(num_threads); @@ -539,7 +543,7 @@ TEST_F(ArenaTest, PerThreadToStreamDealloc) // NOLINT // arena that then moved to global arena during a defragmentation // and then moved to a stream arena. auto const arena_size = superblock::minimum_size * 2; - arena_mr mr(rmm::mr::get_current_device_resource(), arena_size); + arena_mr mr(rmm::mr::get_current_device_resource_ref(), arena_size); // Create an allocation from a per thread arena void* thread_ptr = mr.allocate(256, rmm::cuda_stream_per_thread); // Create an allocation in a stream arena to force global arena @@ -565,7 +569,7 @@ TEST_F(ArenaTest, PerThreadToStreamDealloc) // NOLINT TEST_F(ArenaTest, DumpLogOnFailure) // NOLINT { - arena_mr mr{rmm::mr::get_current_device_resource(), 1_MiB, true}; + arena_mr mr{rmm::mr::get_current_device_resource_ref(), 1_MiB, true}; { // make the log interesting std::vector threads; diff --git a/tests/mr/device/callback_mr_tests.cpp b/tests/mr/device/callback_mr_tests.cpp index 34a2cc8cc..a56efa60c 100644 --- a/tests/mr/device/callback_mr_tests.cpp +++ b/tests/mr/device/callback_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,47 +36,50 @@ using ::testing::_; TEST(CallbackTest, TestCallbacksAreInvoked) { - auto base_mr = mock_resource(); + auto base_mr = mock_resource(); + auto base_ref = device_async_resource_ref{base_mr}; EXPECT_CALL(base_mr, do_allocate(10_MiB, cuda_stream_view{})).Times(1); EXPECT_CALL(base_mr, do_deallocate(_, 10_MiB, cuda_stream_view{})).Times(1); auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { - auto base_mr = static_cast(arg); - return base_mr->allocate(size, stream); + auto base_mr = *static_cast(arg); + return base_mr.allocate_async(size, stream); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { - auto base_mr = static_cast(arg); - base_mr->deallocate(ptr, size, stream); + auto base_mr = *static_cast(arg); + base_mr.deallocate_async(ptr, size, stream); }; auto mr = - rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); - auto ptr = mr.allocate(10_MiB); - mr.deallocate(ptr, 10_MiB); + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_ref, &base_ref); + auto const size = std::size_t{10_MiB}; + auto* ptr = mr.allocate(size); + mr.deallocate(ptr, size); } TEST(CallbackTest, LoggingTest) { testing::internal::CaptureStdout(); - auto base_mr = rmm::mr::get_current_device_resource(); + auto base_mr = rmm::mr::get_current_device_resource_ref(); auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Allocating " << size << " bytes" << std::endl; - auto base_mr = static_cast(arg); - return base_mr->allocate(size, stream); + auto base_mr = *static_cast(arg); + return base_mr.allocate_async(size, stream); }; auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { std::cout << "Deallocating " << size << " bytes" << std::endl; - auto base_mr = static_cast(arg); - base_mr->deallocate(ptr, size, stream); + auto base_mr = *static_cast(arg); + base_mr.deallocate_async(ptr, size, stream); }; auto mr = - rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, base_mr, base_mr); - auto ptr = mr.allocate(10_MiB); - mr.deallocate(ptr, 10_MiB); + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); + auto const size = std::size_t{10_MiB}; + auto* ptr = mr.allocate(size); + mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStdout(); - std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", 10_MiB, 10_MiB); + std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", size, size); ASSERT_EQ(expect, output); } diff --git a/tests/mr/device/failure_callback_mr_tests.cpp b/tests/mr/device/failure_callback_mr_tests.cpp index 683aee86e..4b3d084d5 100644 --- a/tests/mr/device/failure_callback_mr_tests.cpp +++ b/tests/mr/device/failure_callback_mr_tests.cpp @@ -47,7 +47,8 @@ bool failure_handler(std::size_t /*bytes*/, void* arg) TEST(FailureCallbackTest, RetryAllocationOnce) { bool retried{false}; - failure_callback_adaptor<> mr{rmm::mr::get_current_device_resource(), failure_handler, &retried}; + failure_callback_adaptor<> mr{ + rmm::mr::get_current_device_resource_ref(), failure_handler, &retried}; EXPECT_EQ(retried, false); EXPECT_THROW(mr.allocate(512_GiB), std::bad_alloc); EXPECT_EQ(retried, true); diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 777ce9428..e6cc97029 100644 --- 
a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,19 +25,19 @@ namespace rmm::test { namespace { -using Limiting_adaptor = rmm::mr::limiting_resource_adaptor; +using limiting_adaptor = rmm::mr::limiting_resource_adaptor; TEST(LimitingTest, ThrowOnNullUpstream) { auto const max_size{5_MiB}; - auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, max_size}; }; + auto construct_nullptr = []() { limiting_adaptor mr{nullptr, max_size}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } TEST(LimitingTest, TooBig) { auto const max_size{5_MiB}; - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; EXPECT_THROW(mr.allocate(max_size + 1), rmm::out_of_memory); } @@ -45,15 +45,15 @@ TEST(LimitingTest, UpstreamFailure) { auto const max_size_1{2_MiB}; auto const max_size_2{5_MiB}; - Limiting_adaptor mr1{rmm::mr::get_current_device_resource(), max_size_1}; - Limiting_adaptor mr2{&mr1, max_size_2}; + limiting_adaptor mr1{rmm::mr::get_current_device_resource_ref(), max_size_1}; + limiting_adaptor mr2{&mr1, max_size_2}; EXPECT_THROW(mr2.allocate(4_MiB), rmm::out_of_memory); } TEST(LimitingTest, UnderLimitDueToFrees) { auto const max_size{10_MiB}; - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; auto const size1{4_MiB}; auto* ptr1 = mr.allocate(size1); auto allocated_bytes = size1; @@ -81,7 +81,7 @@ TEST(LimitingTest, UnderLimitDueToFrees) TEST(LimitingTest, OverLimit) { auto const max_size{10_MiB}; - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + limiting_adaptor mr{rmm::mr::get_current_device_resource_ref(), max_size}; auto const size1{4_MiB}; auto* ptr1 = mr.allocate(size1); auto allocated_bytes = size1; diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index c63a61844..9db63eb1b 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -49,19 +49,7 @@ TEST(PoolTest, ThrowMaxLessThanInitial) auto max_less_than_initial = []() { const auto initial{1024}; const auto maximum{256}; - pool_mr mr{rmm::mr::get_current_device_resource(), initial, maximum}; - }; - EXPECT_THROW(max_less_than_initial(), rmm::logic_error); -} - -TEST(PoolTest, ReferenceThrowMaxLessThanInitial) -{ - // Make sure first argument is enough larger than the second that alignment rounding doesn't - // make them equal - auto max_less_than_initial = []() { - const auto initial{1024}; - const auto maximum{256}; - pool_mr mr{*rmm::mr::get_current_device_resource(), initial, maximum}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), initial, maximum}; }; EXPECT_THROW(max_less_than_initial(), rmm::logic_error); } @@ -72,7 +60,7 @@ TEST(PoolTest, AllocateNinetyPercent) auto const [free, total] = rmm::available_device_memory(); (void)total; auto const ninety_percent_pool = rmm::percent_of_free_device_memory(90); - pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); } @@ -81,7 +69,7 @@ TEST(PoolTest, TwoLargeBuffers) { 
auto two_large = []() { [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); - pool_mr mr{rmm::mr::get_current_device_resource(), rmm::percent_of_free_device_memory(50)}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), rmm::percent_of_free_device_memory(50)}; auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); mr.deallocate(ptr1, free / 4); @@ -116,7 +104,7 @@ TEST(PoolTest, ForceGrowth) TEST(PoolTest, DeletedStream) { - pool_mr mr{rmm::mr::get_current_device_resource(), 0}; + pool_mr mr{rmm::mr::get_current_device_resource_ref(), 0}; cudaStream_t stream{}; // we don't use rmm::cuda_stream here to make destruction more explicit const int size = 10000; EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream)); @@ -129,7 +117,7 @@ TEST(PoolTest, DeletedStream) TEST(PoolTest, InitialAndMaxPoolSizeEqual) { EXPECT_NO_THROW([]() { - pool_mr mr(rmm::mr::get_current_device_resource(), 1000192, 1000192); + pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000192); mr.allocate(1000); }()); } @@ -138,14 +126,14 @@ TEST(PoolTest, NonAlignedPoolSize) { EXPECT_THROW( []() { - pool_mr mr(rmm::mr::get_current_device_resource(), 1000031, 1000192); + pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000031, 1000192); mr.allocate(1000); }(), rmm::logic_error); EXPECT_THROW( []() { - pool_mr mr(rmm::mr::get_current_device_resource(), 1000192, 1000200); + pool_mr mr(rmm::mr::get_current_device_resource_ref(), 1000192, 1000200); mr.allocate(1000); }(), rmm::logic_error); @@ -203,18 +191,18 @@ namespace test_properties { class fake_async_resource { public: // To model `async_resource` - void* allocate(std::size_t, std::size_t) { return nullptr; } - void deallocate(void* ptr, std::size_t, std::size_t) {} - void* allocate_async(std::size_t, std::size_t, cuda::stream_ref) { return nullptr; } - void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) {} + static void* allocate(std::size_t, std::size_t) { return nullptr; } + static void deallocate(void* ptr, std::size_t, std::size_t) {} + static void* allocate_async(std::size_t, std::size_t, cuda::stream_ref) { return nullptr; } + static void deallocate_async(void* ptr, std::size_t, std::size_t, cuda::stream_ref) {} bool operator==(const fake_async_resource& other) const { return true; } bool operator!=(const fake_async_resource& other) const { return false; } private: - void* do_allocate(std::size_t bytes, cuda_stream_view) { return nullptr; } - void do_deallocate(void* ptr, std::size_t, cuda_stream_view) {} - [[nodiscard]] bool do_is_equal(fake_async_resource const& other) const noexcept { return true; } + static void* do_allocate(std::size_t bytes, cuda_stream_view) { return nullptr; } + static void do_deallocate(void* ptr, std::size_t, cuda_stream_view) {} + [[nodiscard]] static bool do_is_equal(fake_async_resource const& other) noexcept { return true; } }; static_assert(!cuda::has_property); static_assert(!cuda::has_property, diff --git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp index 6c5700f0b..f796a4c00 100644 --- a/tests/mr/device/statistics_mr_tests.cpp +++ b/tests/mr/device/statistics_mr_tests.cpp @@ -40,7 +40,7 @@ TEST(StatisticsTest, ThrowOnNullUpstream) TEST(StatisticsTest, Empty) { - statistics_adaptor mr{rmm::mr::get_current_device_resource()}; + statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()}; EXPECT_EQ(mr.get_bytes_counter().peak, 0); EXPECT_EQ(mr.get_bytes_counter().total, 0); @@ -53,7 +53,7 @@ 
diff --git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp
index 6c5700f0b..f796a4c00 100644
--- a/tests/mr/device/statistics_mr_tests.cpp
+++ b/tests/mr/device/statistics_mr_tests.cpp
@@ -40,7 +40,7 @@ TEST(StatisticsTest, ThrowOnNullUpstream)
 TEST(StatisticsTest, Empty)
 {
-  statistics_adaptor mr{rmm::mr::get_current_device_resource()};
+  statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()};

   EXPECT_EQ(mr.get_bytes_counter().peak, 0);
   EXPECT_EQ(mr.get_bytes_counter().total, 0);
@@ -53,7 +53,7 @@ TEST(StatisticsTest, Empty)
 TEST(StatisticsTest, AllFreed)
 {
-  statistics_adaptor mr{rmm::mr::get_current_device_resource()};
+  statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   allocations.reserve(num_allocations);
@@ -71,7 +71,7 @@ TEST(StatisticsTest, AllFreed)
 TEST(StatisticsTest, PeakAllocations)
 {
-  statistics_adaptor mr{rmm::mr::get_current_device_resource()};
+  statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
@@ -127,9 +127,9 @@ TEST(StatisticsTest, PeakAllocations)
 TEST(StatisticsTest, MultiTracking)
 {
-  auto* orig_device_resource = rmm::mr::get_current_device_resource();
+  auto orig_device_resource = rmm::mr::get_current_device_resource_ref();
   statistics_adaptor mr{orig_device_resource};
-  rmm::mr::set_current_device_resource(&mr);
+  rmm::mr::set_current_device_resource_ref(mr);

   std::vector<std::shared_ptr<rmm::device_buffer>> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
@@ -139,8 +139,8 @@ TEST(StatisticsTest, MultiTracking)
   EXPECT_EQ(mr.get_allocations_counter().value, 10);

-  statistics_adaptor inner_mr{rmm::mr::get_current_device_resource()};
-  rmm::mr::set_current_device_resource(&inner_mr);
+  statistics_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()};
+  rmm::mr::set_current_device_resource_ref(inner_mr);

   for (std::size_t i = 0; i < num_more_allocations; ++i) {
     allocations.emplace_back(
@@ -172,7 +172,7 @@ TEST(StatisticsTest, MultiTracking)
   EXPECT_EQ(inner_mr.get_allocations_counter().peak, 5);

   // Reset the current device resource
-  rmm::mr::set_current_device_resource(orig_device_resource);
+  rmm::mr::set_current_device_resource_ref(orig_device_resource);
 }

 TEST(StatisticsTest, NegativeInnerTracking)
@@ -180,7 +180,7 @@ TEST(StatisticsTest, NegativeInnerTracking)
   // This tests the unlikely scenario where pointers are deallocated on an inner
   // wrapped memory resource. This can happen if the MR is not saved with the
   // memory pointer
-  statistics_adaptor mr{rmm::mr::get_current_device_resource()};
+  statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
     allocations.push_back(mr.allocate(ten_MiB));
@@ -236,7 +236,7 @@ TEST(StatisticsTest, NegativeInnerTracking)
 TEST(StatisticsTest, Nested)
 {
-  statistics_adaptor mr{rmm::mr::get_current_device_resource()};
+  statistics_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   void* a0 = mr.allocate(ten_MiB);
   EXPECT_EQ(mr.get_bytes_counter().value, ten_MiB);
   EXPECT_EQ(mr.get_allocations_counter().value, 1);
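Note that in `MultiTracking`, `orig_device_resource` becomes a value-semantic ref rather than a raw pointer, so saving and restoring the current resource no longer needs `&` or `*`. A sketch of the counter behaviour these tests assert, assuming the `statistics_adaptor` alias used above is `rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource>` (scaffold and sizes illustrative):

```cpp
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

#include <cassert>
#include <cstddef>
#include <cstdint>

int main()
{
  rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> mr{
    rmm::mr::get_current_device_resource_ref()};

  auto const bytes = std::size_t{1} << 20;
  void* ptr = mr.allocate(bytes);
  assert(mr.get_bytes_counter().value == static_cast<std::int64_t>(bytes));  // outstanding now
  mr.deallocate(ptr, bytes);
  assert(mr.get_bytes_counter().value == 0);                                 // all freed...
  assert(mr.get_bytes_counter().peak == static_cast<std::int64_t>(bytes));   // ...peak is retained
  return 0;
}
```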
diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu
index 91ae396ed..84f599957 100644
--- a/tests/mr/device/thrust_allocator_tests.cu
+++ b/tests/mr/device/thrust_allocator_tests.cu
@@ -37,9 +37,9 @@ namespace {
 struct allocator_test : public mr_ref_test {};

 // Disable until we support resource_ref with set_current_device_resource
-/*TEST_P(allocator_test, first)
+TEST_P(allocator_test, first)
 {
-  rmm::mr::set_current_device_resource(this->mr.get());
+  rmm::mr::set_current_device_resource_ref(this->ref);
   auto const num_ints{100};
   rmm::device_vector<int> ints(num_ints, 1);
   EXPECT_EQ(num_ints, thrust::reduce(ints.begin(), ints.end()));
@@ -47,12 +47,12 @@ struct allocator_test : public mr_ref_test {};

 TEST_P(allocator_test, defaults)
 {
-  rmm::mr::set_current_device_resource(this->mr.get());
+  rmm::mr::set_current_device_resource_ref(this->ref);
   rmm::mr::thrust_allocator<int> allocator(rmm::cuda_stream_default);
   EXPECT_EQ(allocator.stream(), rmm::cuda_stream_default);
   EXPECT_EQ(allocator.get_upstream_resource(),
-            rmm::device_async_resource_ref{rmm::mr::get_current_device_resource()});
-}*/
+            rmm::device_async_resource_ref{rmm::mr::get_current_device_resource_ref()});
+}

 TEST_P(allocator_test, multi_device)
 {
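Re-enabling these tests is the payoff of the migration: `set_current_device_resource_ref` accepts the fixture's resource ref directly, which the old pointer-based setter could not. The re-enabled `first` test boils down to the following pattern (standalone sketch; compile as CUDA C++ with nvcc):

```cpp
#include <rmm/device_vector.hpp>
#include <thrust/reduce.h>

int main()
{
  // rmm::device_vector allocates through the current device resource via
  // rmm::mr::thrust_allocator, instead of thrust's default cudaMalloc/cudaFree.
  rmm::device_vector<int> ints(100, 1);
  int const sum = thrust::reduce(ints.begin(), ints.end());
  return sum == 100 ? 0 : 1;  // expect 100
}
```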
diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp
index 7c2532c60..acd540ae6 100644
--- a/tests/mr/device/tracking_mr_tests.cpp
+++ b/tests/mr/device/tracking_mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,14 +42,14 @@ TEST(TrackingTest, ThrowOnNullUpstream)
 TEST(TrackingTest, Empty)
 {
-  tracking_adaptor mr{rmm::mr::get_current_device_resource()};
+  tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   EXPECT_EQ(mr.get_outstanding_allocations().size(), 0);
   EXPECT_EQ(mr.get_allocated_bytes(), 0);
 }

 TEST(TrackingTest, AllFreed)
 {
-  tracking_adaptor mr{rmm::mr::get_current_device_resource()};
+  tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   allocations.reserve(num_allocations);
   for (int i = 0; i < num_allocations; ++i) {
@@ -64,7 +64,7 @@ TEST(TrackingTest, AllFreed)
 TEST(TrackingTest, AllocationsLeftWithStacks)
 {
-  tracking_adaptor mr{rmm::mr::get_current_device_resource(), true};
+  tracking_adaptor mr{rmm::mr::get_current_device_resource_ref(), true};
   std::vector<void*> allocations;
   allocations.reserve(num_allocations);
   for (int i = 0; i < num_allocations; ++i) {
@@ -82,7 +82,7 @@ TEST(TrackingTest, AllocationsLeftWithStacks)
 TEST(TrackingTest, AllocationsLeftWithoutStacks)
 {
-  tracking_adaptor mr{rmm::mr::get_current_device_resource()};
+  tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   allocations.reserve(num_allocations);
   for (int i = 0; i < num_allocations; ++i) {
@@ -101,9 +101,9 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks)
 TEST(TrackingTest, MultiTracking)
 {
-  auto* orig_device_resource = rmm::mr::get_current_device_resource();
+  auto orig_device_resource = rmm::mr::get_current_device_resource_ref();
   tracking_adaptor mr{orig_device_resource, true};
-  rmm::mr::set_current_device_resource(&mr);
+  rmm::mr::set_current_device_resource_ref(mr);

   std::vector<std::shared_ptr<rmm::device_buffer>> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
@@ -113,8 +113,8 @@ TEST(TrackingTest, MultiTracking)
   EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations);

-  tracking_adaptor inner_mr{rmm::mr::get_current_device_resource()};
-  rmm::mr::set_current_device_resource(&inner_mr);
+  tracking_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()};
+  rmm::mr::set_current_device_resource_ref(inner_mr);

   for (std::size_t i = 0; i < num_more_allocations; ++i) {
     allocations.emplace_back(
@@ -141,7 +141,7 @@ TEST(TrackingTest, MultiTracking)
   EXPECT_EQ(inner_mr.get_allocated_bytes(), 0);

   // Reset the current device resource
-  rmm::mr::set_current_device_resource(orig_device_resource);
+  rmm::mr::set_current_device_resource_ref(orig_device_resource);
 }

 TEST(TrackingTest, NegativeInnerTracking)
@@ -149,7 +149,7 @@ TEST(TrackingTest, NegativeInnerTracking)
   // This tests the unlikely scenario where pointers are deallocated on an inner
   // wrapped memory resource. This can happen if the MR is not saved with the
   // memory pointer
-  tracking_adaptor mr{rmm::mr::get_current_device_resource()};
+  tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
     allocations.push_back(mr.allocate(ten_MiB));
@@ -181,7 +181,7 @@ TEST(TrackingTest, NegativeInnerTracking)
 TEST(TrackingTest, DeallocWrongBytes)
 {
-  tracking_adaptor mr{rmm::mr::get_current_device_resource()};
+  tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
     allocations.push_back(mr.allocate(ten_MiB));
@@ -207,7 +207,7 @@ TEST(TrackingTest, LogOutstandingAllocations)
   rmm::logger().sinks().push_back(oss_sink);
   auto old_level = rmm::logger().level();

-  tracking_adaptor mr{rmm::mr::get_current_device_resource()};
+  tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()};
   std::vector<void*> allocations;
   for (std::size_t i = 0; i < num_allocations; ++i) {
     allocations.push_back(mr.allocate(ten_MiB));
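The tracking adaptor follows the same pattern as the statistics adaptor above: the upstream is bound by ref, and outstanding allocations and byte counts remain observable either way. A sketch, assuming `tracking_adaptor` is `rmm::mr::tracking_resource_adaptor<rmm::mr::device_memory_resource>` as aliased in the test file (scaffold illustrative):

```cpp
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/tracking_resource_adaptor.hpp>

#include <cassert>

int main()
{
  rmm::mr::tracking_resource_adaptor<rmm::mr::device_memory_resource> mr{
    rmm::mr::get_current_device_resource_ref()};

  void* ptr = mr.allocate(256);
  assert(mr.get_outstanding_allocations().size() == 1);  // one live allocation tracked
  assert(mr.get_allocated_bytes() == 256);

  mr.deallocate(ptr, 256);
  assert(mr.get_outstanding_allocations().empty());      // fully released
  assert(mr.get_allocated_bytes() == 0);
  return 0;
}
```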
diff --git a/tests/mr/host/mr_ref_tests.cpp b/tests/mr/host/mr_ref_tests.cpp
index 8445ab1f5..071739575 100644
--- a/tests/mr/host/mr_ref_tests.cpp
+++ b/tests/mr/host/mr_ref_tests.cpp
@@ -233,14 +233,17 @@ TYPED_TEST(MRRefTest, UnsupportedAlignmentTest)
   for (std::size_t num_trials = 0; num_trials < NUM_TRIALS; ++num_trials) {
     for (std::size_t alignment = MinTestedAlignment; alignment <= MaxTestedAlignment;
          alignment *= TestedAlignmentMultiplier) {
+#ifdef NDEBUG
       auto allocation_size = size_distribution(generator);
       void* ptr{nullptr};
       // An unsupported alignment (like an odd number) should result in an
       // alignment of `alignof(std::max_align_t)`
       auto const bad_alignment = alignment + 1;
+
       EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, bad_alignment));
       EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t)));
       EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, bad_alignment));
+#endif
     }
   }
 }
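The new `NDEBUG` guard reflects that the fallback to `alignof(std::max_align_t)` for unsupported alignments is only exercised in release builds; debug builds may assert on the bad alignment before the fallback applies, so the checks are compiled out there. A hypothetical, host-only illustration of the `is_aligned` predicate the test relies on (the helper name matches the test's; everything else is scaffold):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>

// Same shape as the test's is_aligned(ptr, alignment) check.
bool is_aligned(void const* ptr, std::size_t alignment)
{
  return reinterpret_cast<std::uintptr_t>(ptr) % alignment == 0;
}

int main()
{
  // malloc returns storage suitably aligned for std::max_align_t, the same
  // floor the host resources fall back to for an unsupported alignment.
  void* ptr = std::malloc(64);
  assert(is_aligned(ptr, alignof(std::max_align_t)));
  std::free(ptr);
  return 0;
}
```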