diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index 6c2515102..4c4e603ef 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -30,4 +30,6 @@ rapids_cython_init() add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") add_subdirectory(rmm/_cuda) -add_subdirectory(rmm/_lib) +# add_subdirectory(rmm/_lib) +add_subdirectory(rmm/cpp) +add_subdirectory(rmm/python) diff --git a/python/rmm/rmm/cpp/CMakeLists.txt b/python/rmm/rmm/cpp/CMakeLists.txt new file mode 100644 index 000000000..8ac924df4 --- /dev/null +++ b/python/rmm/rmm/cpp/CMakeLists.txt @@ -0,0 +1,35 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources cpp_logger.pyx cpp_memory_resource.pyx) +set(linked_libraries rmm::rmm) + +# Build all of the Cython targets +rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX) + +# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols +# loaded from other DSOs +foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden) +endforeach() + +add_library(_torch_allocator SHARED _torch_allocator.cpp) +# Want the output to be called _torch_allocator.so +set_target_properties(_torch_allocator PROPERTIES PREFIX "" SUFFIX ".so") +target_link_libraries(_torch_allocator PRIVATE rmm::rmm) +cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}" + OUTPUT_VARIABLE _torch_allocator_location) +install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}") diff --git a/python/rmm/rmm/cpp/__init__.pxd b/python/rmm/rmm/cpp/__init__.pxd new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/cpp/__init__.pxd @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/cpp/_torch_allocator.cpp b/python/rmm/rmm/cpp/_torch_allocator.cpp new file mode 100644 index 000000000..bfe94c2d0 --- /dev/null +++ b/python/rmm/rmm/cpp/_torch_allocator.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+#include <cstddef>
+
+// These signatures must match those required by CUDAPluggableAllocator in
+// github.com/pytorch/pytorch/blob/main/torch/csrc/cuda/CUDAPluggableAllocator.h
+// Since the loading is done at runtime via dlopen, no error checking
+// can be performed for mismatching signatures.
+
+/**
+ * @brief Allocate memory of at least \p size bytes.
+ *
+ * @throws rmm::bad_alloc When the requested allocation cannot be satisfied.
+ *
+ * @param size The number of bytes to allocate
+ * @param device The device whose memory resource one should use
+ * @param stream CUDA stream to perform allocation on
+ * @return Pointer to the newly allocated memory
+ */
+extern "C" void* allocate(std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const device_id{device};
+  rmm::cuda_set_device_raii with_device{device_id};
+  auto mr = rmm::mr::get_per_device_resource_ref(device_id);
+  return mr.allocate_async(
+    size, rmm::CUDA_ALLOCATION_ALIGNMENT, rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+}
+
+/**
+ * @brief Deallocate memory pointed to by \p ptr.
+ *
+ * @param ptr Pointer to be deallocated
+ * @param size The number of bytes in the allocation
+ * @param device The device whose memory resource one should use
+ * @param stream CUDA stream to perform deallocation on
+ */
+extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const device_id{device};
+  rmm::cuda_set_device_raii with_device{device_id};
+  auto mr = rmm::mr::get_per_device_resource_ref(device_id);
+  mr.deallocate_async(ptr,
+                      size,
+                      rmm::CUDA_ALLOCATION_ALIGNMENT,
+                      rmm::cuda_stream_view{static_cast<cudaStream_t>(stream)});
+}
diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream.pxd
new file mode 100644
index 000000000..16b66ee2b
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_cuda_stream.pxd
@@ -0,0 +1,28 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
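A note on how `_torch_allocator.so` above is meant to be consumed: PyTorch's pluggable-allocator API `dlopen`s the library and looks up the two `extern "C"` hooks by name. A minimal sketch, assuming PyTorch >= 2.0 and that the built library lands next to the `rmm.cpp` package per the `install(TARGETS ...)` rule above (the exact install location is an assumption):

```python
# Sketch: route PyTorch's CUDA allocations through the RMM hooks above.
import pathlib

import rmm.cpp
from torch.cuda.memory import CUDAPluggableAllocator, change_current_allocator

# Assumption: the .so is installed alongside the rmm.cpp package.
so_path = pathlib.Path(rmm.cpp.__file__).parent / "_torch_allocator.so"

allocator = CUDAPluggableAllocator(
    str(so_path),
    alloc_fn_name="allocate",   # matches extern "C" allocate above
    free_fn_name="deallocate",  # matches extern "C" deallocate above
)
change_current_allocator(allocator)  # must run before any CUDA allocation
```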
+ +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/cuda_stream.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream: + cuda_stream() except + + bool is_valid() except + + cudaStream_t value() except + + cuda_stream_view view() except + + void synchronize() except + + void synchronize_no_throw() diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd new file mode 100644 index 000000000..553b38514 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_cuda_stream_pool.pxd @@ -0,0 +1,23 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view + + +cdef extern from "rmm/cuda_stream_pool.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_pool: + cuda_stream_pool(size_t pool_size) + cuda_stream_view get_stream() + cuda_stream_view get_stream(size_t stream_id) except + + size_t get_pool_size() diff --git a/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd b/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd new file mode 100644 index 000000000..bf0d33c24 --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_cuda_stream_view.pxd @@ -0,0 +1,32 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + + +cdef extern from "rmm/cuda_stream_view.hpp" namespace "rmm" nogil: + cdef cppclass cuda_stream_view: + cuda_stream_view() + cuda_stream_view(cudaStream_t) + cudaStream_t value() + bool is_default() + bool is_per_thread_default() + void synchronize() except + + + cdef bool operator==(cuda_stream_view const, cuda_stream_view const) + + const cuda_stream_view cuda_stream_default + const cuda_stream_view cuda_stream_legacy + const cuda_stream_view cuda_stream_per_thread diff --git a/python/rmm/rmm/cpp/cpp_device_buffer.pxd b/python/rmm/rmm/cpp/cpp_device_buffer.pxd new file mode 100644 index 000000000..1aa7634cf --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_device_buffer.pxd @@ -0,0 +1,58 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view +from rmm.cpp.cpp_memory_resource cimport device_memory_resource + + +cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: + cdef cppclass cuda_device_id: + ctypedef int value_type + cuda_device_id() + cuda_device_id(value_type id) + value_type value() + + cdef cuda_device_id get_current_cuda_device() + +cdef extern from "rmm/prefetch.hpp" namespace "rmm" nogil: + cdef void prefetch(const void* ptr, + size_t bytes, + cuda_device_id device, + cuda_stream_view stream) except + + +cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: + cdef cppclass device_buffer: + device_buffer() + device_buffer( + size_t size, + cuda_stream_view stream, + device_memory_resource * + ) except + + device_buffer( + const void* source_data, + size_t size, + cuda_stream_view stream, + device_memory_resource * + ) except + + device_buffer( + const device_buffer buf, + cuda_stream_view stream, + device_memory_resource * + ) except + + void reserve(size_t new_capacity, cuda_stream_view stream) except + + void resize(size_t new_size, cuda_stream_view stream) except + + void shrink_to_fit(cuda_stream_view stream) except + + void* data() + size_t size() + size_t capacity() diff --git a/python/rmm/rmm/cpp/cpp_device_uvector.pxd b/python/rmm/rmm/cpp/cpp_device_uvector.pxd new file mode 100644 index 000000000..2cb647e3c --- /dev/null +++ b/python/rmm/rmm/cpp/cpp_device_uvector.pxd @@ -0,0 +1,39 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
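The `device_buffer` and `prefetch` declarations above back the Python-level `DeviceBuffer` defined later in this diff. A usage sketch, assuming `DeviceBuffer` and the `rmm.mr` wrappers are re-exported at the package root as the docstrings below suggest:

```python
# Sketch: a DeviceBuffer backed by managed memory, then prefetched to the GPU.
import rmm

# prefetch() is only meaningful for CUDA managed (unified) memory;
# on ordinary device memory it is a no-op.
rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())

buf = rmm.DeviceBuffer(size=1 << 20)  # 1 MiB, allocated on the default stream
buf.copy_from_host(b"abc")            # host -> device copy
buf.prefetch()                        # migrate pages to the current device
print(buf.size, buf.capacity())
```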
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_device_buffer cimport device_buffer
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource
+
+
+cdef extern from "rmm/device_uvector.hpp" namespace "rmm" nogil:
+    cdef cppclass device_uvector[T]:
+        device_uvector(size_t size, cuda_stream_view stream) except +
+        T* element_ptr(size_t index)
+        void set_element(size_t element_index, const T& v, cuda_stream_view s)
+        void set_element_async(
+            size_t element_index,
+            const T& v,
+            cuda_stream_view s
+        ) except +
+        T front_element(cuda_stream_view s) except +
+        T back_element(cuda_stream_view s) except +
+        void reserve(size_t new_capacity, cuda_stream_view stream) except +
+        void resize(size_t new_size, cuda_stream_view stream) except +
+        void shrink_to_fit(cuda_stream_view stream) except +
+        device_buffer release()
+        size_t capacity()
+        T* data()
+        size_t size()
+        device_memory_resource* memory_resource()
diff --git a/python/rmm/rmm/cpp/cpp_logger.pyx b/python/rmm/rmm/cpp/cpp_logger.pyx
new file mode 100644
index 000000000..241a748c3
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_logger.pyx
@@ -0,0 +1,66 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from libcpp cimport bool
+
+
+cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil:
+    cpdef enum logging_level "spdlog::level::level_enum":
+        """
+        The debug logging level for RMM.
+
+        Debug logging prints messages to a log file. See
+        `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+        for more information.
+
+        Valid levels, in decreasing order of verbosity, are TRACE, DEBUG,
+        INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO.
+
+        Examples
+        --------
+        >>> import rmm
+        >>> rmm.logging_level.DEBUG
+        <logging_level.DEBUG: 1>
+        >>> rmm.logging_level.DEBUG.value
+        1
+        >>> rmm.logging_level.DEBUG.name
+        'DEBUG'
+
+        See Also
+        --------
+        set_logging_level : Set the debug logging level
+        get_logging_level : Get the current debug logging level
+        """
+        TRACE "spdlog::level::trace"
+        DEBUG "spdlog::level::debug"
+        INFO "spdlog::level::info"
+        WARN "spdlog::level::warn"
+        ERR "spdlog::level::err"
+        CRITICAL "spdlog::level::critical"
+        OFF "spdlog::level::off"
+
+
+cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil:
+    cdef cppclass spdlog_logger "spdlog::logger":
+        spdlog_logger() except +
+        void set_level(logging_level level)
+        logging_level level()
+        void flush() except +
+        void flush_on(logging_level level)
+        logging_level flush_level()
+        bool should_log(logging_level msg_level)
+
+
+cdef extern from "rmm/logger.hpp" namespace "rmm" nogil:
+    cdef spdlog_logger& logger() except +
diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pxd b/python/rmm/rmm/cpp/cpp_memory_resource.pxd
new file mode 100644
index 000000000..cef5d4737
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_memory_resource.pxd
@@ -0,0 +1,34 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from libcpp.pair cimport pair
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+
+
+cdef extern from "rmm/mr/device/device_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass device_memory_resource:
+        void* allocate(size_t bytes) except +
+        void* allocate(size_t bytes, cuda_stream_view stream) except +
+        void deallocate(void* ptr, size_t bytes) except +
+        void deallocate(
+            void* ptr,
+            size_t bytes,
+            cuda_stream_view stream
+        ) except +
+
+cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil:
+    size_t percent_of_free_device_memory(int percent) except +
+    pair[size_t, size_t] available_device_memory() except +
diff --git a/python/rmm/rmm/cpp/cpp_memory_resource.pyx b/python/rmm/rmm/cpp/cpp_memory_resource.pyx
new file mode 100644
index 000000000..50e201517
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_memory_resource.pyx
@@ -0,0 +1,215 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This import is needed for Cython typing in translate_python_except_to_cpp
+# See https://github.com/cython/cython/issues/5589
+from builtins import BaseException
+
+from libc.stddef cimport size_t
+from libc.stdint cimport int8_t, int64_t
+from libcpp cimport bool
+from libcpp.optional cimport optional
+from libcpp.pair cimport pair
+from libcpp.string cimport string
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource
+
+# Transparent handle of a C++ exception
+ctypedef pair[int, string] CppExcept
+
+cdef CppExcept translate_python_except_to_cpp(err: BaseException) noexcept:
+    """Translate a Python exception into a C++ exception handle
+
+    The returned exception handle can then be thrown by `throw_cpp_except()`,
+    which MUST be done without holding the GIL.
+
+    This is useful when C++ calls a Python function and needs to catch or
+    propagate exceptions.
+    """
+    if isinstance(err, MemoryError):
+        return CppExcept(0, str.encode(str(err)))
+    return CppExcept(-1, str.encode(str(err)))
+
+# Implementation of `throw_cpp_except()`, which throws a given `CppExcept`.
+# This function MUST be called without the GIL, otherwise the thrown C++
+# exception is translated back into a Python exception.
+cdef extern from *:
+    """
+    #include <rmm/error.hpp>
+    #include <stdexcept>
+
+    void throw_cpp_except(std::pair<int, std::string> res) {
+        switch(res.first) {
+            case 0:
+                throw rmm::out_of_memory(res.second);
+            default:
+                throw std::runtime_error(res.second);
+        }
+    }
+    """
+    void throw_cpp_except(CppExcept) nogil
+
+
+# NOTE: Keep extern declarations in .pyx file as much as possible to avoid
+# leaking dependencies when importing RMM Cython .pxd files
+cdef extern from "rmm/mr/device/cuda_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass cuda_memory_resource(device_memory_resource):
+        cuda_memory_resource() except +
+
+cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass managed_memory_resource(device_memory_resource):
+        managed_memory_resource() except +
+
+cdef extern from "rmm/mr/device/system_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass system_memory_resource(device_memory_resource):
+        system_memory_resource() except +
+
+cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass sam_headroom_memory_resource(device_memory_resource):
+        sam_headroom_memory_resource(size_t headroom) except +
+
+cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+
+    cdef cppclass cuda_async_memory_resource(device_memory_resource):
+        cuda_async_memory_resource(
+            optional[size_t] initial_pool_size,
+            optional[size_t] release_threshold,
+            optional[allocation_handle_type] export_handle_type) except +
+
+# TODO: when we adopt Cython 3.0 use enum class
+cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
+        namespace \
+        "rmm::mr::cuda_async_memory_resource::allocation_handle_type" \
+        nogil:
+    enum allocation_handle_type \
+            "rmm::mr::cuda_async_memory_resource::allocation_handle_type":
+        none
+        posix_file_descriptor
+        win32
+        win32_kmt
+
+
+cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass pool_memory_resource[Upstream](device_memory_resource):
+        pool_memory_resource(
+            Upstream* upstream_mr,
+            size_t initial_pool_size,
+            optional[size_t] maximum_pool_size) except +
+        size_t pool_size()
+
+cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass fixed_size_memory_resource[Upstream](device_memory_resource):
+        fixed_size_memory_resource(
+            Upstream* upstream_mr,
+            size_t block_size,
+            size_t blocks_to_preallocate) except +
+
+cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*)
+    ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*)
+
+    cdef cppclass callback_memory_resource(device_memory_resource):
+        callback_memory_resource(
+            allocate_callback_t allocate_callback,
+            deallocate_callback_t deallocate_callback,
+            void* allocate_callback_arg,
+            void* deallocate_callback_arg
+        ) except +
+
+cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass binning_memory_resource[Upstream](device_memory_resource):
+        binning_memory_resource(Upstream* upstream_mr) except +
+        binning_memory_resource(
+            Upstream* upstream_mr,
+            int8_t min_size_exponent,
+            int8_t max_size_exponent) except +
+
+        void add_bin(size_t allocation_size) except +
+        void add_bin(
+            size_t allocation_size,
+            device_memory_resource* bin_resource) except +
+
+cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass limiting_resource_adaptor[Upstream](device_memory_resource):
+        limiting_resource_adaptor(
+            Upstream* upstream_mr,
+            size_t allocation_limit) except +
+
+        size_t get_allocated_bytes() except +
+        size_t get_allocation_limit() except +
+
+cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource):
+        logging_resource_adaptor(
+            Upstream* upstream_mr,
+            string filename) except +
+
+        void flush() except +
+
+cdef extern from "rmm/mr/device/statistics_resource_adaptor.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass statistics_resource_adaptor[Upstream](device_memory_resource):
+        struct counter:
+            counter()
+
+            int64_t value
+            int64_t peak
+            int64_t total
+
+        statistics_resource_adaptor(Upstream* upstream_mr) except +
+
+        counter get_bytes_counter() except +
+        counter get_allocations_counter() except +
+        pair[counter, counter] pop_counters() except +
+        pair[counter, counter] push_counters() except +
+
+cdef extern from "rmm/mr/device/tracking_resource_adaptor.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass tracking_resource_adaptor[Upstream](device_memory_resource):
+        tracking_resource_adaptor(
+            Upstream* upstream_mr,
+            bool capture_stacks) except +
+
+        size_t get_allocated_bytes() except +
+        string get_outstanding_allocations_str() except +
+        void log_outstanding_allocations() except +
+
+cdef extern from "rmm/mr/device/failure_callback_resource_adaptor.hpp" \
+        namespace "rmm::mr" nogil:
+    ctypedef bool (*failure_callback_t)(size_t, void*)
+    cdef cppclass failure_callback_resource_adaptor[Upstream](
+        device_memory_resource
+    ):
+        failure_callback_resource_adaptor(
+            Upstream* upstream_mr,
+            failure_callback_t callback,
+            void* callback_arg
+        ) except +
+
+cdef extern from "rmm/mr/device/prefetch_resource_adaptor.hpp" \
+        namespace "rmm::mr" nogil:
+    cdef cppclass prefetch_resource_adaptor[Upstream](device_memory_resource):
+        prefetch_resource_adaptor(Upstream* upstream_mr) except +
diff --git a/python/rmm/rmm/cpp/cpp_per_device_resource.pxd b/python/rmm/rmm/cpp/cpp_per_device_resource.pxd
new file mode 100644
index 000000000..59f651687
--- /dev/null
+++ b/python/rmm/rmm/cpp/cpp_per_device_resource.pxd
@@ -0,0 +1,36 @@
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource + + +cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: + cdef cppclass cuda_device_id: + ctypedef int value_type + + cuda_device_id(value_type id) + + value_type value() + +cdef extern from "rmm/mr/device/per_device_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef device_memory_resource* set_current_device_resource( + device_memory_resource* new_mr + ) + cdef device_memory_resource* get_current_device_resource() + cdef device_memory_resource* set_per_device_resource( + cuda_device_id id, device_memory_resource* new_mr + ) + cdef device_memory_resource* get_per_device_resource ( + cuda_device_id id + ) diff --git a/python/rmm/rmm/python/CMakeLists.txt b/python/rmm/rmm/python/CMakeLists.txt new file mode 100644 index 000000000..558c0ce9a --- /dev/null +++ b/python/rmm/rmm/python/CMakeLists.txt @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx + helper.pyx) +set(linked_libraries rmm::rmm) + +# Build all of the Cython targets +rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" + CXX) + +# mark all symbols in these Cython targets "hidden" by default, so they won't collide with symbols +# loaded from other DSOs +foreach(_cython_target IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${_cython_target} PROPERTIES C_VISIBILITY_PRESET hidden + CXX_VISIBILITY_PRESET hidden) +endforeach() diff --git a/python/rmm/rmm/python/__init__.pxd b/python/rmm/rmm/python/__init__.pxd new file mode 100644 index 000000000..46753baa3 --- /dev/null +++ b/python/rmm/rmm/python/__init__.pxd @@ -0,0 +1,13 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/rmm/rmm/python/__init__.py b/python/rmm/rmm/python/__init__.py new file mode 100644 index 000000000..0b8672ef6 --- /dev/null +++ b/python/rmm/rmm/python/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .device_buffer import DeviceBuffer diff --git a/python/rmm/rmm/python/cuda_stream.pyx b/python/rmm/rmm/python/cuda_stream.pyx new file mode 100644 index 000000000..f1529111d --- /dev/null +++ b/python/rmm/rmm/python/cuda_stream.pyx @@ -0,0 +1,36 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cimport cython +from cuda.ccudart cimport cudaStream_t +from libcpp cimport bool + +from rmm.cpp.cpp_cuda_stream cimport cuda_stream + + +@cython.final +cdef class CudaStream: + """ + Wrapper around a CUDA stream with RAII semantics. + When a CudaStream instance is GC'd, the underlying + CUDA stream is destroyed. + """ + def __cinit__(self): + self.c_obj.reset(new cuda_stream()) + + cdef cudaStream_t value(self) except * nogil: + return self.c_obj.get()[0].value() + + cdef bool is_valid(self) except * nogil: + return self.c_obj.get()[0].is_valid() diff --git a/python/rmm/rmm/python/device_buffer.pxd b/python/rmm/rmm/python/device_buffer.pxd new file mode 100644 index 000000000..d8b56d2fb --- /dev/null +++ b/python/rmm/rmm/python/device_buffer.pxd @@ -0,0 +1,71 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from libc.stdint cimport uintptr_t +from libcpp.memory cimport unique_ptr + +from rmm._cuda.stream cimport Stream +from rmm.cpp.cpp_device_buffer cimport device_buffer +from rmm.python.memory_resource cimport DeviceMemoryResource + + +cdef class DeviceBuffer: + cdef unique_ptr[device_buffer] c_obj + + # Holds a reference to the DeviceMemoryResource used for allocation. + # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is + # needed for deallocation + cdef DeviceMemoryResource mr + + # Holds a reference to the stream used by the underlying `device_buffer`. 
+    # Ensures the stream does not get destroyed before this DeviceBuffer
+    cdef Stream stream
+
+    @staticmethod
+    cdef DeviceBuffer c_from_unique_ptr(
+        unique_ptr[device_buffer] ptr,
+        Stream stream=*,
+        DeviceMemoryResource mr=*,
+    )
+
+    @staticmethod
+    cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
+                                  Stream stream=*) except *
+    cpdef copy_to_host(self, ary=*, Stream stream=*)
+    cpdef copy_from_host(self, ary, Stream stream=*)
+    cpdef copy_from_device(self, cuda_ary, Stream stream=*)
+    cpdef bytes tobytes(self, Stream stream=*)
+
+    cdef size_t c_size(self) except *
+    cpdef void reserve(self, size_t new_capacity, Stream stream=*) except *
+    cpdef void resize(self, size_t new_size, Stream stream=*) except *
+    cpdef size_t capacity(self) except *
+    cdef void* c_data(self) except *
+
+    cdef device_buffer c_release(self) except *
+
+cpdef DeviceBuffer to_device(const unsigned char[::1] b,
+                             Stream stream=*)
+cpdef void copy_ptr_to_host(uintptr_t db,
+                            unsigned char[::1] hb,
+                            Stream stream=*) except *
+
+cpdef void copy_host_to_ptr(const unsigned char[::1] hb,
+                            uintptr_t db,
+                            Stream stream=*) except *
+
+cpdef void copy_device_to_ptr(uintptr_t d_src,
+                              uintptr_t d_dst,
+                              size_t count,
+                              Stream stream=*) except *
diff --git a/python/rmm/rmm/python/device_buffer.pyx b/python/rmm/rmm/python/device_buffer.pyx
new file mode 100644
index 000000000..1d564ff33
--- /dev/null
+++ b/python/rmm/rmm/python/device_buffer.pyx
@@ -0,0 +1,559 @@
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+
+cimport cython
+from cpython.bytes cimport PyBytes_FromStringAndSize
+from libc.stdint cimport uintptr_t
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+
+from rmm._cuda.stream cimport Stream
+
+from rmm._cuda.stream import DEFAULT_STREAM
+
+cimport cuda.ccudart as ccudart
+from cuda.ccudart cimport (
+    cudaError,
+    cudaError_t,
+    cudaMemcpyAsync,
+    cudaMemcpyKind,
+    cudaStream_t,
+)
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_device_buffer cimport (
+    cuda_device_id,
+    device_buffer,
+    get_current_cuda_device,
+    prefetch,
+)
+from rmm.python.memory_resource cimport (
+    DeviceMemoryResource,
+    device_memory_resource,
+    get_current_device_resource,
+)
+
+
+# The DeviceMemoryResource attribute could be released prematurely
+# by the gc if the DeviceBuffer is in a reference cycle. Removing
+# the tp_clear function with the no_gc_clear decoration prevents that.
+# See https://github.com/rapidsai/rmm/pull/931 for details.
+@cython.no_gc_clear
+cdef class DeviceBuffer:
+
+    def __cinit__(self, *,
+                  uintptr_t ptr=0,
+                  size_t size=0,
+                  Stream stream=DEFAULT_STREAM,
+                  DeviceMemoryResource mr=None):
+        """Construct a ``DeviceBuffer`` with optional size and data pointer
+
+        Parameters
+        ----------
+        ptr : int
+            pointer to some data on host or device to copy over
+        size : int
+            size of the buffer to allocate
+            (and possibly size of data to copy)
+        stream : optional
+            CUDA stream to use for construction and/or copying,
+            defaults to the CUDA default stream. A reference to the
+            stream is stored internally to ensure it doesn't go out of
+            scope while the DeviceBuffer is in use. Destroying the
+            underlying stream while the DeviceBuffer is in use will
+            result in undefined behavior.
+        mr : optional
+            DeviceMemoryResource for the allocation, if not provided
+            defaults to the current device resource.
+
+        Note
+        ----
+        If the pointer passed is non-null and ``stream`` is the default stream,
+        it is synchronized after the copy. However if a non-default ``stream``
+        is provided, this function is fully asynchronous.
+
+        Examples
+        --------
+        >>> import rmm
+        >>> db = rmm.DeviceBuffer(size=5)
+        """
+        cdef const void* c_ptr
+        cdef device_memory_resource * mr_ptr
+        # Save a reference to the MR and stream used for allocation
+        self.mr = get_current_device_resource() if mr is None else mr
+        self.stream = stream
+
+        mr_ptr = self.mr.get_mr()
+        with nogil:
+            c_ptr = <const void*>ptr
+
+            if c_ptr == NULL or size == 0:
+                self.c_obj.reset(new device_buffer(size, stream.view(), mr_ptr))
+            else:
+                self.c_obj.reset(new device_buffer(c_ptr, size, stream.view(), mr_ptr))
+
+            if stream.c_is_default():
+                stream.c_synchronize()
+
+    def __len__(self):
+        return self.size
+
+    def __sizeof__(self):
+        return self.size
+
+    def __bytes__(self):
+        return self.tobytes()
+
+    @property
+    def nbytes(self):
+        """Gets the size of the buffer in bytes."""
+        return self.size
+
+    @property
+    def ptr(self):
+        """Gets a pointer to the underlying data."""
+        return int(self.c_data())
+
+    @property
+    def size(self):
+        """Gets the size of the buffer in bytes."""
+        return int(self.c_size())
+
+    def __reduce__(self):
+        return to_device, (self.copy_to_host(),)
+
+    @property
+    def __cuda_array_interface__(self):
+        cdef dict intf = {
+            "data": (self.ptr, False),
+            "shape": (self.size,),
+            "strides": None,
+            "typestr": "|u1",
+            "version": 0
+        }
+        return intf
+
+    def prefetch(self, device=None, stream=None):
+        """Prefetch buffer data to the specified device on the specified stream.
+
+        Assumes the storage for this DeviceBuffer is CUDA managed memory
+        (unified memory). If it is not, this function is a no-op.
+
+        Parameters
+        ----------
+        device : optional
+            The CUDA device to which to prefetch the memory for this buffer.
+            Defaults to the current CUDA device. To prefetch to the CPU, pass
+            :py:attr:`~cuda.cudart.cudaCpuDeviceId` as the device.
+        stream : optional
+            CUDA stream to use for prefetching. Defaults to self.stream
+        """
+        cdef cuda_device_id dev = (get_current_cuda_device()
+                                   if device is None
+                                   else cuda_device_id(device))
+        cdef Stream strm = self.stream if stream is None else stream
+        with nogil:
+            prefetch(self.c_obj.get()[0].data(),
+                     self.c_obj.get()[0].size(),
+                     dev,
+                     strm.view())
+
+    def copy(self):
+        """Returns a copy of DeviceBuffer.
+
+        Returns
+        -------
+        A deep copy of existing ``DeviceBuffer``
+
+        Examples
+        --------
+        >>> import rmm
+        >>> db = rmm.DeviceBuffer.to_device(b"abc")
+        >>> db_copy = db.copy()
+        >>> db.copy_to_host()
+        array([97, 98, 99], dtype=uint8)
+        >>> db_copy.copy_to_host()
+        array([97, 98, 99], dtype=uint8)
+        >>> assert db is not db_copy
+        >>> assert db.ptr != db_copy.ptr
+        """
+        ret = DeviceBuffer(ptr=self.ptr, size=self.size, stream=self.stream)
+        ret.mr = self.mr
+        return ret
+
+    def __copy__(self):
+        return self.copy()
+
+    @staticmethod
+    cdef DeviceBuffer c_from_unique_ptr(
+        unique_ptr[device_buffer] ptr,
+        Stream stream=DEFAULT_STREAM,
+        DeviceMemoryResource mr=None,
+    ):
+        cdef DeviceBuffer buf = DeviceBuffer.__new__(DeviceBuffer)
+        if stream.c_is_default():
+            stream.c_synchronize()
+        buf.c_obj = move(ptr)
+        buf.mr = get_current_device_resource() if mr is None else mr
+        buf.stream = stream
+        return buf
+
+    @staticmethod
+    cdef DeviceBuffer c_to_device(const unsigned char[::1] b,
+                                  Stream stream=DEFAULT_STREAM) except *:
+        """Calls ``to_device`` function on arguments provided"""
+        return to_device(b, stream)
+
+    @staticmethod
+    def to_device(const unsigned char[::1] b,
+                  Stream stream=DEFAULT_STREAM):
+        """Calls ``to_device`` function on arguments provided."""
+        return to_device(b, stream)
+
+    cpdef copy_to_host(self, ary=None, Stream stream=DEFAULT_STREAM):
+        """Copy from a ``DeviceBuffer`` to a buffer on host.
+
+        Parameters
+        ----------
+        ary : ``bytes``-like buffer to write into
+        stream : CUDA stream to use for copying, default the default stream
+
+        Examples
+        --------
+        >>> import rmm
+        >>> db = rmm.DeviceBuffer.to_device(b"abc")
+        >>> hb = bytearray(db.nbytes)
+        >>> db.copy_to_host(hb)
+        >>> print(hb)
+        bytearray(b'abc')
+        >>> hb = db.copy_to_host()
+        >>> print(hb)
+        array([97, 98, 99], dtype=uint8)
+        """
+        cdef const device_buffer* dbp = self.c_obj.get()
+        cdef size_t s = dbp.size()
+
+        cdef unsigned char[::1] hb = ary
+        if hb is None:
+            # NumPy leverages huge pages under-the-hood,
+            # which speeds up the copy from device to host.
+            hb = ary = np.empty((s,), dtype="u1")
+        elif len(hb) < s:
+            raise ValueError(
+                "Argument `ary` is too small. Need space for %i bytes." % s
+            )
+
+        copy_ptr_to_host(<uintptr_t>dbp.data(), hb[:s], stream)
+
+        return ary
+
+    cpdef copy_from_host(self, ary, Stream stream=DEFAULT_STREAM):
+        """Copy from a buffer on host to ``self``
+
+        Parameters
+        ----------
+        ary : ``bytes``-like buffer to copy from
+        stream : CUDA stream to use for copying, default the default stream
+
+        Examples
+        --------
+        >>> import rmm
+        >>> db = rmm.DeviceBuffer(size=10)
+        >>> hb = b"abcdef"
+        >>> db.copy_from_host(hb)
+        >>> hb = db.copy_to_host()
+        >>> print(hb)
+        array([ 97,  98,  99, 100, 101, 102,   0,   0,   0,   0], dtype=uint8)
+        """
+        cdef device_buffer* dbp = self.c_obj.get()
+
+        cdef const unsigned char[::1] hb = ary
+        cdef size_t s = len(hb)
+        if s > self.size:
+            raise ValueError(
+                "Argument `ary` is too large. Need space for %i bytes." % s
+            )
+
+        copy_host_to_ptr(hb[:s], <uintptr_t>dbp.data(), stream)
+
+    cpdef copy_from_device(self, cuda_ary,
+                           Stream stream=DEFAULT_STREAM):
+        """Copy from a buffer on device to ``self``
+
+        Parameters
+        ----------
+        cuda_ary : object to copy from that has ``__cuda_array_interface__``
+        stream : CUDA stream to use for copying, default the default stream
+
+        Examples
+        --------
+        >>> import rmm
+        >>> db = rmm.DeviceBuffer(size=5)
+        >>> db2 = rmm.DeviceBuffer.to_device(b"abc")
+        >>> db.copy_from_device(db2)
+        >>> hb = db.copy_to_host()
+        >>> print(hb)
+        array([97, 98, 99, 0, 0], dtype=uint8)
+        """
+        if not hasattr(cuda_ary, "__cuda_array_interface__"):
+            raise ValueError(
+                "Expected object to support `__cuda_array_interface__` "
+                "protocol"
+            )
+
+        cuda_ary_interface = cuda_ary.__cuda_array_interface__
+        shape = cuda_ary_interface["shape"]
+        strides = cuda_ary_interface.get("strides")
+        dtype = np.dtype(cuda_ary_interface["typestr"])
+
+        if len(shape) > 1:
+            raise ValueError(
+                "Only 1-D contiguous arrays are supported, got {}-D "
+                "array".format(str(len(shape)))
+            )
+
+        if strides is not None:
+            if strides[0] != dtype.itemsize:
+                raise ValueError(
+                    "Only 1-D contiguous arrays are supported, got a "
+                    "non-contiguous array"
+                )
+
+        cdef uintptr_t src_ptr = cuda_ary_interface["data"][0]
+        cdef size_t s = shape[0] * dtype.itemsize
+        if s > self.size:
+            raise ValueError(
+                "Argument `cuda_ary` is too large. Need space for %i bytes." % s
+            )
+
+        cdef device_buffer* dbp = self.c_obj.get()
+
+        copy_device_to_ptr(
+            src_ptr,
+            <uintptr_t>dbp.data(),
+            s,
+            stream
+        )
+
+    cpdef bytes tobytes(self, Stream stream=DEFAULT_STREAM):
+        cdef const device_buffer* dbp = self.c_obj.get()
+        cdef size_t s = dbp.size()
+
+        cdef bytes b = PyBytes_FromStringAndSize(NULL, s)
+        cdef unsigned char* p = <unsigned char*>b
+        cdef unsigned char[::1] mv = (<unsigned char[:(s + 1):1]>p)[:s]
+        self.copy_to_host(mv, stream)
+
+        return b
+
+    cdef size_t c_size(self) except *:
+        return self.c_obj.get()[0].size()
+
+    cpdef void reserve(self,
+                       size_t new_capacity,
+                       Stream stream=DEFAULT_STREAM) except *:
+        self.c_obj.get()[0].reserve(new_capacity, stream.view())
+
+    cpdef void resize(self,
+                      size_t new_size,
+                      Stream stream=DEFAULT_STREAM) except *:
+        self.c_obj.get()[0].resize(new_size, stream.view())
+
+    cpdef size_t capacity(self) except *:
+        return self.c_obj.get()[0].capacity()
+
+    cdef void* c_data(self) except *:
+        return self.c_obj.get()[0].data()
+
+    cdef device_buffer c_release(self) except *:
+        """
+        Releases ownership of the data held by this DeviceBuffer.
+        """
+        return move(cython.operator.dereference(self.c_obj))
+
+
+@cython.boundscheck(False)
+cpdef DeviceBuffer to_device(const unsigned char[::1] b,
+                             Stream stream=DEFAULT_STREAM):
+    """Return a new ``DeviceBuffer`` with a copy of the data.
+
+    Parameters
+    ----------
+    b : ``bytes``-like data on host to copy to device
+    stream : CUDA stream to use for copying, default the default stream
+
+    Returns
+    -------
+    ``DeviceBuffer`` with copy of data from host
+
+    Examples
+    --------
+    >>> import rmm
+    >>> db = rmm.python.device_buffer.to_device(b"abc")
+    >>> print(bytes(db))
+    b'abc'
+    """
+
+    if b is None:
+        raise TypeError(
+            "Argument 'b' has incorrect type"
+            " (expected bytes-like, got NoneType)"
+        )
+
+    cdef uintptr_t p = <uintptr_t>&b[0]
+    cdef size_t s = len(b)
+    return DeviceBuffer(ptr=p, size=s, stream=stream)
+
+
+@cython.boundscheck(False)
+cdef void _copy_async(const void* src,
+                      void* dst,
+                      size_t count,
+                      ccudart.cudaMemcpyKind kind,
+                      cuda_stream_view stream) except * nogil:
+    """
+    Asynchronously copy data between host and/or device pointers.
+
+    This is a convenience wrapper around cudaMemcpyAsync that
+    checks for errors. Only used for internal implementation.
+
+    Parameters
+    ----------
+    src : pointer to ``bytes``-like host buffer or device data to copy from
+    dst : pointer to ``bytes``-like host buffer or device data to copy into
+    count : the size in bytes to copy
+    kind : the kind of copy to perform
+    stream : CUDA stream to use for copying, default the default stream
+    """
+    cdef cudaError_t err = cudaMemcpyAsync(dst, src, count, kind,
+                                           <cudaStream_t>stream)
+
+    if err != cudaError.cudaSuccess:
+        raise RuntimeError(f"Memcpy failed with error: {err}")
+
+
+@cython.boundscheck(False)
+cpdef void copy_ptr_to_host(uintptr_t db,
+                            unsigned char[::1] hb,
+                            Stream stream=DEFAULT_STREAM) except *:
+    """Copy from a device pointer to a buffer on host
+
+    Parameters
+    ----------
+    db : pointer to data on device to copy
+    hb : ``bytes``-like buffer to write into
+    stream : CUDA stream to use for copying, default the default stream
+
+    Note
+    ----
+    If ``stream`` is the default stream, it is synchronized after the copy.
+    However if a non-default ``stream`` is provided, this function is fully
+    asynchronous.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> db = rmm.DeviceBuffer.to_device(b"abc")
+    >>> hb = bytearray(db.nbytes)
+    >>> rmm.python.device_buffer.copy_ptr_to_host(db.ptr, hb)
+    >>> print(hb)
+    bytearray(b'abc')
+    """
+
+    if hb is None:
+        raise TypeError(
+            "Argument `hb` has incorrect type"
+            " (expected bytes-like, got NoneType)"
+        )
+
+    with nogil:
+        _copy_async(<const void*>db, <void*>&hb[0], len(hb),
+                    cudaMemcpyKind.cudaMemcpyDeviceToHost, stream.view())
+
+    if stream.c_is_default():
+        stream.c_synchronize()
+
+
+@cython.boundscheck(False)
+cpdef void copy_host_to_ptr(const unsigned char[::1] hb,
+                            uintptr_t db,
+                            Stream stream=DEFAULT_STREAM) except *:
+    """Copy from a host pointer to a device pointer
+
+    Parameters
+    ----------
+    hb : ``bytes``-like host buffer to copy
+    db : pointer to data on device to write into
+    stream : CUDA stream to use for copying, default the default stream
+
+    Note
+    ----
+    If ``stream`` is the default stream, it is synchronized after the copy.
+    However if a non-default ``stream`` is provided, this function is fully
+    asynchronous.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> db = rmm.DeviceBuffer(size=10)
+    >>> hb = b"abc"
+    >>> rmm.python.device_buffer.copy_host_to_ptr(hb, db.ptr)
+    >>> hb = db.copy_to_host()
+    >>> print(hb)
+    array([97, 98, 99, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
+    """
+
+    if hb is None:
+        raise TypeError(
+            "Argument `hb` has incorrect type"
+            " (expected bytes-like, got NoneType)"
+        )
+
+    with nogil:
+        _copy_async(<const void*>&hb[0], <void*>db, len(hb),
+                    cudaMemcpyKind.cudaMemcpyHostToDevice, stream.view())
+
+    if stream.c_is_default():
+        stream.c_synchronize()
+
+
+@cython.boundscheck(False)
+cpdef void copy_device_to_ptr(uintptr_t d_src,
+                              uintptr_t d_dst,
+                              size_t count,
+                              Stream stream=DEFAULT_STREAM) except *:
+    """Copy from a device pointer to a device pointer
+
+    Parameters
+    ----------
+    d_src : pointer to data on device to copy from
+    d_dst : pointer to data on device to write into
+    count : the size in bytes to copy
+    stream : CUDA stream to use for copying, default the default stream
+
+    Examples
+    --------
+    >>> import rmm
+    >>> db = rmm.DeviceBuffer(size=5)
+    >>> db2 = rmm.DeviceBuffer.to_device(b"abc")
+    >>> rmm.python.device_buffer.copy_device_to_ptr(db2.ptr, db.ptr, db2.size)
+    >>> hb = db.copy_to_host()
+    >>> hb
+    array([97, 98, 99, 0, 0], dtype=uint8)
+    """
+
+    with nogil:
+        _copy_async(<const void*>d_src, <void*>d_dst, count,
+                    cudaMemcpyKind.cudaMemcpyDeviceToDevice, stream.view())
diff --git a/python/rmm/rmm/python/helper.pxd b/python/rmm/rmm/python/helper.pxd
new file mode 100644
index 000000000..8ca151c00
--- /dev/null
+++ b/python/rmm/rmm/python/helper.pxd
@@ -0,0 +1,16 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+cdef object parse_bytes(object s) except *
diff --git a/python/rmm/rmm/python/helper.pyx b/python/rmm/rmm/python/helper.pyx
new file mode 100644
index 000000000..d442ee341
--- /dev/null
+++ b/python/rmm/rmm/python/helper.pyx
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Helper functions for rmm"""
+
+import re
+
+
+cdef dict BYTE_SIZES = {
+    'b': 1,
+    '': 1,
+    'kb': 1000,
+    'mb': 1000**2,
+    'gb': 1000**3,
+    'tb': 1000**4,
+    'pb': 1000**5,
+    'kib': 1024,
+    'mib': 1024**2,
+    'gib': 1024**3,
+    'tib': 1024**4,
+    'pib': 1024**5,
+}
+
+
+pattern = re.compile(r"^([0-9]+(?:\.[0-9]*)?)[\t ]*((?i:(?:[kmgtp]i?)?b))?$")
+
+cdef object parse_bytes(object s):
+    """Parse a string or integer into a number of bytes.
+
+    Parameters
+    ----------
+    s : int | str
+        Size in bytes. If an integer is provided, it is returned as-is.
+        A string is parsed as a floating point number with an (optional,
+        case-insensitive) byte specifier. Both SI prefixes (kb, mb, ..., pb)
+        and binary prefixes (kib, mib, ..., pib) are supported.
+
+    Returns
+    -------
+    Requested size in bytes as an integer.
+
+    Raises
+    ------
+    ValueError
+        If it is not possible to parse the input as a byte specification.
+    """
+    cdef str suffix
+    cdef double n
+    cdef long long multiplier
+
+    if isinstance(s, int):
+        return s
+
+    match = pattern.match(s)
+
+    if match is None:
+        raise ValueError(f"Could not parse {s} as a byte specification")
+
+    n = float(match.group(1))
+
+    suffix = match.group(2)
+    if suffix is None:
+        suffix = ""
+
+    multiplier = BYTE_SIZES[suffix.lower()]
+
+    return int(n*multiplier)
diff --git a/python/rmm/rmm/python/lib.pxd b/python/rmm/rmm/python/lib.pxd
new file mode 100644
index 000000000..b61e0d569
--- /dev/null
+++ b/python/rmm/rmm/python/lib.pxd
@@ -0,0 +1,17 @@
+# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from libcpp.utility cimport pair
+
+ctypedef pair[const char*, unsigned int] caller_pair
diff --git a/python/rmm/rmm/python/lib.pyx b/python/rmm/rmm/python/lib.pyx
new file mode 100644
index 000000000..46753baa3
--- /dev/null
+++ b/python/rmm/rmm/python/lib.pyx
@@ -0,0 +1,13 @@
+# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/python/rmm/rmm/python/logger.pyx b/python/rmm/rmm/python/logger.pyx
new file mode 100644
index 000000000..2997ff633
--- /dev/null
+++ b/python/rmm/rmm/python/logger.pyx
@@ -0,0 +1,208 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
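A quick illustration of the `parse_bytes` helper above, derived directly from its `BYTE_SIZES` table and regex (note the function is `cdef`, so it is callable only from other Cython modules, not from pure Python):

```python
# Behavior implied by BYTE_SIZES and the pattern above (illustrative only).
parse_bytes(42)         # -> 42 (ints pass through unchanged)
parse_bytes("100")      # -> 100 (bare number, multiplier 1)
parse_bytes("100MB")    # -> 100_000_000 (SI prefix, case-insensitive)
parse_bytes("2 KiB")    # -> 2048 (binary prefix; tabs/spaces allowed)
parse_bytes("1.5gib")   # -> 1610612736 (floating point accepted)
parse_bytes("10 foos")  # raises ValueError
```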
+
+import warnings
+from rmm.cpp.cpp_logger cimport logging_level, logger
+
+
+def _validate_level_type(level):
+    if not isinstance(level, logging_level):
+        raise TypeError("level must be an instance of the logging_level enum")
+
+
+def should_log(level):
+    """
+    Check if a message at the given level would be logged.
+
+    A message at the given level would be logged if the current debug logging
+    level is set to a level that is at least as verbose as the given level,
+    *and* the RMM module is compiled for a logging level at least as verbose.
+    If these conditions are not both met, this function will return False.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Returns
+    -------
+    should_log : bool
+        True if a message at the given level would be logged, False otherwise.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+    """
+    _validate_level_type(level)
+    return logger().should_log(level)
+
+
+def set_logging_level(level):
+    """
+    Set the debug logging level.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+
+    See Also
+    --------
+    get_logging_level : Get the current debug logging level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn
+    """
+    _validate_level_type(level)
+    logger().set_level(level)
+
+    if not should_log(level):
+        warnings.warn(f"RMM will not log logging_level.{level.name}. This "
+                      "may be because the C++ library is compiled for a "
+                      "less-verbose logging level.")
+
+
+def get_logging_level():
+    """
+    Get the current debug logging level.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+    for more information.
+
+    Returns
+    -------
+    level : logging_level
+        The current debug logging level, an instance of the ``logging_level``
+        enum.
+
+    See Also
+    --------
+    set_logging_level : Set the debug logging level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.get_logging_level() # get current logging level
+    <logging_level.INFO: 2>
+    """
+    return logging_level(logger().level())
+
+
+def flush_logger():
+    """
+    Flush the debug logger. This will cause any buffered log messages to
+    be written to the log file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+    for more information.
+
+    See Also
+    --------
+    set_flush_level : Set the flush level for the debug logger.
+    get_flush_level : Get the current debug logging flush level.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.flush_logger() # flush the logger
+    """
+    logger().flush()
+
+
+def set_flush_level(level):
+    """
+    Set the flush level for the debug logger. Messages of this level or higher
+    will automatically flush to the file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+    for more information.
+
+    Parameters
+    ----------
+    level : logging_level
+        The debug logging level. Valid values are instances of the
+        ``logging_level`` enum.
+
+    Raises
+    ------
+    TypeError
+        If the logging level is not an instance of the ``logging_level`` enum.
+
+
+def get_flush_level():
+    """
+    Get the current debug logging flush level for the RMM logger. Messages of
+    this level or higher will automatically flush to the file.
+
+    Debug logging prints messages to a log file. See
+    `Debug Logging <https://github.com/rapidsai/rmm#debug-logging>`_
+    for more information.
+
+    Returns
+    -------
+    logging_level
+        The current flush level, an instance of the ``logging_level``
+        enum.
+
+    See Also
+    --------
+    set_flush_level : Set the flush level for the logger.
+    flush_logger : Flush the logger.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.get_flush_level() # get current flush level
+    <logging_level.INFO: 2>
+    """
+    return logging_level(logger().flush_level())
diff --git a/python/rmm/rmm/python/memory_resource.pxd b/python/rmm/rmm/python/memory_resource.pxd
new file mode 100644
index 000000000..0adda4eaf
--- /dev/null
+++ b/python/rmm/rmm/python/memory_resource.pxd
@@ -0,0 +1,83 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from libcpp.memory cimport shared_ptr
+
+from rmm.cpp.cpp_memory_resource cimport device_memory_resource
+
+
+cdef class DeviceMemoryResource:
+    cdef shared_ptr[device_memory_resource] c_obj
+    cdef device_memory_resource* get_mr(self) noexcept nogil
+
+cdef class UpstreamResourceAdaptor(DeviceMemoryResource):
+    cdef readonly DeviceMemoryResource upstream_mr
+
+    cpdef DeviceMemoryResource get_upstream(self)
+
+cdef class CudaMemoryResource(DeviceMemoryResource):
+    pass
+
+cdef class ManagedMemoryResource(DeviceMemoryResource):
+    pass
+
+cdef class SystemMemoryResource(DeviceMemoryResource):
+    pass
+
+cdef class SamHeadroomMemoryResource(DeviceMemoryResource):
+    pass
+
+cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
+    pass
+
+cdef class PoolMemoryResource(UpstreamResourceAdaptor):
+    pass
+
+cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor):
+    pass
+
+cdef class BinningMemoryResource(UpstreamResourceAdaptor):
+
+    cdef readonly list _bin_mrs
+
+    cpdef add_bin(
+        self,
+        size_t allocation_size,
+        DeviceMemoryResource bin_resource=*)
+
+cdef class CallbackMemoryResource(DeviceMemoryResource):
+    cdef object _allocate_func
+    cdef object _deallocate_func
+
+cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor):
+    pass
+
+cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
+    cdef object _log_file_name
+    cpdef get_file_name(self)
+    cpdef flush(self)
+
+cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor):
+    pass
+
+cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor):
+    pass
+
+cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor):
+    cdef object _callback
+
+cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor):
+    pass
+
+cpdef DeviceMemoryResource get_current_device_resource()
diff --git a/python/rmm/rmm/python/memory_resource.pyx b/python/rmm/rmm/python/memory_resource.pyx
new file mode 100644
index 000000000..46613667c
--- /dev/null
+++ b/python/rmm/rmm/python/memory_resource.pyx
@@ -0,0 +1,1170 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
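Before the implementation below, a sketch of how the adaptor hierarchy declared in memory_resource.pxd above is meant to compose. This uses only the public `rmm.mr` API that this file implements and requires a CUDA device:

```python
import rmm

# Build a chain: CUDA base -> pool suballocator -> statistics tracking.
base = rmm.mr.CudaMemoryResource()
pool = rmm.mr.PoolMemoryResource(base, initial_pool_size="1GiB")
stats = rmm.mr.StatisticsResourceAdaptor(pool)
rmm.mr.set_current_device_resource(stats)

# Every adaptor keeps its upstream alive and reachable:
assert stats.get_upstream() is pool
assert pool.get_upstream() is base
```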
+
+import os
+import warnings
+# This import is needed for Cython typing in translate_python_except_to_cpp
+# See https://github.com/cython/cython/issues/5589
+from builtins import BaseException
+from collections import defaultdict
+
+cimport cython
+from cython.operator cimport dereference as deref
+from libc.stddef cimport size_t
+from libc.stdint cimport int8_t, uintptr_t
+from libcpp cimport bool
+from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.optional cimport optional
+from libcpp.pair cimport pair
+
+from cuda.cudart import cudaError_t
+
+from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice
+
+from rmm._cuda.stream cimport Stream
+
+from rmm._cuda.stream import DEFAULT_STREAM
+
+from rmm.cpp.cpp_cuda_stream_view cimport cuda_stream_view
+from rmm.cpp.cpp_per_device_resource cimport (
+    cuda_device_id,
+    set_per_device_resource as cpp_set_per_device_resource,
+)
+from rmm.python.helper cimport parse_bytes
+from rmm.cpp.cpp_memory_resource cimport (
+    available_device_memory as c_available_device_memory,
+    percent_of_free_device_memory as c_percent_of_free_device_memory,
+)
+
+from rmm.statistics import Statistics
+
+from rmm.cpp.cpp_memory_resource cimport (
+    CppExcept,
+    allocate_callback_t,
+    allocation_handle_type,
+    binning_memory_resource,
+    callback_memory_resource,
+    cuda_async_memory_resource,
+    cuda_memory_resource,
+    deallocate_callback_t,
+    device_memory_resource,
+    failure_callback_resource_adaptor,
+    failure_callback_t,
+    fixed_size_memory_resource,
+    limiting_resource_adaptor,
+    logging_resource_adaptor,
+    managed_memory_resource,
+    pool_memory_resource,
+    posix_file_descriptor,
+    prefetch_resource_adaptor,
+    sam_headroom_memory_resource,
+    statistics_resource_adaptor,
+    system_memory_resource,
+    throw_cpp_except,
+    tracking_resource_adaptor,
+    translate_python_except_to_cpp,
+)
+
+
+cdef class DeviceMemoryResource:
+
+    cdef device_memory_resource* get_mr(self) noexcept nogil:
+        """Get the underlying C++ memory resource object."""
+        return self.c_obj.get()
+
+    def allocate(self, size_t nbytes, Stream stream=DEFAULT_STREAM):
+        """Allocate ``nbytes`` bytes of memory.
+
+        Parameters
+        ----------
+        nbytes : size_t
+            The size of the allocation in bytes
+        stream : Stream
+            Optional stream for the allocation
+        """
+        return <uintptr_t>self.c_obj.get().allocate(nbytes, stream.view())
+
+    def deallocate(self, uintptr_t ptr, size_t nbytes, Stream stream=DEFAULT_STREAM):
+        """Deallocate memory pointed to by ``ptr`` of size ``nbytes``.
+
+        Parameters
+        ----------
+        ptr : uintptr_t
+            Pointer to be deallocated
+        nbytes : size_t
+            Size of the allocation in bytes
+        stream : Stream
+            Optional stream for the deallocation
+        """
+        self.c_obj.get().deallocate(<void*>(ptr), nbytes, stream.view())
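The `allocate`/`deallocate` methods above traffic in raw pointers as Python integers. A minimal round trip, per the docstrings (requires a CUDA device, and the deallocation size must match the allocation):

```python
import rmm

mr = rmm.mr.get_current_device_resource()
ptr = mr.allocate(256)      # pointer returned as a Python integer
mr.deallocate(ptr, 256)     # caller passes back the same size
```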
+ """ + + def __cinit__(self, DeviceMemoryResource upstream_mr, *args, **kwargs): + + if (upstream_mr is None): + raise Exception("Argument `upstream_mr` must not be None") + + self.upstream_mr = upstream_mr + + def __dealloc__(self): + # Must cleanup the base MR before any upstream MR + self.c_obj.reset() + + cpdef DeviceMemoryResource get_upstream(self): + return self.upstream_mr + + +cdef class CudaMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new cuda_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``cudaMalloc``/``cudaFree`` for + allocation/deallocation. + """ + pass + + +cdef class CudaAsyncMemoryResource(DeviceMemoryResource): + """ + Memory resource that uses ``cudaMallocAsync``/``cudaFreeAsync`` for + allocation/deallocation. + + Parameters + ---------- + initial_pool_size : int | str, optional + Initial pool size in bytes. By default, half the available memory + on the device is used. A string argument is parsed using `parse_bytes`. + release_threshold: int, optional + Release threshold in bytes. If the pool size grows beyond this + value, unused memory held by the pool will be released at the + next synchronization point. + enable_ipc: bool, optional + If True, enables export of POSIX file descriptor handles for the memory + allocated by this resource so that it can be used with CUDA IPC. + """ + def __cinit__( + self, + initial_pool_size=None, + release_threshold=None, + enable_ipc=False + ): + cdef optional[size_t] c_initial_pool_size = ( + optional[size_t]() + if initial_pool_size is None + else optional[size_t]( parse_bytes(initial_pool_size)) + ) + + cdef optional[size_t] c_release_threshold = ( + optional[size_t]() + if release_threshold is None + else optional[size_t]( release_threshold) + ) + + # If IPC memory handles are not supported, the constructor below will + # raise an error from C++. + cdef optional[allocation_handle_type] c_export_handle_type = ( + optional[allocation_handle_type]( + posix_file_descriptor + ) + if enable_ipc + else optional[allocation_handle_type]() + ) + + self.c_obj.reset( + new cuda_async_memory_resource( + c_initial_pool_size, + c_release_threshold, + c_export_handle_type + ) + ) + + +cdef class ManagedMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new managed_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``cudaMallocManaged``/``cudaFree`` for + allocation/deallocation. + """ + pass + + +cdef class SystemMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new system_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. + """ + pass + + +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): + def __cinit__( + self, + size_t headroom + ): + self.c_obj.reset(new sam_headroom_memory_resource(headroom)) + + def __init__( + self, + size_t headroom + ): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. 
+
+
+cdef class SamHeadroomMemoryResource(DeviceMemoryResource):
+    def __cinit__(
+        self,
+        size_t headroom
+    ):
+        self.c_obj.reset(new sam_headroom_memory_resource(headroom))
+
+    def __init__(
+        self,
+        size_t headroom
+    ):
+        """
+        Memory resource that uses ``malloc``/``free`` for
+        allocation/deallocation.
+
+        Parameters
+        ----------
+        headroom : size_t
+            Size of the reserved GPU memory as headroom
+        """
+        pass
+
+
+cdef class PoolMemoryResource(UpstreamResourceAdaptor):
+
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        initial_pool_size=None,
+        maximum_pool_size=None
+    ):
+        cdef size_t c_initial_pool_size
+        cdef optional[size_t] c_maximum_pool_size
+        c_initial_pool_size = (
+            c_percent_of_free_device_memory(50) if
+            initial_pool_size is None
+            else parse_bytes(initial_pool_size)
+        )
+        c_maximum_pool_size = (
+            optional[size_t]() if
+            maximum_pool_size is None
+            else optional[size_t](<size_t> parse_bytes(maximum_pool_size))
+        )
+        self.c_obj.reset(
+            new pool_memory_resource[device_memory_resource](
+                upstream_mr.get_mr(),
+                c_initial_pool_size,
+                c_maximum_pool_size
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        object initial_pool_size=None,
+        object maximum_pool_size=None
+    ):
+        """
+        Coalescing best-fit suballocator which uses a pool of memory allocated
+        from an upstream memory resource.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The DeviceMemoryResource from which to allocate blocks for the
+            pool.
+        initial_pool_size : int | str, optional
+            Initial pool size in bytes. By default, half the available memory
+            on the device is used.
+        maximum_pool_size : int | str, optional
+            Maximum size in bytes that the pool can grow to.
+        """
+        pass
+
+    def pool_size(self):
+        cdef pool_memory_resource[device_memory_resource]* c_mr = (
+            <pool_memory_resource[device_memory_resource]*>(self.get_mr())
+        )
+        return c_mr.pool_size()
+
+
+cdef class FixedSizeMemoryResource(UpstreamResourceAdaptor):
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        size_t block_size=1<<20,
+        size_t blocks_to_preallocate=128
+    ):
+        self.c_obj.reset(
+            new fixed_size_memory_resource[device_memory_resource](
+                upstream_mr.get_mr(),
+                block_size,
+                blocks_to_preallocate
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        size_t block_size=1<<20,
+        size_t blocks_to_preallocate=128
+    ):
+        """
+        Memory resource which allocates memory blocks of a single fixed size.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The DeviceMemoryResource from which to allocate blocks for the
+            pool.
+        block_size : int, optional
+            The size of blocks to allocate (default is 1MiB).
+        blocks_to_preallocate : int, optional
+            The number of blocks to allocate to initialize the pool.
+
+        Notes
+        -----
+        Supports only allocations of size smaller than the configured
+        block_size.
+        """
+        pass
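Exercising `PoolMemoryResource.pool_size()` from above (a sketch; sizes arbitrary, requires a CUDA device):

```python
import rmm

pool = rmm.mr.PoolMemoryResource(
    rmm.mr.CudaMemoryResource(),
    initial_pool_size="256MiB",
    maximum_pool_size="1GiB",
)
rmm.mr.set_current_device_resource(pool)
print(pool.pool_size())  # current pool size in bytes (initially ~256 MiB)
```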
+
+
+cdef class BinningMemoryResource(UpstreamResourceAdaptor):
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        int8_t min_size_exponent=-1,
+        int8_t max_size_exponent=-1,
+    ):
+
+        self._bin_mrs = []
+
+        if (min_size_exponent == -1 or max_size_exponent == -1):
+            self.c_obj.reset(
+                new binning_memory_resource[device_memory_resource](
+                    upstream_mr.get_mr()
+                )
+            )
+        else:
+            self.c_obj.reset(
+                new binning_memory_resource[device_memory_resource](
+                    upstream_mr.get_mr(),
+                    min_size_exponent,
+                    max_size_exponent
+                )
+            )
+
+    def __dealloc__(self):
+
+        # Must cleanup the base MR before any upstream or referenced Bins
+        self.c_obj.reset()
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        int8_t min_size_exponent=-1,
+        int8_t max_size_exponent=-1,
+    ):
+        """
+        Allocates memory from a set of specified "bin" sizes based on a
+        specified allocation size.
+
+        If min_size_exponent and max_size_exponent are specified, initializes
+        with one or more FixedSizeMemoryResource bins in the range
+        ``[2**min_size_exponent, 2**max_size_exponent]``.
+
+        Call :py:meth:`~.add_bin` to add additional bin allocators.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The memory resource to use for allocations larger than any of the
+            bins.
+        min_size_exponent : size_t
+            The base-2 exponent of the minimum size FixedSizeMemoryResource
+            bin to create.
+        max_size_exponent : size_t
+            The base-2 exponent of the maximum size FixedSizeMemoryResource
+            bin to create.
+        """
+        pass
+
+    cpdef add_bin(
+        self,
+        size_t allocation_size,
+        DeviceMemoryResource bin_resource=None
+    ):
+        """
+        Adds a bin of the specified maximum allocation size to this memory
+        resource. If specified, uses bin_resource for allocation for this bin.
+        If not specified, creates and uses a FixedSizeMemoryResource for
+        allocation for this bin.
+
+        Allocations smaller than allocation_size and larger than the next
+        smaller bin size will use this fixed-size memory resource.
+
+        Parameters
+        ----------
+        allocation_size : size_t
+            The maximum allocation size in bytes for the created bin
+        bin_resource : DeviceMemoryResource
+            The resource to use for this bin (optional)
+        """
+        if bin_resource is None:
+            (<binning_memory_resource[device_memory_resource]*>(
+                self.c_obj.get()))[0].add_bin(allocation_size)
+        else:
+            # Save the ref to the new bin resource to ensure its lifetime
+            self._bin_mrs.append(bin_resource)
+
+            (<binning_memory_resource[device_memory_resource]*>(
+                self.c_obj.get()))[0].add_bin(
+                    allocation_size,
+                    bin_resource.get_mr())
+
+    @property
+    def bin_mrs(self) -> list:
+        """Get the list of binned memory resources."""
+        return self._bin_mrs
+
+
+cdef void* _allocate_callback_wrapper(
+    size_t nbytes,
+    cuda_stream_view stream,
+    void* ctx
+    # Note that this function is specifically designed to rethrow Python
+    # exceptions as C++ exceptions when called as a callback from C++, so it is
+    # noexcept from Cython's perspective.
+) noexcept nogil:
+    cdef CppExcept err
+    with gil:
+        try:
+            return <void*><uintptr_t>((<object>ctx)(
+                nbytes,
+                Stream._from_cudaStream_t(stream.value())
+            ))
+        except BaseException as e:
+            err = translate_python_except_to_cpp(e)
+            throw_cpp_except(err)
+
+cdef void _deallocate_callback_wrapper(
+    void* ptr,
+    size_t nbytes,
+    cuda_stream_view stream,
+    void* ctx
+) except * with gil:
+    (<object>ctx)(<uintptr_t>(ptr), nbytes, Stream._from_cudaStream_t(stream.value()))
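A sketch of the bin setup described in the `BinningMemoryResource` docstring above (the exponents and the extra bin size are arbitrary choices):

```python
import rmm

base = rmm.mr.CudaMemoryResource()
# Fixed-size bins from 2**8 to 2**22 bytes; larger requests go upstream.
mr = rmm.mr.BinningMemoryResource(base, min_size_exponent=8, max_size_exponent=22)
mr.add_bin(6 * 1024 * 1024)   # add one custom 6 MiB bin on top
rmm.mr.set_current_device_resource(mr)
```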
+
+
+cdef class CallbackMemoryResource(DeviceMemoryResource):
+    """
+    A memory resource that uses the user-provided callables to do
+    memory allocation and deallocation.
+
+    ``CallbackMemoryResource`` should really only be used for
+    debugging memory issues, as there is a significant performance
+    penalty associated with using a Python function for each memory
+    allocation and deallocation.
+
+    Parameters
+    ----------
+    allocate_func : callable
+        The allocation function must accept two arguments: an integer
+        representing the number of bytes to allocate and a Stream on
+        which to perform the allocation. It must return an integer
+        representing the pointer to the allocated memory.
+    deallocate_func : callable
+        The deallocation function must accept three arguments: an integer
+        representing the pointer to the memory to free, a second
+        integer representing the number of bytes to free, and a Stream
+        on which to perform the deallocation.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> base_mr = rmm.mr.CudaMemoryResource()
+    >>> def allocate_func(size, stream):
+    ...     print(f"Allocating {size} bytes")
+    ...     return base_mr.allocate(size, stream)
+    ...
+    >>> def deallocate_func(ptr, size, stream):
+    ...     print(f"Deallocating {size} bytes")
+    ...     return base_mr.deallocate(ptr, size, stream)
+    ...
+    >>> rmm.mr.set_current_device_resource(
+    ...     rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func)
+    ... )
+    >>> dbuf = rmm.DeviceBuffer(size=256)
+    Allocating 256 bytes
+    >>> del dbuf
+    Deallocating 256 bytes
+    """
+    def __init__(
+        self,
+        allocate_func,
+        deallocate_func,
+    ):
+        self._allocate_func = allocate_func
+        self._deallocate_func = deallocate_func
+        self.c_obj.reset(
+            new callback_memory_resource(
+                <allocate_callback_t>(_allocate_callback_wrapper),
+                <deallocate_callback_t>(_deallocate_callback_wrapper),
+                <void*>(allocate_func),
+                <void*>(deallocate_func)
+            )
+        )
+
+
+def _append_id(filename, id):
+    """
+    Append ".dev" onto a filename before the extension
+
+    Example: _append_id("hello.txt", 1) returns "hello.dev1.txt"
+
+    Parameters
+    ----------
+    filename : string
+        The filename, possibly with extension
+    id : int
+        The ID to append
+    """
+    name, ext = os.path.splitext(filename)
+    return f"{name}.dev{id}{ext}"
+
+
+cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor):
+
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        size_t allocation_limit
+    ):
+        self.c_obj.reset(
+            new limiting_resource_adaptor[device_memory_resource](
+                upstream_mr.get_mr(),
+                allocation_limit
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        size_t allocation_limit
+    ):
+        """
+        Memory resource that limits the total amount of memory that may be
+        allocated from an upstream memory resource.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The upstream memory resource.
+        allocation_limit : size_t
+            Maximum memory allowed for this allocator.
+        """
+        pass
+
+    def get_allocated_bytes(self) -> size_t:
+        """
+        Query the number of bytes that have been allocated. Note that this
+        cannot be used to determine how large an allocation is possible, due
+        to both fragmentation and internal page sizes and alignment that are
+        not tracked by this allocator.
+        """
+        return (<limiting_resource_adaptor[device_memory_resource]*>(
+            self.c_obj.get())
+        )[0].get_allocated_bytes()
+
+    def get_allocation_limit(self) -> size_t:
+        """
+        Query the maximum number of bytes that this allocator is allowed to
+        allocate. This is the limit on the allocator and not a representation
+        of the underlying device. The device may not be able to support this
+        limit.
+        """
+        return (<limiting_resource_adaptor[device_memory_resource]*>(
+            self.c_obj.get())
+        )[0].get_allocation_limit()
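Behavior sketch for the `LimitingResourceAdaptor` above. The over-limit failure is expected to surface as a ``MemoryError`` via RMM's usual ``bad_alloc`` translation; that mapping is an assumption here, not shown in this diff:

```python
import rmm

mr = rmm.mr.LimitingResourceAdaptor(
    rmm.mr.CudaMemoryResource(), allocation_limit=2**20  # 1 MiB cap
)
rmm.mr.set_current_device_resource(mr)

buf = rmm.DeviceBuffer(size=512 * 1024)                 # fits under the cap
print(mr.get_allocated_bytes(), mr.get_allocation_limit())
# rmm.DeviceBuffer(size=2**21)                          # would exceed the cap
```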
+
+
+cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        object log_file_name=None
+    ):
+        if log_file_name is None:
+            log_file_name = os.getenv("RMM_LOG_FILE")
+            if not log_file_name:
+                raise ValueError(
+                    "RMM log file must be specified either using "
+                    "log_file_name= argument or RMM_LOG_FILE "
+                    "environment variable"
+                )
+
+        # Append the device ID before the file extension
+        log_file_name = _append_id(
+            log_file_name, getDevice()
+        )
+        log_file_name = os.path.abspath(log_file_name)
+        self._log_file_name = log_file_name
+
+        self.c_obj.reset(
+            new logging_resource_adaptor[device_memory_resource](
+                upstream_mr.get_mr(),
+                log_file_name.encode()
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        object log_file_name=None
+    ):
+        """
+        Memory resource that logs information about allocations/deallocations
+        performed by an upstream memory resource.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The upstream memory resource.
+        log_file_name : str
+            Path to the file to which logs are written.
+        """
+        pass
+
+    cpdef flush(self):
+        (<logging_resource_adaptor[device_memory_resource]*>(
+            self.get_mr()))[0].flush()
+
+    cpdef get_file_name(self):
+        return self._log_file_name
+
+    def __dealloc__(self):
+        self.c_obj.reset()
+
+
+cdef class StatisticsResourceAdaptor(UpstreamResourceAdaptor):
+
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr
+    ):
+        self.c_obj.reset(
+            new statistics_resource_adaptor[device_memory_resource](
+                upstream_mr.get_mr()
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr
+    ):
+        """
+        Memory resource that tracks the current, peak and total
+        allocations/deallocations performed by an upstream memory resource.
+        Includes the ability to query these statistics at any time.
+
+        A stack of counters is maintained. Use :meth:`push_counters` and
+        :meth:`pop_counters` to track statistics at different nesting levels.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The upstream memory resource.
+        """
+        pass
+
+    @property
+    def allocation_counts(self) -> Statistics:
+        """
+        Gets the current, peak, and total allocated bytes and number of
+        allocations.
+
+        The fields are ``current_bytes``, ``current_count``,
+        ``peak_bytes``, ``peak_count``, ``total_bytes``, and ``total_count``.
+
+        Returns
+        -------
+        Statistics
+            Object containing allocation counts and bytes.
+        """
+        cdef statistics_resource_adaptor[device_memory_resource]* mr = \
+            <statistics_resource_adaptor[device_memory_resource]*>self.c_obj.get()
+
+        counts = deref(mr).get_allocations_counter()
+        byte_counts = deref(mr).get_bytes_counter()
+        return Statistics(
+            current_bytes=byte_counts.value,
+            current_count=counts.value,
+            peak_bytes=byte_counts.peak,
+            peak_count=counts.peak,
+            total_bytes=byte_counts.total,
+            total_count=counts.total,
+        )
+
+    def pop_counters(self) -> Statistics:
+        """
+        Pop a counter pair (bytes and allocations) from the stack.
+
+        Returns
+        -------
+        The popped statistics
+        """
+        cdef statistics_resource_adaptor[device_memory_resource]* mr = \
+            <statistics_resource_adaptor[device_memory_resource]*>self.c_obj.get()
+
+        bytes_and_allocs = deref(mr).pop_counters()
+        return Statistics(
+            current_bytes=bytes_and_allocs.first.value,
+            current_count=bytes_and_allocs.second.value,
+            peak_bytes=bytes_and_allocs.first.peak,
+            peak_count=bytes_and_allocs.second.peak,
+            total_bytes=bytes_and_allocs.first.total,
+            total_count=bytes_and_allocs.second.total,
+        )
+
+    def push_counters(self) -> Statistics:
+        """
+        Push a new counter pair (bytes and allocations) onto the stack.
+
+        Returns
+        -------
+        The statistics _before_ the push
+        """
+        cdef statistics_resource_adaptor[device_memory_resource]* mr = \
+            <statistics_resource_adaptor[device_memory_resource]*>self.c_obj.get()
+
+        bytes_and_allocs = deref(mr).push_counters()
+        return Statistics(
+            current_bytes=bytes_and_allocs.first.value,
+            current_count=bytes_and_allocs.second.value,
+            peak_bytes=bytes_and_allocs.first.peak,
+            peak_count=bytes_and_allocs.second.peak,
+            total_bytes=bytes_and_allocs.first.total,
+            total_count=bytes_and_allocs.second.total,
+        )
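Scoped statistics with the counter stack above (field names match the `Statistics` construction in this file; requires a CUDA device):

```python
import rmm

mr = rmm.mr.StatisticsResourceAdaptor(rmm.mr.CudaMemoryResource())
rmm.mr.set_current_device_resource(mr)

mr.push_counters()                 # open a fresh counting scope
buf = rmm.DeviceBuffer(size=1024)
del buf
stats = mr.pop_counters()          # Statistics for the closed scope
print(stats.peak_bytes, stats.total_count)
```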
+
+
+cdef class TrackingResourceAdaptor(UpstreamResourceAdaptor):
+
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        bool capture_stacks=False
+    ):
+        self.c_obj.reset(
+            new tracking_resource_adaptor[device_memory_resource](
+                upstream_mr.get_mr(),
+                capture_stacks
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        bool capture_stacks=False
+    ):
+        """
+        Memory resource that tracks allocations/deallocations performed by
+        an upstream memory resource. Includes the ability to query all
+        outstanding allocations with the stack trace, if desired.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The upstream memory resource.
+        capture_stacks : bool
+            Whether or not to capture the stack trace with each allocation.
+        """
+        pass
+
+    def get_allocated_bytes(self) -> size_t:
+        """
+        Query the number of bytes that have been allocated. Note that this
+        cannot be used to determine how large an allocation is possible, due
+        to both fragmentation and internal page sizes and alignment that are
+        not tracked by this allocator.
+        """
+        return (<tracking_resource_adaptor[device_memory_resource]*>(
+            self.c_obj.get())
+        )[0].get_allocated_bytes()
+
+    def get_outstanding_allocations_str(self) -> str:
+        """
+        Returns a string containing information about the current outstanding
+        allocations. For each allocation, the address, size and optional
+        stack trace are shown.
+        """
+
+        return (<tracking_resource_adaptor[device_memory_resource]*>(
+            self.c_obj.get())
+        )[0].get_outstanding_allocations_str().decode('UTF-8')
+
+    def log_outstanding_allocations(self):
+        """
+        Logs the output of `get_outstanding_allocations_str` to the current
+        RMM log file if enabled.
+        """
+
+        (<tracking_resource_adaptor[device_memory_resource]*>(
+            self.c_obj.get()))[0].log_outstanding_allocations()
+
+
+# Note that this function is specifically designed to rethrow Python exceptions
+# as C++ exceptions when called as a callback from C++, so it is noexcept from
+# Cython's perspective.
+cdef bool _oom_callback_function(size_t bytes, void *callback_arg) noexcept nogil:
+    cdef CppExcept err
+    with gil:
+        try:
+            return (<object>callback_arg)(bytes)
+        except BaseException as e:
+            err = translate_python_except_to_cpp(e)
+            throw_cpp_except(err)
+
+
+cdef class FailureCallbackResourceAdaptor(UpstreamResourceAdaptor):
+
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        object callback,
+    ):
+        self._callback = callback
+        self.c_obj.reset(
+            new failure_callback_resource_adaptor[device_memory_resource](
+                upstream_mr.get_mr(),
+                <failure_callback_t>_oom_callback_function,
+                <void*>callback
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr,
+        object callback,
+    ):
+        """
+        Memory resource that calls ``callback`` when memory allocation fails.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The upstream memory resource.
+        callback : callable
+            Function called when memory allocation fails.
+        """
+        pass
+
+
+cdef class PrefetchResourceAdaptor(UpstreamResourceAdaptor):
+
+    def __cinit__(
+        self,
+        DeviceMemoryResource upstream_mr
+    ):
+        self.c_obj.reset(
+            new prefetch_resource_adaptor[device_memory_resource](
+                upstream_mr.get_mr()
+            )
+        )
+
+    def __init__(
+        self,
+        DeviceMemoryResource upstream_mr
+    ):
+        """
+        Memory resource that prefetches all allocations.
+
+        Parameters
+        ----------
+        upstream_mr : DeviceMemoryResource
+            The upstream memory resource.
+        """
+        pass
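Usage sketch for the `FailureCallbackResourceAdaptor` above. Per the C++ `failure_callback_resource_adaptor` contract, returning True asks RMM to retry the allocation while False lets the failure propagate:

```python
import rmm

def on_oom(nbytes: int) -> bool:
    print(f"allocation of {nbytes} bytes failed")
    return False   # do not retry; let the failure propagate

mr = rmm.mr.FailureCallbackResourceAdaptor(rmm.mr.CudaMemoryResource(), on_oom)
rmm.mr.set_current_device_resource(mr)
```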
+ """ + pass + + +# Global per-device memory resources; dict of int:DeviceMemoryResource +cdef _per_device_mrs = defaultdict(CudaMemoryResource) + + +cpdef void _initialize( + bool pool_allocator=False, + bool managed_memory=False, + object initial_pool_size=None, + object maximum_pool_size=None, + object devices=0, + bool logging=False, + object log_file_name=None, +) except *: + """ + Initializes RMM library using the options passed + """ + if managed_memory: + upstream = ManagedMemoryResource + else: + upstream = CudaMemoryResource + + if pool_allocator: + typ = PoolMemoryResource + args = (upstream(),) + kwargs = dict( + initial_pool_size=None if initial_pool_size is None + else parse_bytes(initial_pool_size), + maximum_pool_size=None if maximum_pool_size is None + else parse_bytes(maximum_pool_size) + ) + else: + typ = upstream + args = () + kwargs = {} + + cdef DeviceMemoryResource mr + cdef int original_device + + # Save the current device so we can reset it + try: + original_device = getDevice() + except CUDARuntimeError as e: + if e.status == cudaError_t.cudaErrorNoDevice: + warnings.warn(e.msg) + else: + raise e + else: + # reset any previously specified per device resources + global _per_device_mrs + _per_device_mrs.clear() + + if devices is None: + devices = [0] + elif isinstance(devices, int): + devices = [devices] + + # create a memory resource per specified device + for device in devices: + setDevice(device) + + if logging: + mr = LoggingResourceAdaptor( + typ(*args, **kwargs), + log_file_name + ) + else: + mr = typ(*args, **kwargs) + + set_per_device_resource(device, mr) + + # reset CUDA device to original + setDevice(original_device) + + +cpdef get_per_device_resource(int device): + """ + Get the default memory resource for the specified device. + + If the returned memory resource is used when a different device is the + active CUDA device, behavior is undefined. + + Parameters + ---------- + device : int + The ID of the device for which to get the memory resource. + """ + global _per_device_mrs + return _per_device_mrs[device] + + +cpdef set_per_device_resource(int device, DeviceMemoryResource mr): + """ + Set the default memory resource for the specified device. + + Parameters + ---------- + device : int + The ID of the device for which to get the memory resource. + mr : DeviceMemoryResource + The memory resource to set. Must have been created while device was + the active CUDA device. + """ + global _per_device_mrs + _per_device_mrs[device] = mr + + # Since cuda_device_id does not have a default constructor, it must be heap + # allocated + cdef unique_ptr[cuda_device_id] device_id = \ + make_unique[cuda_device_id](device) + + cpp_set_per_device_resource(deref(device_id), mr.get_mr()) + + +cpdef set_current_device_resource(DeviceMemoryResource mr): + """ + Set the default memory resource for the current device. + + Parameters + ---------- + mr : DeviceMemoryResource + The memory resource to set. Must have been created while the current + device is the active CUDA device. + """ + set_per_device_resource(getDevice(), mr) + + +cpdef get_per_device_resource_type(int device): + """ + Get the memory resource type used for RMM device allocations on the + specified device. + + Parameters + ---------- + device : int + The device ID + """ + return type(get_per_device_resource(device)) + + +cpdef DeviceMemoryResource get_current_device_resource(): + """ + Get the memory resource used for RMM device allocations on the current + device. 
+
+
+cpdef DeviceMemoryResource get_current_device_resource():
+    """
+    Get the memory resource used for RMM device allocations on the current
+    device.
+
+    If the returned memory resource is used when a different device is the
+    active CUDA device, behavior is undefined.
+    """
+    return get_per_device_resource(getDevice())
+
+
+cpdef get_current_device_resource_type():
+    """
+    Get the memory resource type used for RMM device allocations on the
+    current device.
+    """
+    return type(get_current_device_resource())
+
+
+cpdef is_initialized():
+    """
+    Check whether RMM is initialized.
+    """
+    global _per_device_mrs
+    cdef DeviceMemoryResource each_mr
+    return all(
+        [each_mr.get_mr() is not NULL
+         for each_mr in _per_device_mrs.values()]
+    )
+
+
+cpdef _flush_logs():
+    """
+    Flush the logs of all currently initialized LoggingResourceAdaptor
+    memory resources.
+    """
+    global _per_device_mrs
+    cdef DeviceMemoryResource each_mr
+    for each_mr in _per_device_mrs.values():
+        if isinstance(each_mr, LoggingResourceAdaptor):
+            each_mr.flush()
+
+
+def enable_logging(log_file_name=None):
+    """
+    Enable logging of run-time events for all devices.
+
+    Parameters
+    ----------
+    log_file_name : str, optional
+        Name of the log file. If not specified, the environment variable
+        RMM_LOG_FILE is used. A ValueError is thrown if neither is available.
+        A separate log file is produced for each device,
+        and the suffix `".dev{id}"` is automatically added to the log file
+        name.
+
+    Notes
+    -----
+    Note that if you use the environment variable CUDA_VISIBLE_DEVICES
+    with logging enabled, the suffix may not be what you expect. For
+    example, if you set CUDA_VISIBLE_DEVICES=1, the log file produced
+    will still have suffix `0`. Similarly, if you set
+    CUDA_VISIBLE_DEVICES=1,0 and use devices 0 and 1, the log file
+    with suffix `0` will correspond to the GPU with device ID `1`.
+    Use `rmm.get_log_filenames()` to get the log file names
+    corresponding to each device.
+    """
+    global _per_device_mrs
+
+    devices = [0] if not _per_device_mrs.keys() else _per_device_mrs.keys()
+
+    for device in devices:
+        each_mr = _per_device_mrs[device]
+        if not isinstance(each_mr, LoggingResourceAdaptor):
+            set_per_device_resource(
+                device,
+                LoggingResourceAdaptor(each_mr, log_file_name)
+            )
+
+
+def disable_logging():
+    """
+    Disable logging if it was enabled previously using
+    `rmm.initialize()` or `rmm.enable_logging()`.
+    """
+    global _per_device_mrs
+    for i, each_mr in _per_device_mrs.items():
+        if isinstance(each_mr, LoggingResourceAdaptor):
+            set_per_device_resource(i, each_mr.get_upstream())
+
+
+def get_log_filenames():
+    """
+    Returns the log filename (or `None` if not writing logs)
+    for each device in use.
+
+    Examples
+    --------
+    >>> import rmm
+    >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log")
+    >>> rmm.get_log_filenames()
+    {0: '/home/user/workspace/rapids/rmm/python/rmm.dev0.log',
+     1: '/home/user/workspace/rapids/rmm/python/rmm.dev1.log'}
+    """
+    global _per_device_mrs
+
+    return {
+        i: each_mr.get_file_name()
+        if isinstance(each_mr, LoggingResourceAdaptor)
+        else None
+        for i, each_mr in _per_device_mrs.items()
+    }
+
+
+def available_device_memory():
+    """
+    Returns a tuple of free and total device memory.
+    """
+    cdef pair[size_t, size_t] res
+    res = c_available_device_memory()
+    return (res.first, res.second)
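End-to-end sketch of the logging helpers above (log paths depend on the working directory; requires a CUDA device):

```python
import rmm

rmm.enable_logging(log_file_name="rmm.log")  # wraps each MR in a LoggingResourceAdaptor
buf = rmm.DeviceBuffer(size=1024)
del buf
print(rmm.get_log_filenames())               # e.g. {0: '/abs/path/rmm.dev0.log'}
rmm.disable_logging()                        # unwrap back to the upstream resources
```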