Callback memory resource #980

Merged Apr 19, 2022 (39 commits)
Changes from 34 commits

Commits
ea6f54c
Draft of callback_memory_resource
shwina Nov 25, 2021
ba5a43d
building...
shwina Nov 25, 2021
46c6507
Some fixes
shwina Dec 1, 2021
eb8f9e1
Draft of callback_memory_resource
shwina Nov 25, 2021
8f1a62d
building...
shwina Nov 25, 2021
e7c8c88
Some fixes
shwina Dec 1, 2021
1e2a10c
Expose `allocate()` and `deallocate()`
shwina Apr 6, 2022
d2fca0d
Add first cpp test
shwina Apr 6, 2022
63b9690
Add another test
shwina Apr 6, 2022
cdc53e7
Use fmt instead
shwina Apr 8, 2022
e1014d7
C++ docs
shwina Apr 8, 2022
f53ccf1
Add python test
shwina Apr 8, 2022
de221d6
Merge branch 'callback-memory-resource' of github.com:shwina/rmm into…
shwina Apr 8, 2022
e9221fa
Remove files
shwina Apr 8, 2022
f00084c
Doc update
shwina Apr 13, 2022
ecad57f
Update tests/mr/device/callback_mr_tests.cpp
shwina Apr 13, 2022
01e109f
Add note about performance hit
shwina Apr 13, 2022
eacaa27
Merge branch 'callback-memory-resource' of github.com:shwina/rmm into…
shwina Apr 13, 2022
ad46071
More doc
shwina Apr 13, 2022
3cafa3a
Doc update
shwina Apr 13, 2022
4008617
Update include/rmm/mr/device/callback_memory_resource.hpp
shwina Apr 13, 2022
8dc9eca
Update include/rmm/mr/device/callback_memory_resource.hpp
shwina Apr 13, 2022
392572b
Merge branch 'branch-22.06' of https://github.com/rapidsai/rmm into c…
shwina Apr 13, 2022
a776631
Correctly pass base_mr using ctx
shwina Apr 13, 2022
169fa1f
Update include/rmm/mr/device/callback_memory_resource.hpp
shwina Apr 13, 2022
a647f99
Merge branch 'callback-memory-resource' of github.com:shwina/rmm into…
shwina Apr 13, 2022
fa22ecb
Address various review comments
shwina Apr 13, 2022
8d69937
Update include/rmm/mr/device/callback_memory_resource.hpp
shwina Apr 14, 2022
ba3c1bd
Update include/rmm/mr/device/callback_memory_resource.hpp
shwina Apr 14, 2022
c1ba187
Update python/rmm/_lib/memory_resource.pyx
shwina Apr 14, 2022
9d94861
Default to nullptr
shwina Apr 14, 2022
1933e9a
Merge branch 'callback-memory-resource' of github.com:shwina/rmm into…
shwina Apr 14, 2022
347dc9c
Move docs
shwina Apr 14, 2022
b32dfa2
Use mock class in test
shwina Apr 14, 2022
02a05d6
Add a test for allocate/deallocate
shwina Apr 19, 2022
f9e445b
Add header
shwina Apr 19, 2022
d080157
Add example
shwina Apr 19, 2022
5529ca1
Copyright
shwina Apr 19, 2022
3c73ca1
Copyright
shwina Apr 19, 2022
125 changes: 125 additions & 0 deletions include/rmm/mr/device/callback_memory_resource.hpp
@@ -0,0 +1,125 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/mr/device/device_memory_resource.hpp>

#include <cstddef>
#include <functional>
#include <utility>

namespace rmm::mr {

/**
* @brief Callback function type used by callback memory resource for allocation.
*
* The signature of the callback function is:
 * `void* allocate_callback_t(std::size_t bytes, cuda_stream_view stream, void* arg);`
*
* * Returns a pointer to an allocation of at least `bytes` usable immediately on
* `stream`. The stream-ordered behavior requirements are identical to
* `device_memory_resource::allocate`.
*
* * This signature is compatible with `do_allocate` but adds the extra function
* parameter `arg`. The `arg` is provided to the constructor of the
* `callback_memory_resource` and will be forwarded along to every invocation
* of the callback function.
*/
using allocate_callback_t = std::function<void*(std::size_t, cuda_stream_view, void*)>;

/**
* @brief Callback function type used by callback_memory_resource for deallocation.
*
* The signature of the callback function is:
 * `void deallocate_callback_t(void* ptr, std::size_t bytes, cuda_stream_view stream, void* arg);`
*
* * Deallocates memory pointed to by `ptr`. `bytes` specifies the size of the allocation
* in bytes, and must equal the value of `bytes` that was passed to the allocate callback
* function. The stream-ordered behavior requirements are identical to
* `device_memory_resource::deallocate`.
*
* * This signature is compatible with `do_deallocate` but adds the extra function
* parameter `arg`. The `arg` is provided to the constructor of the
* `callback_memory_resource` and will be forwarded along to every invocation
* of the callback function.
*/
using deallocate_callback_t = std::function<void(void*, std::size_t, cuda_stream_view, void*)>;

/**
* @brief A device memory resource that uses the provided callbacks for memory allocation
* and deallocation.
*/
class callback_memory_resource final : public device_memory_resource {
public:
/**
* @brief Construct a new callback memory resource.
*
* Constructs a callback memory resource that uses the user-provided callbacks
* `allocate_callback` for allocation and `deallocate_callback` for deallocation.
*
* @param allocate_callback The callback function used for allocation
* @param deallocate_callback The callback function used for deallocation
* @param allocate_callback_arg Additional context passed to `allocate_callback`.
* It is the caller's responsibility to maintain the lifetime of the pointed-to data
* for the duration of the lifetime of the `callback_memory_resource`.
* @param deallocate_callback_arg Additional context passed to `deallocate_callback`.
* It is the caller's responsibility to maintain the lifetime of the pointed-to data
* for the duration of the lifetime of the `callback_memory_resource`.
*/
callback_memory_resource(allocate_callback_t allocate_callback,
deallocate_callback_t deallocate_callback,
void* allocate_callback_arg = nullptr,
void* deallocate_callback_arg = nullptr) noexcept
: allocate_callback_(allocate_callback),
deallocate_callback_(deallocate_callback),
allocate_callback_arg_(allocate_callback_arg),
deallocate_callback_arg_(deallocate_callback_arg)
{
}

callback_memory_resource() = delete;
~callback_memory_resource() override = default;
callback_memory_resource(callback_memory_resource const&) = delete;
callback_memory_resource& operator=(callback_memory_resource const&) = delete;
callback_memory_resource(callback_memory_resource&&) noexcept = default;
callback_memory_resource& operator=(callback_memory_resource&&) noexcept = default;

private:
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
{
return allocate_callback_(bytes, stream, allocate_callback_arg_);
}

void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
{
deallocate_callback_(ptr, bytes, stream, deallocate_callback_arg_);
}

[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view) const override
{
throw std::runtime_error("cannot get free / total memory");
}

[[nodiscard]] virtual bool supports_streams() const noexcept { return false; }
[[nodiscard]] virtual bool supports_get_mem_info() const noexcept { return false; }

allocate_callback_t allocate_callback_;
deallocate_callback_t deallocate_callback_;
void* allocate_callback_arg_;
void* deallocate_callback_arg_;
};

} // namespace rmm::mr
8 changes: 6 additions & 2 deletions python/rmm/_lib/memory_resource.pxd
@@ -21,11 +21,11 @@ from libcpp.vector cimport vector
cdef extern from "rmm/mr/device/device_memory_resource.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass device_memory_resource:
pass
void* allocate(size_t bytes) except +
void deallocate(void* ptr, size_t bytes) except +
Comment on lines +24 to +25

Contributor: AFAICT these methods are being added just for the purpose of the test. I'm not opposed to exposing these functions in the Python API, but that seems like it merits discussion beyond this largely unrelated PR.

Contributor: I'm also fine if you want to push back here and just get this done, but I wanted to at least have that discussion recorded if so.

Contributor Author (@shwina, Apr 14, 2022): I think it's probably a good idea to expose these anyway for users who want to use RMM but not necessarily construct memory-owning DeviceBuffers. Also see my comment below.

Contributor: I'm fine with that. Could we add some explicit tests of these APIs for other memory resources then? It seems awkward that the allocate function of memory allocators (which sounds like a pretty core feature...) would only be tested in this one callback test, where we don't even actually validate the allocation.

Contributor Author: I added a test for allocate/deallocate.


cdef class DeviceMemoryResource:
cdef shared_ptr[device_memory_resource] c_obj

cdef device_memory_resource* get_mr(self)

cdef class UpstreamResourceAdaptor(DeviceMemoryResource):
@@ -57,6 +57,10 @@ cdef class BinningMemoryResource(UpstreamResourceAdaptor):
size_t allocation_size,
DeviceMemoryResource bin_resource=*)

cdef class CallbackMemoryResource(DeviceMemoryResource):
cdef object _allocate_func
cdef object _deallocate_func

cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
cdef object _log_file_name
cpdef get_file_name(self)
76 changes: 75 additions & 1 deletion python/rmm/_lib/memory_resource.pyx
@@ -17,7 +17,7 @@ import warnings
from collections import defaultdict

from cython.operator cimport dereference as deref
from libc.stdint cimport int8_t, int64_t
from libc.stdint cimport int8_t, int64_t, uintptr_t
from libcpp cimport bool
from libcpp.cast cimport dynamic_cast
from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
@@ -26,6 +26,7 @@ from libcpp.string cimport string
from cuda.cudart import cudaError_t

from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice
from rmm._lib.cuda_stream_view cimport cuda_stream_view


# NOTE: Keep extern declarations in .pyx file as much as possible to avoid
@@ -76,6 +77,19 @@ cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \
size_t block_size,
size_t block_to_preallocate) except +

cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \
namespace "rmm::mr" nogil:
ctypedef void* (*allocate_callback_t)(size_t, void*)
ctypedef void (*deallocate_callback_t)(void*, size_t, void*)

cdef cppclass callback_memory_resource(device_memory_resource):
callback_memory_resource(
allocate_callback_t allocate_callback,
deallocate_callback_t deallocate_callback,
void* allocate_callback_arg,
void* deallocate_callback_arg
) except +

cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass binning_memory_resource[Upstream](device_memory_resource):
@@ -168,6 +182,12 @@ cdef class DeviceMemoryResource:
cdef device_memory_resource* get_mr(self):
return self.c_obj.get()

def allocate(self, size_t nbytes):
return <uintptr_t>self.c_obj.get().allocate(nbytes)

def deallocate(self, uintptr_t ptr, size_t nbytes):
self.c_obj.get().deallocate(<void*>(ptr), nbytes)
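
For illustration, here is a minimal sketch of how these newly exposed methods can be called from Python (a hypothetical snippet, not part of the diff); the pointer is returned and accepted as a plain integer:

    import rmm

    mr = rmm.mr.CudaMemoryResource()
    ptr = mr.allocate(256)       # device pointer, returned as an integer
    mr.deallocate(ptr, 256)      # size must match the value passed to allocate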


cdef class UpstreamResourceAdaptor(DeviceMemoryResource):

@@ -444,6 +464,60 @@ cdef class BinningMemoryResource(UpstreamResourceAdaptor):
bin_resource.get_mr())


cdef void* _allocate_callback_wrapper(
size_t nbytes,
cuda_stream_view stream,
void* ctx
) with gil:
return <void*><uintptr_t>((<object>ctx)(nbytes))

cdef void _deallocate_callback_wrapper(
void* ptr,
size_t nbytes,
cuda_stream_view stream,
void* ctx
) with gil:
(<object>ctx)(<uintptr_t>(ptr), nbytes)


cdef class CallbackMemoryResource(DeviceMemoryResource):
"""
A memory resource that uses the user-provided callables to do
memory allocation and deallocation.

``CallbackMemoryResource`` should really only be used for
debugging memory issues, as there is a significant performance
penalty associated with using a Python function for each memory
allocation and deallocation.
Comment on lines +488 to +491

Contributor: Probably out of scope for this PR, but would it be possible to instead accept a cdef function as the allocator (as a void* pointer at that point) that wouldn't have these performance implications?

Contributor Author: That would preclude passing Python functions as the callbacks, which is the primary motivation for the CallbackMemoryResource.

Contributor: Correct. I'm not suggesting that we could do it with this same class or in this PR. I'm asking if this is a useful feature for future work and another class CythonCallbackMemoryResource (or if there's some way to make this signature polymorphic). Mostly asking if we should open a follow-up issue.


Parameters
----------
allocate_func: callable
The allocation function must accept a single integer argument,
representing the number of bytes to allocate, and return an
integer representing the pointer to the allocated memory.
deallocate_func: callable
The deallocation function must accept two arguments, an integer
representing the pointer to the memory to free, and a second
integer representing the number of bytes to free.
"""
def __init__(
self,
allocate_func,
deallocate_func,
):
self._allocate_func = allocate_func
self._deallocate_func = deallocate_func
self.c_obj.reset(
new callback_memory_resource(
<allocate_callback_t>(_allocate_callback_wrapper),
<deallocate_callback_t>(_deallocate_callback_wrapper),
<void*>(allocate_func),
<void*>(deallocate_func)
)
)
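
For reference, a usage sketch along the lines of the example the author mentions adding to the docstring (a hedged illustration; the exact example in the merged version may differ):

    import rmm

    base = rmm.mr.CudaMemoryResource()

    def allocate_func(nbytes):
        print(f"Allocating {nbytes} bytes")
        return base.allocate(nbytes)

    def deallocate_func(ptr, nbytes):
        print(f"Deallocating {nbytes} bytes")
        base.deallocate(ptr, nbytes)

    rmm.mr.set_current_device_resource(
        rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func)
    )
    buf = rmm.DeviceBuffer(size=256)   # invokes allocate_func
    del buf                            # invokes deallocate_func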


def _append_id(filename, id):
"""
Append ".dev<ID>" onto a filename before the extension
2 changes: 2 additions & 0 deletions python/rmm/mr.py
@@ -13,6 +13,7 @@
# limitations under the License.
from rmm._lib.memory_resource import (
BinningMemoryResource,
CallbackMemoryResource,
CudaAsyncMemoryResource,
CudaMemoryResource,
DeviceMemoryResource,
@@ -39,6 +40,7 @@

__all__ = [
"BinningMemoryResource",
"CallbackMemoryResource",
"CudaAsyncMemoryResource",
"CudaMemoryResource",
"DeviceMemoryResource",
22 changes: 22 additions & 0 deletions python/rmm/tests/test_rmm.py
@@ -719,3 +719,25 @@ def test_dev_buf_circle_ref_dealloc():
# deallocate `dbuf1` (which needs the MR alive), a segfault occurs.

gc.collect()


def test_custom_mr(capsys):
base_mr = rmm.mr.CudaMemoryResource()

def allocate_func(size):
print(f"Allocating {size} bytes")
return base_mr.allocate(size)

def deallocate_func(ptr, size):
print(f"Deallocating {size} bytes")
return base_mr.deallocate(ptr, size)

rmm.mr.set_current_device_resource(
rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func)
)

dbuf = rmm.DeviceBuffer(size=256)
del dbuf
Comment on lines +748 to +749

Contributor: Could we actually check the total memory allocation here instead of just looking for printed output?

Contributor Author (@shwina, Apr 14, 2022): Not for any arbitrary base_mr.

My thought here is that this test doesn't need to check that base_mr is behaving correctly, or really test what happens inside the callbacks. This test should just ensure that the callbacks are indeed invoked as expected.

Maybe there's a better way to do that I'm missing. Modifying a global is one approach I guess?

Contributor: I think that's a reasonable expectation for the test, but in that case why even have a base mr? The test could remove the actual allocation from the callback entirely. Is calling the allocate function really testing anything other than the fact that you can run arbitrary Python from the callback?

Setting a global would work, but I'm also OK with output capturing for this purpose.

Contributor Author: Mainly because it serves as a useful example of how to use CallbackMemoryResource, although I realized that probably belongs in the docstring, so I added it there.


captured = capsys.readouterr()
assert captured.out == "Allocating 256 bytes\nDeallocating 256 bytes\n"
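
As an aside, the "modify a global" alternative mentioned in the review thread above might look roughly like this (a hypothetical sketch, not part of the PR):

    def test_callback_invocation_counts():
        counts = {"allocate": 0, "deallocate": 0}
        base_mr = rmm.mr.CudaMemoryResource()

        def allocate_func(size):
            counts["allocate"] += 1
            return base_mr.allocate(size)

        def deallocate_func(ptr, size):
            counts["deallocate"] += 1
            base_mr.deallocate(ptr, size)

        rmm.mr.set_current_device_resource(
            rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func)
        )
        dbuf = rmm.DeviceBuffer(size=256)
        del dbuf
        assert counts == {"allocate": 1, "deallocate": 1}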
3 changes: 3 additions & 0 deletions tests/CMakeLists.txt
@@ -150,3 +150,6 @@ ConfigureTest(ARENA_MR_TEST mr/device/arena_mr_tests.cpp)

# binning MR tests
ConfigureTest(BINNING_MR_TEST mr/device/binning_mr_tests.cpp)

# callback memory resource tests
ConfigureTest(CALLBACK_MR_TEST mr/device/callback_mr_tests.cpp)
17 changes: 17 additions & 0 deletions tests/mock_resource.hpp
@@ -0,0 +1,17 @@
#include <rmm/mr/device/device_memory_resource.hpp>

#include <gmock/gmock.h>

namespace rmm::test {

class mock_resource : public rmm::mr::device_memory_resource {
public:
MOCK_METHOD(bool, supports_streams, (), (const, override, noexcept));
MOCK_METHOD(bool, supports_get_mem_info, (), (const, override, noexcept));
MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override));
MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override));
using size_pair = std::pair<std::size_t, std::size_t>;
MOCK_METHOD(size_pair, do_get_mem_info, (cuda_stream_view), (const, override));
};

} // namespace rmm::test
11 changes: 1 addition & 10 deletions tests/mr/device/aligned_mr_tests.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/

#include "../../mock_resource.hpp"
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/aligned_resource_adaptor.hpp>
@@ -28,16 +29,6 @@ namespace {

using ::testing::Return;

class mock_resource : public rmm::mr::device_memory_resource {
public:
MOCK_METHOD(bool, supports_streams, (), (const, override, noexcept));
MOCK_METHOD(bool, supports_get_mem_info, (), (const, override, noexcept));
MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override));
MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override));
using size_pair = std::pair<std::size_t, std::size_t>;
MOCK_METHOD(size_pair, do_get_mem_info, (cuda_stream_view), (const, override));
};

using aligned_mock = rmm::mr::aligned_resource_adaptor<mock_resource>;
using aligned_real = rmm::mr::aligned_resource_adaptor<rmm::mr::device_memory_resource>;
