diff --git a/include/rmm/mr/device/callback_memory_resource.hpp b/include/rmm/mr/device/callback_memory_resource.hpp new file mode 100644 index 000000000..e2deef46b --- /dev/null +++ b/include/rmm/mr/device/callback_memory_resource.hpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include <rmm/mr/device/device_memory_resource.hpp> + +#include <cstddef> +#include <functional> +#include <stdexcept> +#include <utility> + +namespace rmm::mr { + +/** + * @brief Callback function type used by callback memory resource for allocation. + * + * The signature of the callback function is: + * `void* allocate_callback_t(std::size_t bytes, cuda_stream_view stream, void* arg);` + * + * * Returns a pointer to an allocation of at least `bytes` usable immediately on + * `stream`. The stream-ordered behavior requirements are identical to + * `device_memory_resource::allocate`. + * + * * This signature is compatible with `do_allocate` but adds the extra function + * parameter `arg`. The `arg` is provided to the constructor of the + * `callback_memory_resource` and will be forwarded along to every invocation + * of the callback function. + */ +using allocate_callback_t = std::function<void*(std::size_t, cuda_stream_view, void*)>; + +/** + * @brief Callback function type used by callback_memory_resource for deallocation. + * + * The signature of the callback function is: + * `void deallocate_callback_t(void* ptr, std::size_t bytes, cuda_stream_view stream, void* arg);` + * + * * Deallocates memory pointed to by `ptr`. 
`bytes` specifies the size of the allocation + * in bytes, and must equal the value of `bytes` that was passed to the allocate callback + * function. The stream-ordered behavior requirements are identical to + * `device_memory_resource::deallocate`. + * + * * This signature is compatible with `do_deallocate` but adds the extra function + * parameter `arg`. The `arg` is provided to the constructor of the + * `callback_memory_resource` and will be forwarded along to every invocation + * of the callback function. + */ +using deallocate_callback_t = std::function<void(void*, std::size_t, cuda_stream_view, void*)>; + +/** + * @brief A device memory resource that uses the provided callbacks for memory allocation + * and deallocation. + */ +class callback_memory_resource final : public device_memory_resource { + public: + /** + * @brief Construct a new callback memory resource. + * + * Constructs a callback memory resource that uses the user-provided callbacks + * `allocate_callback` for allocation and `deallocate_callback` for deallocation. + * + * @param allocate_callback The callback function used for allocation + * @param deallocate_callback The callback function used for deallocation + * @param allocate_callback_arg Additional context passed to `allocate_callback`. + * It is the caller's responsibility to maintain the lifetime of the pointed-to data + * for the duration of the lifetime of the `callback_memory_resource`. + * @param deallocate_callback_arg Additional context passed to `deallocate_callback`. + * It is the caller's responsibility to maintain the lifetime of the pointed-to data + * for the duration of the lifetime of the `callback_memory_resource`. 
+ */ + callback_memory_resource(allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg = nullptr, + void* deallocate_callback_arg = nullptr) noexcept + : allocate_callback_(std::move(allocate_callback)), + deallocate_callback_(std::move(deallocate_callback)), + allocate_callback_arg_(allocate_callback_arg), + deallocate_callback_arg_(deallocate_callback_arg) + { + } + + callback_memory_resource() = delete; + ~callback_memory_resource() override = default; + callback_memory_resource(callback_memory_resource const&) = delete; + callback_memory_resource& operator=(callback_memory_resource const&) = delete; + callback_memory_resource(callback_memory_resource&&) noexcept = default; + callback_memory_resource& operator=(callback_memory_resource&&) noexcept = default; + + private: + void* do_allocate(std::size_t bytes, cuda_stream_view stream) override + { + return allocate_callback_(bytes, stream, allocate_callback_arg_); + } + + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override + { + deallocate_callback_(ptr, bytes, stream, deallocate_callback_arg_); + } + + [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view) const override + { + throw std::runtime_error("cannot get free / total memory"); + } + + [[nodiscard]] bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } + + allocate_callback_t allocate_callback_; + deallocate_callback_t deallocate_callback_; + void* allocate_callback_arg_; + void* deallocate_callback_arg_; +}; + +} // namespace rmm::mr diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index 3a71fd500..92ce1ce54 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,11 +21,11 @@ from libcpp.vector cimport vector cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass device_memory_resource: - pass + void* allocate(size_t bytes) except + + void deallocate(void* ptr, size_t bytes) except + cdef class DeviceMemoryResource: cdef shared_ptr[device_memory_resource] c_obj - cdef device_memory_resource* get_mr(self) cdef class UpstreamResourceAdaptor(DeviceMemoryResource): @@ -57,6 +57,10 @@ cdef class BinningMemoryResource(UpstreamResourceAdaptor): size_t allocation_size, DeviceMemoryResource bin_resource=*) +cdef class CallbackMemoryResource(DeviceMemoryResource): + cdef object _allocate_func + cdef object _deallocate_func + cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): cdef object _log_file_name cpdef get_file_name(self) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index e02f1a3ae..a110bff97 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ import warnings from collections import defaultdict from cython.operator cimport dereference as deref -from libc.stdint cimport int8_t, int64_t +from libc.stdint cimport int8_t, int64_t, uintptr_t from libcpp cimport bool from libcpp.cast cimport dynamic_cast from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr @@ -26,6 +26,7 @@ from libcpp.string cimport string from cuda.cudart import cudaError_t from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice +from rmm._lib.cuda_stream_view cimport cuda_stream_view # NOTE: Keep extern declarations in .pyx file as much as possible to avoid @@ -76,6 +77,19 @@ cdef extern from "rmm/mr/device/fixed_size_memory_resource.hpp" \ size_t block_size, size_t block_to_preallocate) except + +cdef extern from "rmm/mr/device/callback_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + ctypedef void* (*allocate_callback_t)(size_t, cuda_stream_view, void*) + ctypedef void (*deallocate_callback_t)(void*, size_t, cuda_stream_view, void*) + + cdef cppclass callback_memory_resource(device_memory_resource): + callback_memory_resource( + allocate_callback_t allocate_callback, + deallocate_callback_t deallocate_callback, + void* allocate_callback_arg, + void* deallocate_callback_arg + ) except + + cdef extern from "rmm/mr/device/binning_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass binning_memory_resource[Upstream](device_memory_resource): @@ -168,6 +182,12 @@ cdef class DeviceMemoryResource: cdef device_memory_resource* get_mr(self): return self.c_obj.get() + def allocate(self, size_t nbytes): + return <uintptr_t>self.c_obj.get().allocate(nbytes) + + def deallocate(self, uintptr_t ptr, size_t nbytes): + self.c_obj.get().deallocate(<void*>(ptr), nbytes) + cdef class UpstreamResourceAdaptor(DeviceMemoryResource): @@ -444,6 +464,86 @@ cdef class BinningMemoryResource(UpstreamResourceAdaptor): bin_resource.get_mr()) +# C-level trampoline handed to callback_memory_resource: `ctx` is a borrowed +# pointer to the Python allocate callable; `stream` is not forwarded to it. +cdef void* _allocate_callback_wrapper( + size_t nbytes, + cuda_stream_view stream, + void* ctx +) with gil: + return 
<void*>(<uintptr_t>(<object>ctx)(nbytes)) + +# Deallocation counterpart: `ctx` is a borrowed pointer to the Python +# deallocate callable; `stream` is not forwarded to it. +cdef void _deallocate_callback_wrapper( + void* ptr, + size_t nbytes, + cuda_stream_view stream, + void* ctx +) with gil: + (<object>ctx)(<uintptr_t>(ptr), nbytes) + + +cdef class CallbackMemoryResource(DeviceMemoryResource): + """ + A memory resource that uses the user-provided callables to do + memory allocation and deallocation. + + ``CallbackMemoryResource`` should really only be used for + debugging memory issues, as there is a significant performance + penalty associated with using a Python function for each memory + allocation and deallocation. + + Parameters + ---------- + allocate_func: callable + The allocation function must accept a single integer argument, + representing the number of bytes to allocate, and return an + integer representing the pointer to the allocated memory. + deallocate_func: callable + The deallocation function must accept two arguments, an integer + representing the pointer to the memory to free, and a second + integer representing the number of bytes to free. + + Examples + -------- + >>> import rmm + >>> base_mr = rmm.mr.CudaMemoryResource() + >>> def allocate_func(size): + ... print(f"Allocating {size} bytes") + ... return base_mr.allocate(size) + ... + >>> def deallocate_func(ptr, size): + ... print(f"Deallocating {size} bytes") + ... return base_mr.deallocate(ptr, size) + ... 
+ >>> rmm.mr.set_current_device_resource( + ... rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func) + ... ) + >>> dbuf = rmm.DeviceBuffer(size=256) + Allocating 256 bytes + >>> del dbuf + Deallocating 256 bytes + """ + def __init__( + self, + allocate_func, + deallocate_func, + ): + # Hold references to the callables: the C++ resource is only given + # raw pointers to them. + self._allocate_func = allocate_func + self._deallocate_func = deallocate_func + self.c_obj.reset( + new callback_memory_resource( + <allocate_callback_t>(_allocate_callback_wrapper), + <deallocate_callback_t>(_deallocate_callback_wrapper), + <void*>(allocate_func), + <void*>(deallocate_func) + ) + ) + + def _append_id(filename, id): """ Append ".dev" onto a filename before the extension diff --git a/python/rmm/mr.py b/python/rmm/mr.py index eaa9705eb..ea0a0cf8c 100644 --- a/python/rmm/mr.py +++ b/python/rmm/mr.py @@ -13,6 +13,7 @@ # limitations under the License. from rmm._lib.memory_resource import ( BinningMemoryResource, + CallbackMemoryResource, CudaAsyncMemoryResource, CudaMemoryResource, DeviceMemoryResource, @@ -39,6 +40,7 @@ __all__ = [ "BinningMemoryResource", + "CallbackMemoryResource", "CudaAsyncMemoryResource", "CudaMemoryResource", "DeviceMemoryResource", diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 409ed6b72..fa38ee9fe 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -719,3 +719,34 @@ def test_dev_buf_circle_ref_dealloc(): # deallocate `dbuf1` (which needs the MR alive), a segfault occurs. 
gc.collect() + + +def test_mr_allocate_deallocate(): + mr = rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource()) + size = 1 << 23 # 8 MiB + ptr = mr.allocate(size) # handed back to mr.deallocate below + assert mr.get_allocated_bytes() == 1 << 23 # == binds looser than << + mr.deallocate(ptr, size) + assert mr.get_allocated_bytes() == 0 + + +def test_custom_mr(capsys): + base_mr = rmm.mr.CudaMemoryResource() + + def allocate_func(size): + print(f"Allocating {size} bytes") + return base_mr.allocate(size) + + def deallocate_func(ptr, size): + print(f"Deallocating {size} bytes") + return base_mr.deallocate(ptr, size) + + rmm.mr.set_current_device_resource( + rmm.mr.CallbackMemoryResource(allocate_func, deallocate_func) + ) + + dbuf = rmm.DeviceBuffer(size=256) + del dbuf # expected to invoke deallocate_func; output is checked below + + captured = capsys.readouterr() + assert captured.out == "Allocating 256 bytes\nDeallocating 256 bytes\n" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 344f66db5..8f68141fd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -150,3 +150,6 @@ ConfigureTest(ARENA_MR_TEST mr/device/arena_mr_tests.cpp) # binning MR tests ConfigureTest(BINNING_MR_TEST mr/device/binning_mr_tests.cpp) + +# callback memory resource tests +ConfigureTest(CALLBACK_MR_TEST mr/device/callback_mr_tests.cpp) diff --git a/tests/mock_resource.hpp b/tests/mock_resource.hpp new file mode 100644 index 000000000..0436e2a2a --- /dev/null +++ b/tests/mock_resource.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include <rmm/mr/device/device_memory_resource.hpp> + +#include <gmock/gmock.h> + +namespace rmm::test { + +class mock_resource : public rmm::mr::device_memory_resource { + public: + MOCK_METHOD(bool, supports_streams, (), (const, override, noexcept)); + MOCK_METHOD(bool, supports_get_mem_info, (), (const, override, noexcept)); + MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override)); + MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override)); + using size_pair = std::pair<std::size_t, std::size_t>;  // no bare comma in MOCK_METHOD args + MOCK_METHOD(size_pair, do_get_mem_info, (cuda_stream_view), (const, override)); +}; + +} // namespace rmm::test diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index f1ed561f4..62b941d6f 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "../../mock_resource.hpp" #include #include #include @@ -28,16 +29,6 @@ namespace { using ::testing::Return; -class mock_resource : public rmm::mr::device_memory_resource { - public: - MOCK_METHOD(bool, supports_streams, (), (const, override, noexcept)); - MOCK_METHOD(bool, supports_get_mem_info, (), (const, override, noexcept)); - MOCK_METHOD(void*, do_allocate, (std::size_t, cuda_stream_view), (override)); - MOCK_METHOD(void, do_deallocate, (void*, std::size_t, cuda_stream_view), (override)); - using size_pair = std::pair<std::size_t, std::size_t>; - MOCK_METHOD(size_pair, do_get_mem_info, (cuda_stream_view), (const, override)); -}; - using aligned_mock = rmm::mr::aligned_resource_adaptor; using aligned_real = rmm::mr::aligned_resource_adaptor; diff --git a/tests/mr/device/callback_mr_tests.cpp b/tests/mr/device/callback_mr_tests.cpp new file mode 100644 index 000000000..101a75fc8 --- /dev/null +++ b/tests/mr/device/callback_mr_tests.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../../byte_literals.hpp" +#include "../../mock_resource.hpp" +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace rmm::test { +namespace { + +using ::testing::_; + +TEST(CallbackTest, TestCallbacksAreInvoked) +{ + auto base_mr = mock_resource();  // both callbacks forward to this mock through `arg` + EXPECT_CALL(base_mr, do_allocate(10_MiB, cuda_stream_view{})).Times(1); + EXPECT_CALL(base_mr, do_deallocate(_, 10_MiB, cuda_stream_view{})).Times(1); + + auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { + auto base_mr = static_cast<rmm::mr::device_memory_resource*>(arg); + return base_mr->allocate(size, stream); + }; + auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { + auto base_mr = static_cast<rmm::mr::device_memory_resource*>(arg); + base_mr->deallocate(ptr, size, stream); + }; + auto mr = + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, &base_mr, &base_mr); + auto ptr = mr.allocate(10_MiB); + mr.deallocate(ptr, 10_MiB); +} + +TEST(CallbackTest, LoggingTest) +{ + testing::internal::CaptureStdout(); + + auto base_mr = rmm::mr::get_current_device_resource(); + auto allocate_callback = [](std::size_t size, cuda_stream_view stream, void* arg) { + std::cout << "Allocating " << size << " bytes" << std::endl; + auto base_mr = static_cast<rmm::mr::device_memory_resource*>(arg); + return base_mr->allocate(size, stream); + }; + + auto deallocate_callback = [](void* ptr, std::size_t size, cuda_stream_view stream, void* arg) { + std::cout << "Deallocating " << size << " bytes" << std::endl; + auto base_mr = static_cast<rmm::mr::device_memory_resource*>(arg); + base_mr->deallocate(ptr, size, stream); + }; + auto mr = + rmm::mr::callback_memory_resource(allocate_callback, deallocate_callback, base_mr, base_mr); + auto ptr = mr.allocate(10_MiB); + mr.deallocate(ptr, 10_MiB); + + std::string output = testing::internal::GetCapturedStdout(); + std::string expect = fmt::format("Allocating {} bytes\nDeallocating {} bytes\n", 10_MiB, 10_MiB); + ASSERT_EQ(expect, output); +} + +} // namespace 
+} // namespace rmm::test