Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cuda::mr::memory_resource instead of raw device_memory_resource #1095

Merged
merged 37 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
90adef2
Add cccl repo to build dependencies
miscco Sep 8, 2023
a49fe9c
PoC for the new design of `cuda::mr::{async_}resource_ref`
miscco Sep 8, 2023
0e78d9e
Make `pinned_memory_resource` usable for `pool_memory_resource`
miscco Sep 8, 2023
4d25841
Port `device_buffer` and `device_uvector` to the new interface
miscco Sep 8, 2023
de71dcc
Add cccl repo to build dependencies
miscco Sep 8, 2023
13a990f
PoC for the new design of `cuda::mr::{async_}resource_ref`
miscco Sep 8, 2023
9ceab83
Make `pinned_memory_resource` usable for `pool_memory_resource`
miscco Sep 8, 2023
c0df290
Port `device_buffer` and `device_uvector` to the new interface
miscco Sep 8, 2023
8b1bc04
Use upstream patch for memory resource.
bdice Oct 17, 2023
bccabd1
Add target_link_libraries for libcudacxx.
bdice Oct 17, 2023
5b27e10
Revert "Use upstream patch for memory resource."
bdice Oct 17, 2023
194da78
Merge branch 'memory_resource' of github.com:miscco/rmm into memory_r…
miscco Oct 18, 2023
6e610a3
Add restrictions on memory usage for tests
miscco Oct 18, 2023
edb8f78
Remove `get_current_device_resource_ref`
miscco Oct 18, 2023
4ea703e
Make CI happy
miscco Oct 18, 2023
e9a5ace
Merge branch 'branch-23.12' into memory_resource
harrism Oct 25, 2023
df9c6e9
Merge branch 'branch-23.12' into memory_resource
miscco Nov 1, 2023
8e7c91b
Address review comments
miscco Nov 1, 2023
849c880
Address alignment differences with `cuda::mr::resource_ref`
miscco Nov 1, 2023
2cf03a7
Roll back the changes to `device_u{buffer, vector}`
miscco Nov 2, 2023
76d55fa
Address review comments
miscco Nov 2, 2023
3390fb8
Revert "Roll back the changes to `device_u{buffer, vector}`"
miscco Nov 3, 2023
c0c8504
Merge branch 'branch-23.12' into memory_resource
miscco Nov 3, 2023
559a674
Add a check that we do not store unnecessary data inside a `async_res…
miscco Nov 7, 2023
c1f36d8
Properly test that a `device_memory_resource` is an `async_resource`
miscco Nov 7, 2023
1d422b5
Also port `rmm::mr::thrust_allocator` to `async_resource_ref`
miscco Nov 7, 2023
17021f1
Merge branch 'branch-23.12' into memory_resource
miscco Nov 7, 2023
e867442
Do not put `rmm::bad_alloc` in quotes
miscco Nov 7, 2023
73efc2e
Try to avoid namespaces in comments
miscco Nov 7, 2023
c298fbc
Tell sphinx to skip documenting libcu++ names
miscco Nov 7, 2023
41c1bea
Merge branch 'branch-23.12' into memory_resource
harrism Nov 15, 2023
80201bd
Update copyright years.
bdice Nov 15, 2023
5937e3e
Fix typos and grammar.
bdice Nov 15, 2023
baf99f3
Drop support for CUDA older than CUDA 11 in tests.
bdice Nov 15, 2023
af12733
Do not propagate the `device_accessible` property for containers
miscco Nov 16, 2023
37c1e3f
Fix deadlock in new tests using fix from #1097
harrism Nov 16, 2023
67d1bdc
Merge branch 'memory_resource' of https://github.com/miscco/rmm into …
harrism Nov 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ rapids_cpm_init()

include(cmake/thirdparty/get_fmt.cmake)
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_libcudacxx.cmake)
miscco marked this conversation as resolved.
Show resolved Hide resolved
include(cmake/thirdparty/get_thrust.cmake)

# ##################################################################################################
Expand All @@ -89,11 +90,13 @@ else()
target_link_libraries(rmm INTERFACE CUDA::cudart)
endif()

target_link_libraries(rmm INTERFACE libcudacxx::libcudacxx)
target_link_libraries(rmm INTERFACE rmm::Thrust)
target_link_libraries(rmm INTERFACE fmt::fmt-header-only)
target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
target_link_libraries(rmm INTERFACE dl)
target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
target_compile_definitions(rmm INTERFACE LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

# ##################################################################################################
# * tests and benchmarks ---------------------------------------------------------------------------
Expand Down
23 changes: 23 additions & 0 deletions cmake/thirdparty/get_libcudacxx.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# =============================================================================
# Copyright (c) 2023, NVIDIA CORPORATION.
bdice marked this conversation as resolved.
Show resolved Hide resolved
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone libcudacxx
function(find_and_configure_libcudacxx)

  # rapids-cmake provides a pinned CPM package entry for libcudacxx.
  include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)
  # Register libcudacxx with both the build-tree and install-tree export sets
  # so that consumers of rmm-exports receive the dependency transitively.
  rapids_cpm_libcudacxx(BUILD_EXPORT_SET rmm-exports INSTALL_EXPORT_SET rmm-exports)

endfunction()

find_and_configure_libcudacxx()
16 changes: 16 additions & 0 deletions include/rmm/cuda_stream_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#include <cuda_runtime_api.h>

#include <cuda/stream_ref>

#include <atomic>
#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -58,6 +60,13 @@ class cuda_stream_view {
*/
constexpr cuda_stream_view(cudaStream_t stream) noexcept : stream_{stream} {}

  /**
   * @brief Implicit conversion from stream_ref.
   *
   * Intentionally non-explicit so that a `cuda::stream_ref` can be passed
   * directly to any API taking a `cuda_stream_view`.
   *
   * @param stream The underlying stream for this view
   */
  constexpr cuda_stream_view(cuda::stream_ref stream) noexcept : stream_{stream.get()} {}

/**
* @brief Get the wrapped stream.
*
Expand All @@ -72,6 +81,13 @@ class cuda_stream_view {
*/
constexpr operator cudaStream_t() const noexcept { return value(); }

  /**
   * @brief Implicit conversion to stream_ref.
   *
   * Allows a `cuda_stream_view` to be passed directly to libcu++ APIs that
   * take a `cuda::stream_ref`.
   *
   * @return stream_ref The underlying stream referenced by this cuda_stream_view
   */
  constexpr operator cuda::stream_ref() const noexcept { return value(); }

/**
* @briefreturn{true if the wrapped stream is the CUDA per-thread default stream}
*/
Expand Down
32 changes: 21 additions & 11 deletions include/rmm/device_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include <stdexcept>
#include <utility>

#include <cuda/memory_resource>

namespace rmm {
/**
* @addtogroup data_containers
Expand Down Expand Up @@ -79,6 +81,8 @@ namespace rmm {
*```
*/
class device_buffer {
using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;

public:
// The copy constructor and copy assignment operator without a stream are deleted because they
// provide no way to specify an explicit stream
Expand Down Expand Up @@ -106,7 +110,7 @@ class device_buffer {
*/
explicit device_buffer(std::size_t size,
cuda_stream_view stream,
mr::device_memory_resource* mr = mr::get_current_device_resource())
async_resource_ref mr = mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
allocate_async(size);
Expand Down Expand Up @@ -134,7 +138,7 @@ class device_buffer {
device_buffer(void const* source_data,
std::size_t size,
cuda_stream_view stream,
mr::device_memory_resource* mr = mr::get_current_device_resource())
async_resource_ref mr = rmm::mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
allocate_async(size);
Expand Down Expand Up @@ -164,7 +168,7 @@ class device_buffer {
*/
  // Delegates to the (pointer, size) constructor: builds a new buffer from the
  // contents of `other`, allocating from `mr` on `stream`.
  device_buffer(device_buffer const& other,
                cuda_stream_view stream,
                async_resource_ref mr = rmm::mr::get_current_device_resource())
    : device_buffer{other.data(), other.size(), stream, mr}
  {
  }
Expand Down Expand Up @@ -236,7 +240,6 @@ class device_buffer {
~device_buffer() noexcept
{
deallocate_async();
_mr = nullptr;
_stream = cuda_stream_view{};
}

Expand Down Expand Up @@ -395,18 +398,25 @@ class device_buffer {
void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }

  /**
   * @briefreturn{The async_resource_ref used to allocate and deallocate}
   */
  [[nodiscard]] async_resource_ref memory_resource() const noexcept { return _mr; }

/**
* @brief Enables the `cuda::mr::device_accessible` property
*
* This property declares that a `device_buffer` provides device accessible memory
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; }
friend void get_property(device_buffer const&, cuda::mr::device_accessible) noexcept {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question (non-blocking): We haven't really talked about adding properties to containers yet. Is this something we feel ready to do? I don't think it will be strictly necessary for this PR.

Copy link
Member

@harrism harrism Nov 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@miscco can you comment? Since this is a pure addition and not a breaking change, the risk is low. However, if we decide to change this in the future perhaps it could be baggage that we wish we hadn't added so early?

I would like to understand the motivation for this as well, @miscco .

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my mind, giving proper guarantees for the memory allocator is not enough if you drop them on the floor once you allocated.

So for me it does make sense that the feature that actually uses the memory allocated by the resource ref is also declaring what properties it inherited from the allocator. I am happy to pull it out though

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dropped them for now, we can come up with a comprehensive decision later

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you drop them on the floor once you allocated.

Sorry, what do you mean by "drop them on the floor"? I wasn't saying you should remove this. Just trying to understand how it is used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point is that currently the properties are solely used on the allocator. Once you have allocated something we do not propagate the properties anymore, but that might actually be the relevant part.

Often someone will pass around a device_buffer and it would be awesome to know whether it contains pinned memory without going through the memory resource

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I agree that would be useful. But this property is hard coded. How would dynamic properties like the example you give work.


private:
void* _data{nullptr}; ///< Pointer to device memory allocation
std::size_t _size{}; ///< Requested size of the device memory allocation
std::size_t _capacity{}; ///< The actual size of the device memory allocation
cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation
mr::device_memory_resource* _mr{
mr::get_current_device_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
async_resource_ref _mr{
rmm::mr::get_current_device_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory

/**
* @brief Allocates the specified amount of memory and updates the size/capacity accordingly.
Expand All @@ -421,7 +431,7 @@ class device_buffer {
{
_size = bytes;
_capacity = bytes;
_data = (bytes > 0) ? memory_resource()->allocate(bytes, stream()) : nullptr;
_data = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr;
miscco marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand All @@ -435,7 +445,7 @@ class device_buffer {
*/
void deallocate_async() noexcept
{
if (capacity() > 0) { memory_resource()->deallocate(data(), capacity(), stream()); }
if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); }
_size = 0;
_capacity = 0;
_data = nullptr;
Expand Down
28 changes: 18 additions & 10 deletions include/rmm/device_uvector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <cstddef>
#include <vector>

#include <cuda/memory_resource>

namespace rmm {
/**
* @addtogroup data_containers
Expand Down Expand Up @@ -72,6 +74,7 @@ namespace rmm {
*/
template <typename T>
class device_uvector {
using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;
static_assert(std::is_trivially_copyable<T>::value,
"device_uvector only supports types that are trivially copyable.");

Expand Down Expand Up @@ -121,10 +124,9 @@ class device_uvector {
* @param stream The stream on which to perform the allocation
* @param mr The resource used to allocate the device storage
*/
explicit device_uvector(
std::size_t size,
cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
  /**
   * @brief Constructs a new device_uvector allocating storage for `size` elements.
   *
   * @param size The number of elements to allocate storage for
   * @param stream The stream on which to perform the allocation
   * @param mr The resource used to allocate the device storage
   */
  explicit device_uvector(std::size_t size,
                          cuda_stream_view stream,
                          async_resource_ref mr = rmm::mr::get_current_device_resource())
    : _storage{elements_to_bytes(size), stream, mr}
  {
  }
Expand All @@ -138,10 +140,9 @@ class device_uvector {
* @param stream The stream on which to perform the copy
* @param mr The resource used to allocate device memory for the new vector
*/
explicit device_uvector(
device_uvector const& other,
cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
  /**
   * @brief Constructs a new device_uvector by copying the device storage of `other`.
   *
   * @param other The vector to copy from
   * @param stream The stream on which to perform the copy
   * @param mr The resource used to allocate device memory for the new vector
   */
  explicit device_uvector(device_uvector const& other,
                          cuda_stream_view stream,
                          async_resource_ref mr = rmm::mr::get_current_device_resource())
    : _storage{other._storage, stream, mr}
  {
  }
Expand Down Expand Up @@ -524,13 +525,20 @@ class device_uvector {
[[nodiscard]] bool is_empty() const noexcept { return size() == 0; }

/**
* @briefreturn{Pointer to underlying resource used to allocate and deallocate the device storage}
* @briefreturn{The async_resource_ref used to allocate and deallocate the device storage}
*/
[[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept
[[nodiscard]] async_resource_ref memory_resource() const noexcept
{
return _storage.memory_resource();
}

  /**
   * @brief Enables the `cuda::mr::device_accessible` property
   *
   * This property declares that a `device_uvector` provides device accessible memory
   *
   * NOTE(review): presumably discovered via ADL when libcu++ queries
   * properties on the container — confirm against the libcu++ docs.
   */
  friend void get_property(device_uvector const&, cuda::mr::device_accessible) noexcept {}

/**
* @briefreturn{Stream most recently specified for allocation/deallocation}
*/
Expand Down
4 changes: 2 additions & 2 deletions include/rmm/mr/device/callback_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ class callback_memory_resource final : public device_memory_resource {
throw std::runtime_error("cannot get free / total memory");
}

[[nodiscard]] virtual bool supports_streams() const noexcept { return false; }
[[nodiscard]] virtual bool supports_get_mem_info() const noexcept { return false; }
[[nodiscard]] bool supports_streams() const noexcept override { return false; }
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

allocate_callback_t allocate_callback_;
deallocate_callback_t deallocate_callback_;
Expand Down
Loading