From c3853ebdd755e01b4c48082abdc03f8c410647d6 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 2 Mar 2022 11:02:33 -0800 Subject: [PATCH 1/3] Log allocation failures --- benchmarks/utilities/log_parser.hpp | 3 ++- include/rmm/mr/device/logging_resource_adaptor.hpp | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp index db939e65f..583d1cfc0 100644 --- a/benchmarks/utilities/log_parser.hpp +++ b/benchmarks/utilities/log_parser.hpp @@ -164,7 +164,8 @@ inline std::vector parse_csv(std::string const& filename) for (std::size_t i = 0; i < actions.size(); ++i) { auto const& action = actions[i]; - RMM_EXPECTS((action == "allocate") or (action == "free"), "Invalid action string."); + RMM_EXPECTS((action == "allocate") or (action == "allocate failure") or (action == "free"), + "Invalid action string."); auto act = (action == "allocate") ? action::ALLOCATE : action::FREE; events[i] = event{tids[i], act, sizes[i], pointers[i], streams[i], i}; } diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index f583e31b9..6b9d4e11f 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -222,9 +222,14 @@ class logging_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const ptr = upstream_->allocate(bytes, stream); - logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); - return ptr; + try { + auto const ptr = upstream_->allocate(bytes, stream); + logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + return ptr; + } catch (...) 
{ + logger_->info("allocate failure,,{},{}", bytes, fmt::ptr(stream.value())); + throw; + } } /** From 6b19b6469728cf4737a0a05e4e69b955c252e54c Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 2 Mar 2022 12:37:53 -0800 Subject: [PATCH 2/3] add test --- benchmarks/replay/replay.cpp | 3 +- benchmarks/utilities/log_parser.hpp | 18 +++++++--- .../mr/device/logging_resource_adaptor.hpp | 2 +- tests/logger_tests.cpp | 35 +++++++++++++++++++ 4 files changed, 52 insertions(+), 6 deletions(-) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index aa8c077da..edae79acf 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -217,10 +217,11 @@ struct replay_benchmark { cv.wait(lock, [&]() { return event_index == event.index; }); } + // rmm::detail::action::ALLOCATE_FAILURE is ignored. if (rmm::detail::action::ALLOCATE == event.act) { auto ptr = mr_->allocate(event.size); set_allocation(event.pointer, allocation{ptr, event.size}); - } else { + } else if (rmm::detail::action::FREE == event.act) { auto alloc = remove_allocation(event.pointer); mr_->deallocate(alloc.ptr, event.size); } diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp index 583d1cfc0..d52672a4b 100644 --- a/benchmarks/utilities/log_parser.hpp +++ b/benchmarks/utilities/log_parser.hpp @@ -33,7 +33,7 @@ namespace rmm::detail { -enum class action : bool { ALLOCATE, FREE }; +enum class action { ALLOCATE, FREE, ALLOCATE_FAILURE }; /** * @brief Represents an allocation event @@ -85,8 +85,13 @@ struct event { inline std::ostream& operator<<(std::ostream& os, event const& evt) { - const auto* act_string = (evt.act == action::ALLOCATE) ? 
"allocate" : "free"; - + const auto* act_string{[&evt] { + switch (evt.act) { + case action::ALLOCATE: return "allocate"; + case action::FREE: return "free"; + default: return "allocate failure"; + } + }()}; const auto format_width{9}; os << "Thread: " << evt.thread_id << std::setw(format_width) << act_string @@ -166,7 +171,12 @@ inline std::vector parse_csv(std::string const& filename) auto const& action = actions[i]; RMM_EXPECTS((action == "allocate") or (action == "allocate failure") or (action == "free"), "Invalid action string."); - auto act = (action == "allocate") ? action::ALLOCATE : action::FREE; + auto act{action::ALLOCATE_FAILURE}; + if (action == "allocate") { + act = action::ALLOCATE; + } else if (action == "free") { + act = action::FREE; + } events[i] = event{tids[i], act, sizes[i], pointers[i], streams[i], i}; } return events; diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 6b9d4e11f..d745e6ca3 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -227,7 +227,7 @@ class logging_resource_adaptor final : public device_memory_resource { logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); return ptr; } catch (...) { - logger_->info("allocate failure,,{},{}", bytes, fmt::ptr(stream.value())); + logger_->info("allocate failure,{},{},{}", nullptr, bytes, fmt::ptr(stream.value())); throw; } } diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index 1b8356ac7..f221a3a36 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "./byte_literals.hpp" #include #include @@ -24,6 +25,9 @@ #include +namespace rmm::test { +namespace { + class raii_restore_env { public: raii_restore_env(char const* name) : name_(name) @@ -212,6 +216,34 @@ TEST(Adaptor, EnvironmentPath) expect_log_events(filename, expected_events); } +TEST(Adaptor, AllocateFailure) +{ + std::string filename{"logs/failure.txt"}; + rmm::mr::cuda_memory_resource upstream; + + auto log_mr = rmm::mr::make_logging_adaptor(&upstream, filename); + + auto const size0{99}; + auto const size1{1_TiB}; + + auto* ptr0 = log_mr.allocate(size0); + log_mr.deallocate(ptr0, size0); + try { + log_mr.allocate(size1); + } catch (...) { + } + log_mr.flush(); + + using rmm::detail::action; + using rmm::detail::event; + + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::FREE, size0, ptr0}, + {action::ALLOCATE_FAILURE, size1, nullptr}}; + + expect_log_events(filename, expected_events); +} + TEST(Adaptor, STDOUT) { testing::internal::CaptureStdout(); @@ -247,3 +279,6 @@ TEST(Adaptor, STDERR) std::string header = output.substr(0, output.find('\n')); ASSERT_EQ(header, log_mr.header()); } + +} // namespace +} // namespace rmm::test From 7a244ff7158f61a6ac8b21f9093c05f9b0f077b6 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Mon, 14 Mar 2022 09:14:43 -0700 Subject: [PATCH 3/3] update doc for allocation failure --- include/rmm/mr/device/logging_resource_adaptor.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index d745e6ca3..60fd0d366 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -205,10 +205,16 @@ class logging_resource_adaptor final : public device_memory_resource { * @brief Allocates memory of size at least `bytes` using the upstream * resource and logs the allocation. 
* - * If the upstream allocation is successful logs the - * following CSV formatted line to the file specified at construction: + * If the upstream allocation is successful, logs the following CSV-formatted + * line to the file specified at construction: * ``` - * thread_id,*TIMESTAMP*,"allocate",*bytes*,*stream* + * thread_id,*TIMESTAMP*,"allocate",*pointer*,*bytes*,*stream* + * ``` + * + * If the upstream allocation fails, logs the following CSV-formatted line + * to the file specified at construction: + * ``` + * thread_id,*TIMESTAMP*,"allocate failure",0x0,*bytes*,*stream* * ``` * * The returned pointer has at least 256B alignment.