Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log allocation failures #988

Merged
merged 4 commits into from
Mar 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion benchmarks/replay/replay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,11 @@ struct replay_benchmark {
cv.wait(lock, [&]() { return event_index == event.index; });
}

// rmm::detail::action::ALLOCATE_FAILURE is ignored.
if (rmm::detail::action::ALLOCATE == event.act) {
auto ptr = mr_->allocate(event.size);
set_allocation(event.pointer, allocation{ptr, event.size});
} else {
} else if (rmm::detail::action::FREE == event.act) {
auto alloc = remove_allocation(event.pointer);
mr_->deallocate(alloc.ptr, event.size);
}
Expand Down
21 changes: 16 additions & 5 deletions benchmarks/utilities/log_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

namespace rmm::detail {

enum class action : bool { ALLOCATE, FREE };
enum class action { ALLOCATE, FREE, ALLOCATE_FAILURE };

/**
* @brief Represents an allocation event
Expand Down Expand Up @@ -85,8 +85,13 @@ struct event {

inline std::ostream& operator<<(std::ostream& os, event const& evt)
{
const auto* act_string = (evt.act == action::ALLOCATE) ? "allocate" : "free";

const auto* act_string{[&evt] {
switch (evt.act) {
case action::ALLOCATE: return "allocate";
case action::FREE: return "free";
default: return "allocate failure";
}
}()};
const auto format_width{9};

os << "Thread: " << evt.thread_id << std::setw(format_width) << act_string
Expand Down Expand Up @@ -164,8 +169,14 @@ inline std::vector<event> parse_csv(std::string const& filename)

for (std::size_t i = 0; i < actions.size(); ++i) {
auto const& action = actions[i];
RMM_EXPECTS((action == "allocate") or (action == "free"), "Invalid action string.");
auto act = (action == "allocate") ? action::ALLOCATE : action::FREE;
RMM_EXPECTS((action == "allocate") or (action == "allocate failure") or (action == "free"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will break the parsing as it assumes an event can only be an action::ALLOCATE or action::FREE. But adding an allocate failure effectively adds a third kind of event.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

Copy link
Contributor

@jrhemstad jrhemstad Mar 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a third event isn't quite what I meant. This parser is used for the replay benchmark as well and this will still break the replay benchmark.

Nevermind, I see the replay was already updated.

"Invalid action string.");
auto act{action::ALLOCATE_FAILURE};
if (action == "allocate") {
act = action::ALLOCATE;
} else if (action == "free") {
act = action::FREE;
}
events[i] = event{tids[i], act, sizes[i], pointers[i], streams[i], i};
}
return events;
Expand Down
23 changes: 17 additions & 6 deletions include/rmm/mr/device/logging_resource_adaptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,16 @@ class logging_resource_adaptor final : public device_memory_resource {
* @brief Allocates memory of size at least `bytes` using the upstream
* resource and logs the allocation.
*
* If the upstream allocation is successful logs the
* following CSV formatted line to the file specified at construction:
* If the upstream allocation is successful, logs the following CSV formatted
* line to the file specified at construction:
* ```
* thread_id,*TIMESTAMP*,"allocate",*bytes*,*stream*
* thread_id,*TIMESTAMP*,"allocate",*pointer*,*bytes*,*stream*
* ```
*
* If the upstream allocation failed, logs the following CSV formatted line
* to the file specified at construction:
* ```
* thread_id,*TIMESTAMP*,"allocate failure",0x0,*bytes*,*stream*
* ```
*
* The returned pointer has at least 256B alignment.
Expand All @@ -222,9 +228,14 @@ class logging_resource_adaptor final : public device_memory_resource {
*/
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
{
auto const ptr = upstream_->allocate(bytes, stream);
logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value()));
return ptr;
try {
auto const ptr = upstream_->allocate(bytes, stream);
logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value()));
return ptr;
} catch (...) {
logger_->info("allocate failure,{},{},{}", nullptr, bytes, fmt::ptr(stream.value()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Documentation needs to be updated that it will log a different message for failed allocations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

throw;
}
}

/**
Expand Down
35 changes: 35 additions & 0 deletions tests/logger_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include "./byte_literals.hpp"
#include <benchmarks/utilities/log_parser.hpp>

#include <rmm/mr/device/cuda_memory_resource.hpp>
Expand All @@ -24,6 +25,9 @@

#include <thread>

namespace rmm::test {
namespace {

class raii_restore_env {
public:
raii_restore_env(char const* name) : name_(name)
Expand Down Expand Up @@ -212,6 +216,34 @@ TEST(Adaptor, EnvironmentPath)
expect_log_events(filename, expected_events);
}

TEST(Adaptor, AllocateFailure)
{
std::string filename{"logs/failure.txt"};
rmm::mr::cuda_memory_resource upstream;

auto log_mr = rmm::mr::make_logging_adaptor(&upstream, filename);

auto const size0{99};
auto const size1{1_TiB};

auto* ptr0 = log_mr.allocate(size0);
log_mr.deallocate(ptr0, size0);
try {
log_mr.allocate(size1);
} catch (...) {
}
log_mr.flush();

using rmm::detail::action;
using rmm::detail::event;

std::vector<event> expected_events{{action::ALLOCATE, size0, ptr0},
{action::FREE, size0, ptr0},
{action::ALLOCATE_FAILURE, size1, nullptr}};

expect_log_events(filename, expected_events);
}

TEST(Adaptor, STDOUT)
{
testing::internal::CaptureStdout();
Expand Down Expand Up @@ -247,3 +279,6 @@ TEST(Adaptor, STDERR)
std::string header = output.substr(0, output.find('\n'));
ASSERT_EQ(header, log_mr.header());
}

} // namespace
} // namespace rmm::test