diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index 73e589ebb8e..ebc4d76b6a0 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from __future__ import annotations @@ -18,6 +18,27 @@ T = TypeVar("T", bound="Buffer") +def host_memory_allocation(nbytes: int) -> memoryview: + """Allocate host memory using NumPy. + + This is an alternative to `bytearray` to avoid memory initialization cost. + A `bytearray` is zero-initialized using `calloc`, which we don't need. + Additionally, `numpy.empty` both skips the zero-initialization and uses + hugepages when available. + + Parameters + ---------- + nbytes : int + Size of the new host allocation in bytes. + + Returns + ------- + memoryview + The new host allocation. + """ + return numpy.empty((nbytes,), dtype="u1").data + + def cuda_array_interface_wrapper( ptr: int, size: int, @@ -204,7 +225,7 @@ def __cuda_array_interface__(self) -> Mapping: def memoryview(self) -> memoryview: """Read-only access to the buffer through host memory.""" - host_buf = bytearray(self.size) + host_buf = host_memory_allocation(self.size) rmm._lib.device_buffer.copy_ptr_to_host(self.ptr, host_buf) return memoryview(host_buf).toreadonly() diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index c21f980e599..6b99b875572 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
from __future__ import annotations @@ -13,7 +13,11 @@ import rmm -from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper +from cudf.core.buffer.buffer import ( + Buffer, + cuda_array_interface_wrapper, + host_memory_allocation, +) from cudf.utils.string import format_bytes if TYPE_CHECKING: @@ -205,7 +209,7 @@ def spill(self, target: str = "cpu") -> None: ) if (ptr_type, target) == ("gpu", "cpu"): - host_mem = memoryview(bytearray(self.size)) + host_mem = host_memory_allocation(self.size) rmm._lib.device_buffer.copy_ptr_to_host(self._ptr, host_mem) self._ptr_desc["memoryview"] = host_mem self._ptr = 0 @@ -339,7 +343,7 @@ def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview: return self._ptr_desc["memoryview"][offset : offset + size] else: assert self._ptr_desc["type"] == "gpu" - ret = memoryview(bytearray(size)) + ret = host_memory_allocation(size) rmm._lib.device_buffer.copy_ptr_to_host( self._ptr + offset, ret )