Skip to content

Commit

Permalink
Use numpy.empty() instead of bytearray to allocate host memory for spilling (#12399)
Browse files Browse the repository at this point in the history

A `bytearray` is zero-initialized using `calloc`, which we don't need.  Additionally, `numpy.empty` both skips the zero-initialization and uses hugepages when available <numpy/numpy#14216>.

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - https://github.com/jakirkham

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - https://github.com/jakirkham

URL: #12399
  • Loading branch information
madsbk authored Jan 11, 2023
1 parent 2e86077 commit 074c359
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
25 changes: 23 additions & 2 deletions python/cudf/cudf/core/buffer/buffer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from __future__ import annotations

Expand All @@ -18,6 +18,27 @@
T = TypeVar("T", bound="Buffer")


def host_memory_allocation(nbytes: int) -> memoryview:
    """Allocate writable host memory backed by NumPy.

    Preferred over ``bytearray`` for fresh host allocations: a
    ``bytearray`` is zero-initialized (via ``calloc``), which is wasted
    work when the buffer is about to be overwritten, whereas
    ``numpy.empty`` skips initialization and can use hugepages when
    available <https://github.com/numpy/numpy/pull/14216>.

    Parameters
    ----------
    nbytes : int
        Size of the new host allocation in bytes.

    Returns
    -------
    memoryview
        A writable view of the newly allocated (uninitialized) memory.
    """
    backing = numpy.empty(shape=(nbytes,), dtype=numpy.uint8)
    return backing.data


def cuda_array_interface_wrapper(
ptr: int,
size: int,
Expand Down Expand Up @@ -204,7 +225,7 @@ def __cuda_array_interface__(self) -> Mapping:

def memoryview(self) -> memoryview:
    """Read-only access to the buffer through host memory.

    Returns
    -------
    memoryview
        A read-only copy of the device buffer's contents in host memory.
    """
    # Fix: removed a dead store (`host_buf = bytearray(self.size)`) that
    # allocated `self.size` zero-filled bytes and was immediately
    # overwritten on the next line.  Allocate uninitialized host memory
    # once and copy the device buffer into it.
    host_buf = host_memory_allocation(self.size)
    rmm._lib.device_buffer.copy_ptr_to_host(self.ptr, host_buf)
    return memoryview(host_buf).toreadonly()

Expand Down
12 changes: 8 additions & 4 deletions python/cudf/cudf/core/buffer/spillable_buffer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from __future__ import annotations

Expand All @@ -13,7 +13,11 @@

import rmm

from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper
from cudf.core.buffer.buffer import (
Buffer,
cuda_array_interface_wrapper,
host_memory_allocation,
)
from cudf.utils.string import format_bytes

if TYPE_CHECKING:
Expand Down Expand Up @@ -205,7 +209,7 @@ def spill(self, target: str = "cpu") -> None:
)

if (ptr_type, target) == ("gpu", "cpu"):
host_mem = memoryview(bytearray(self.size))
host_mem = host_memory_allocation(self.size)
rmm._lib.device_buffer.copy_ptr_to_host(self._ptr, host_mem)
self._ptr_desc["memoryview"] = host_mem
self._ptr = 0
Expand Down Expand Up @@ -339,7 +343,7 @@ def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview:
return self._ptr_desc["memoryview"][offset : offset + size]
else:
assert self._ptr_desc["type"] == "gpu"
ret = memoryview(bytearray(size))
ret = host_memory_allocation(size)
rmm._lib.device_buffer.copy_ptr_to_host(
self._ptr + offset, ret
)
Expand Down

0 comments on commit 074c359

Please sign in to comment.