From 650e9d57d3363aa5728e9bf0c0d915dc833347ac Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 10 Dec 2020 14:22:14 -0800 Subject: [PATCH 01/18] Fix casting of CuPy stream to cudaStream_t --- python/rmm/_cuda/stream.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx index 0f6c5ab19..ba5e935c8 100644 --- a/python/rmm/_cuda/stream.pyx +++ b/python/rmm/_cuda/stream.pyx @@ -101,7 +101,7 @@ cdef class Stream: try: import cupy if isinstance(obj, cupy.cuda.stream.Stream): - self._cuda_stream = (obj.ptr) + self._cuda_stream = (obj.ptr) self._owner = obj return except ImportError: From 23f3ca64ea95ff4d4f441d60ca9fb0cf645c4800 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 10 Dec 2020 14:24:37 -0800 Subject: [PATCH 02/18] Support CuPy stream in rmm_cupy_allocator --- python/rmm/rmm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py index ff53f0edb..3f79debf4 100644 --- a/python/rmm/rmm.py +++ b/python/rmm/rmm.py @@ -18,6 +18,7 @@ import rmm from rmm import _lib as librmm +from rmm._cuda.stream import Stream # Utility Functions @@ -193,7 +194,8 @@ def rmm_cupy_allocator(nbytes): if cupy is None: raise ModuleNotFoundError("No module named 'cupy'") - buf = librmm.device_buffer.DeviceBuffer(size=nbytes) + stream = Stream(obj=cupy.cuda.get_current_stream()) + buf = librmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream) dev_id = -1 if buf.ptr else cupy.cuda.device.get_device_id() mem = cupy.cuda.UnownedMemory( ptr=buf.ptr, size=buf.size, owner=buf, device_id=dev_id From 27cdb4c1518cc464c86a4beee654c4e49d424242 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 10 Dec 2020 14:35:50 -0800 Subject: [PATCH 03/18] Add test for CuPy async stream --- python/rmm/tests/test_rmm.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 20755908b..28070cf28 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -269,22 +269,29 @@ def test_rmm_device_buffer_pickle_roundtrip(hb): assert hb3 == hb -def test_rmm_cupy_allocator(): +@pytest.mark.parametrize("stream", ["null", "async"]) +def test_rmm_cupy_allocator(stream): cupy = pytest.importorskip("cupy") - m = rmm.rmm_cupy_allocator(42) - assert m.mem.size == 42 - assert m.mem.ptr != 0 - assert isinstance(m.mem._owner, rmm.DeviceBuffer) - - m = rmm.rmm_cupy_allocator(0) - assert m.mem.size == 0 - assert m.mem.ptr == 0 - assert isinstance(m.mem._owner, rmm.DeviceBuffer) - - cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) - a = cupy.arange(10) - assert isinstance(a.data.mem._owner, rmm.DeviceBuffer) + if stream == "null": + stream = cupy.cuda.stream.Stream.null + else: + stream = cupy.cuda.stream.Stream() + + with stream: + m = rmm.rmm_cupy_allocator(42) + assert m.mem.size == 42 + assert m.mem.ptr != 0 + assert isinstance(m.mem._owner, rmm.DeviceBuffer) + + m = rmm.rmm_cupy_allocator(0) + assert m.mem.size == 0 + assert m.mem.ptr == 0 + assert isinstance(m.mem._owner, rmm.DeviceBuffer) + + cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) + a = cupy.arange(10) + assert isinstance(a.data.mem._owner, rmm.DeviceBuffer) @pytest.mark.parametrize("dtype", _dtypes) From 2d26f85d40f8818372b5c300068b0f1fe4332057 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 10 Dec 2020 15:06:17 -0800 Subject: [PATCH 04/18] Add RMM pool with CuPy streams tests --- python/rmm/tests/test_rmm.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 28070cf28..7c12e45c9 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -269,10 +269,31 @@ def test_rmm_device_buffer_pickle_roundtrip(hb): assert hb3 == hb +def test_rmm_cupy_allocator(): + cupy = pytest.importorskip("cupy") + + m = rmm.rmm_cupy_allocator(42) + assert m.mem.size == 42 + assert m.mem.ptr != 0 + assert isinstance(m.mem._owner, rmm.DeviceBuffer) + + m = rmm.rmm_cupy_allocator(0) + assert m.mem.size == 0 + assert m.mem.ptr == 0 + assert isinstance(m.mem._owner, rmm.DeviceBuffer) + + cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) + a = cupy.arange(10) + assert isinstance(a.data.mem._owner, rmm.DeviceBuffer) + + @pytest.mark.parametrize("stream", ["null", "async"]) -def test_rmm_cupy_allocator(stream): +def test_rmm_pool_cupy_allocator_with_stream(stream): cupy = pytest.importorskip("cupy") + rmm.reinitialize(pool_allocator=True) + cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) + if stream == "null": stream = cupy.cuda.stream.Stream.null else: @@ -289,10 +310,15 @@ def test_rmm_cupy_allocator(stream): assert m.mem.ptr == 0 assert isinstance(m.mem._owner, rmm.DeviceBuffer) - cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) a = cupy.arange(10) assert isinstance(a.data.mem._owner, rmm.DeviceBuffer) + # Deleting all allocations known by the RMM pool is required + # before rmm.reinitialize(), otherwise it may segfault. + del a + + rmm.reinitialize() + @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) From 4c7d25b71afc632d918fc8da32b12a3b79505eae Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 10 Dec 2020 15:44:36 -0800 Subject: [PATCH 05/18] Fix casting of Numba stream to cudaStream_t --- python/rmm/_cuda/stream.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx index ba5e935c8..9b99faa96 100644 --- a/python/rmm/_cuda/stream.pyx +++ b/python/rmm/_cuda/stream.pyx @@ -94,7 +94,7 @@ cdef class Stream: return self.c_is_default() def _init_from_numba_stream(self, obj): - self._cuda_stream = (obj.handle.value) + self._cuda_stream = (obj.handle.value) self._owner = obj def _init_from_cupy_stream(self, obj): From c7429350c51913dfdb6fd286de534b570d978ae0 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 10 Dec 2020 16:03:20 -0800 Subject: [PATCH 06/18] Handle Numba's default stream --- python/rmm/_cuda/stream.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx index 9b99faa96..6a1f04be5 100644 --- a/python/rmm/_cuda/stream.pyx +++ b/python/rmm/_cuda/stream.pyx @@ -94,7 +94,10 @@ cdef class Stream: return self.c_is_default() def _init_from_numba_stream(self, obj): - self._cuda_stream = (obj.handle.value) + if obj.handle.value == None: + self._cuda_stream = cuda_stream_default.value() + else: + self._cuda_stream = (obj.handle.value) self._owner = obj def _init_from_cupy_stream(self, obj): From 93f95f08d8fd7a78c872f3c54b573807527a0dd7 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 10 Dec 2020 16:04:10 -0800 Subject: [PATCH 07/18] Add RMM pool with Numba streams tests --- python/rmm/tests/test_rmm.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 7c12e45c9..69f53e84d 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -269,6 +269,21 @@ def test_rmm_device_buffer_pickle_roundtrip(hb): assert hb3 == hb +@pytest.mark.parametrize("stream", [cuda.default_stream(), cuda.stream()]) +#@pytest.mark.parametrize("stream", [cuda.stream()]) +def test_rmm_pool_numba_stream(stream): + rmm.reinitialize(pool_allocator=True) + + stream = rmm._cuda.stream.Stream(stream) + a = rmm._lib.device_buffer.DeviceBuffer(size=3, stream=stream) + + # Deleting all allocations known by the RMM pool is required + # before rmm.reinitialize(), otherwise it may segfault. + del a + + rmm.reinitialize() + + def test_rmm_cupy_allocator(): cupy = pytest.importorskip("cupy") From 209939b37bdb43702e0fb6438eb7e4726c062987 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 11 Dec 2020 02:20:54 -0800 Subject: [PATCH 08/18] Fix comparison to None --- python/rmm/_cuda/stream.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx index 6a1f04be5..f9d924e71 100644 --- a/python/rmm/_cuda/stream.pyx +++ b/python/rmm/_cuda/stream.pyx @@ -94,7 +94,7 @@ cdef class Stream: return self.c_is_default() def _init_from_numba_stream(self, obj): - if obj.handle.value == None: + if obj.handle.value is None: self._cuda_stream = cuda_stream_default.value() else: self._cuda_stream = (obj.handle.value) From fadf308ace021b42defa5f145b6aabfa0ee85362 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 11 Dec 2020 02:21:10 -0800 Subject: [PATCH 09/18] Remove unnecessary commented out code --- python/rmm/tests/test_rmm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 69f53e84d..5984a7311 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -270,7 +270,6 @@ def test_rmm_device_buffer_pickle_roundtrip(hb): @pytest.mark.parametrize("stream", [cuda.default_stream(), cuda.stream()]) -#@pytest.mark.parametrize("stream", [cuda.stream()]) def test_rmm_pool_numba_stream(stream): rmm.reinitialize(pool_allocator=True) From 079d24d729f26aaa3445dce92f8b12fcc9aaa288 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 11 Dec 2020 13:52:22 -0800 Subject: [PATCH 10/18] Simplify handling of Numba streams --- python/rmm/_cuda/stream.pyx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx index f9d924e71..4c9890d51 100644 --- a/python/rmm/_cuda/stream.pyx +++ b/python/rmm/_cuda/stream.pyx @@ -94,10 +94,7 @@ cdef class Stream: return self.c_is_default() def _init_from_numba_stream(self, obj): - if obj.handle.value is None: - self._cuda_stream = cuda_stream_default.value() - else: - self._cuda_stream = (obj.handle.value) + self._cuda_stream = (int(obj)) self._owner = obj def _init_from_cupy_stream(self, obj): From ea290a2fe0a2b2205f7c81aa09c244d554ec667a Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 11 Dec 2020 14:22:58 -0800 Subject: [PATCH 11/18] Store reference to stream in DeviceBuffer --- python/rmm/_lib/device_buffer.pxd | 1 + python/rmm/_lib/device_buffer.pyx | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 98a79d1da..cad62b42b 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -37,6 +37,7 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj + cdef Stream _stream @staticmethod cdef DeviceBuffer c_from_unique_ptr(unique_ptr[device_buffer] ptr) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index b29bff151..5350ab884 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -51,7 +51,10 @@ cdef class DeviceBuffer: (and possibly size of data to copy) stream : optional CUDA stream to use for construction and/or copying, - default the default stream + default the default stream. A reference to the stream is + stored internally to make sure it doesn't go out of scope + while it's used, destroy the underlying stream will result + in undefined behavior. Note ---- @@ -68,6 +71,8 @@ cdef class DeviceBuffer: cdef const void* c_ptr cdef cudaError_t err + self._stream = stream + with nogil: c_ptr = ptr From 9884459f8674a7e37540ec4c4b3821dbfc909454 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 11 Dec 2020 14:24:01 -0800 Subject: [PATCH 12/18] Fix test for DeviceBuffer.__sizeof__ --- python/rmm/tests/test_rmm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 5984a7311..e44c291a7 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -109,7 +109,9 @@ def test_rmm_device_buffer(size): assert len(b) == b.size assert b.nbytes == b.size assert b.capacity() >= b.size - assert sys.getsizeof(b) == b.size + + # b.size + DeviceBuffer attributes overhead + assert sys.getsizeof(b) > b.size # Test `__cuda_array_interface__` keyset = {"data", "shape", "strides", "typestr", "version"} From 2cec83fc339db5ec6af1c96e632976a885262049 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 11 Dec 2020 15:15:31 -0800 Subject: [PATCH 13/18] Add CuPy stream lifetime test --- python/rmm/tests/test_rmm.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index e44c291a7..c8ed8b1f5 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -336,6 +336,21 @@ def test_rmm_pool_cupy_allocator_with_stream(stream): rmm.reinitialize() +def test_rmm_pool_cupy_allocator_stream_lifetime(): + cupy = pytest.importorskip("cupy") + + rmm.reinitialize(pool_allocator=True) + cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) + + stream = cupy.cuda.stream.Stream() + + stream.use() + x = cupy.arange(10) + del stream + + del x + + @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) From e096237f9c6b27ed100b587f9f53d83cbe552611 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 16 Dec 2020 21:58:40 +0100 Subject: [PATCH 14/18] Update `sizeof` assert in `test_rmm_device_buffer` Co-authored-by: Keith Kraus --- python/rmm/tests/test_rmm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index c8ed8b1f5..8d7b8f3fb 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -109,9 +109,7 @@ def test_rmm_device_buffer(size): assert len(b) == b.size assert b.nbytes == b.size assert b.capacity() >= b.size - - # b.size + DeviceBuffer attributes overhead - assert sys.getsizeof(b) > b.size + assert b.__sizeof__() == b.size # Test `__cuda_array_interface__` keyset = {"data", "shape", "strides", "typestr", "version"} From fd7e004bec4b0634edcd0255ab72bd9a3b2af030 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 16 Dec 2020 21:59:15 +0100 Subject: [PATCH 15/18] Improve docs for `DeviceBuffer`'s `stream` kwarg Co-authored-by: Mark Harris --- python/rmm/_lib/device_buffer.pyx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 5350ab884..56324c1cc 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -51,10 +51,11 @@ cdef class DeviceBuffer: (and possibly size of data to copy) stream : optional CUDA stream to use for construction and/or copying, - default the default stream. A reference to the stream is - stored internally to make sure it doesn't go out of scope - while it's used, destroy the underlying stream will result - in undefined behavior. + defaults to the CUDA default stream. A reference to the + stream is stored internally to ensure it doesn't go out of + scope while the DeviceBuffer is in use. Destroying the + underlying stream while the DeviceBuffer is in use will + result in undefined behavior. Note ---- From f83e9cfca8053c1e16dfcb004a982758f3e97bf7 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 16 Dec 2020 13:02:07 -0800 Subject: [PATCH 16/18] Fix flake8 errors --- python/rmm/_lib/device_buffer.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 56324c1cc..d3a95900e 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -51,10 +51,10 @@ cdef class DeviceBuffer: (and possibly size of data to copy) stream : optional CUDA stream to use for construction and/or copying, - defaults to the CUDA default stream. A reference to the - stream is stored internally to ensure it doesn't go out of - scope while the DeviceBuffer is in use. Destroying the - underlying stream while the DeviceBuffer is in use will + defaults to the CUDA default stream. A reference to the + stream is stored internally to ensure it doesn't go out of + scope while the DeviceBuffer is in use. Destroying the + underlying stream while the DeviceBuffer is in use will result in undefined behavior. Note From 9c23566fb4ca1aff766835e99f17c5ec9d99f04f Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Mon, 1 Mar 2021 16:39:21 -0500 Subject: [PATCH 17/18] Fix merge --- python/rmm/_lib/device_buffer.pxd | 1 - python/rmm/_lib/device_buffer.pyx | 2 -- 2 files changed, 3 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 807f8ee42..3b6b4face 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -38,7 +38,6 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj - cdef Stream _stream # Holds a reference to the DeviceMemoryResource used for allocation. # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 9c45b6a8b..3401b4802 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -75,8 +75,6 @@ cdef class DeviceBuffer: cdef const void* c_ptr cdef cudaError_t err - self._stream = stream - with nogil: c_ptr = ptr From bc644c706d14a50627275971839807a84d65c033 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Mon, 1 Mar 2021 16:51:38 -0500 Subject: [PATCH 18/18] update tests based on autouse fixture --- python/rmm/tests/test_rmm.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index aebf35a34..d0b1fbe9d 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -287,11 +287,8 @@ def test_rmm_pool_numba_stream(stream): stream = rmm._cuda.stream.Stream(stream) a = rmm._lib.device_buffer.DeviceBuffer(size=3, stream=stream) - # Deleting all allocations known by the RMM pool is required - # before rmm.reinitialize(), otherwise it may segfault. - del a - - rmm.reinitialize() + assert a.size == 3 + assert a.ptr != 0 def test_rmm_cupy_allocator():