diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx
index 0f6c5ab19..4c9890d51 100644
--- a/python/rmm/_cuda/stream.pyx
+++ b/python/rmm/_cuda/stream.pyx
@@ -94,14 +94,14 @@ cdef class Stream:
         return self.c_is_default()

     def _init_from_numba_stream(self, obj):
-        self._cuda_stream = (obj.handle.value)
+        self._cuda_stream = (int(obj))
         self._owner = obj

     def _init_from_cupy_stream(self, obj):
         try:
             import cupy
             if isinstance(obj, cupy.cuda.stream.Stream):
-                self._cuda_stream = (obj.ptr)
+                self._cuda_stream = (obj.ptr)
                 self._owner = obj
                 return
         except ImportError:
diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx
index 80da32f1b..3401b4802 100644
--- a/python/rmm/_lib/device_buffer.pyx
+++ b/python/rmm/_lib/device_buffer.pyx
@@ -54,7 +54,11 @@ cdef class DeviceBuffer:
             (and possibly size of data to copy)
         stream : optional
             CUDA stream to use for construction and/or copying,
-            default the default stream
+            defaults to the CUDA default stream. A reference to the
+            stream is stored internally to ensure it doesn't go out of
+            scope while the DeviceBuffer is in use. Destroying the
+            underlying stream while the DeviceBuffer is in use will
+            result in undefined behavior.

         Note
         ----
diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py
index ff53f0edb..3f79debf4 100644
--- a/python/rmm/rmm.py
+++ b/python/rmm/rmm.py
@@ -18,6 +18,7 @@

 import rmm
 from rmm import _lib as librmm
+from rmm._cuda.stream import Stream


 # Utility Functions
@@ -193,7 +194,8 @@ def rmm_cupy_allocator(nbytes):
     if cupy is None:
         raise ModuleNotFoundError("No module named 'cupy'")

-    buf = librmm.device_buffer.DeviceBuffer(size=nbytes)
+    stream = Stream(obj=cupy.cuda.get_current_stream())
+    buf = librmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream)
     dev_id = -1 if buf.ptr else cupy.cuda.device.get_device_id()
     mem = cupy.cuda.UnownedMemory(
         ptr=buf.ptr, size=buf.size, owner=buf, device_id=dev_id
diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py
index 5406bc265..d0b1fbe9d 100644
--- a/python/rmm/tests/test_rmm.py
+++ b/python/rmm/tests/test_rmm.py
@@ -280,6 +280,17 @@ def test_rmm_device_buffer_pickle_roundtrip(hb):
     assert hb3 == hb


+@pytest.mark.parametrize("stream", [cuda.default_stream(), cuda.stream()])
+def test_rmm_pool_numba_stream(stream):
+    rmm.reinitialize(pool_allocator=True)
+
+    stream = rmm._cuda.stream.Stream(stream)
+    a = rmm._lib.device_buffer.DeviceBuffer(size=3, stream=stream)
+
+    assert a.size == 3
+    assert a.ptr != 0
+
+
 def test_rmm_cupy_allocator():
     cupy = pytest.importorskip("cupy")

@@ -298,6 +309,54 @@ def test_rmm_cupy_allocator():
     assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)


+@pytest.mark.parametrize("stream", ["null", "async"])
+def test_rmm_pool_cupy_allocator_with_stream(stream):
+    cupy = pytest.importorskip("cupy")
+
+    rmm.reinitialize(pool_allocator=True)
+    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
+
+    if stream == "null":
+        stream = cupy.cuda.stream.Stream.null
+    else:
+        stream = cupy.cuda.stream.Stream()
+
+    with stream:
+        m = rmm.rmm_cupy_allocator(42)
+        assert m.mem.size == 42
+        assert m.mem.ptr != 0
+        assert isinstance(m.mem._owner, rmm.DeviceBuffer)
+
+        m = rmm.rmm_cupy_allocator(0)
+        assert m.mem.size == 0
+        assert m.mem.ptr == 0
+        assert isinstance(m.mem._owner, rmm.DeviceBuffer)
+
+        a = cupy.arange(10)
+        assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)
+
+    # Deleting all allocations known by the RMM pool is required
+    # before rmm.reinitialize(), otherwise it may segfault.
+    del a
+
+    rmm.reinitialize()
+
+
+def test_rmm_pool_cupy_allocator_stream_lifetime():
+    cupy = pytest.importorskip("cupy")
+
+    rmm.reinitialize(pool_allocator=True)
+    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
+
+    stream = cupy.cuda.stream.Stream()
+
+    stream.use()
+    x = cupy.arange(10)
+    del stream
+
+    del x
+
+
 @pytest.mark.parametrize("dtype", _dtypes)
 @pytest.mark.parametrize("nelem", _nelems)
 @pytest.mark.parametrize("alloc", _allocs)