diff --git a/python/rmm/_cuda/stream.pyx b/python/rmm/_cuda/stream.pyx
index 0f6c5ab19..4c9890d51 100644
--- a/python/rmm/_cuda/stream.pyx
+++ b/python/rmm/_cuda/stream.pyx
@@ -94,14 +94,14 @@ cdef class Stream:
         return self.c_is_default()
 
     def _init_from_numba_stream(self, obj):
-        self._cuda_stream = <cudaStream_t>(obj.handle.value)
+        self._cuda_stream = <cudaStream_t><uintptr_t>(int(obj))  # int -> ptr
         self._owner = obj
 
     def _init_from_cupy_stream(self, obj):
         try:
             import cupy
             if isinstance(obj, cupy.cuda.stream.Stream):
-                self._cuda_stream = <cudaStream_t>(obj.ptr)
+                self._cuda_stream = <cudaStream_t><uintptr_t>(obj.ptr)
                 self._owner = obj
                 return
         except ImportError:
diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx
index 80da32f1b..3401b4802 100644
--- a/python/rmm/_lib/device_buffer.pyx
+++ b/python/rmm/_lib/device_buffer.pyx
@@ -54,7 +54,11 @@ cdef class DeviceBuffer:
             (and possibly size of data to copy)
         stream : optional
             CUDA stream to use for construction and/or copying,
-            default the default stream
+            defaults to the CUDA default stream. A reference to the
+            stream is stored internally to ensure it doesn't go out of
+            scope while the DeviceBuffer is in use. Destroying the
+            underlying stream while the DeviceBuffer is in use will
+            result in undefined behavior.
 
         Note
         ----
diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py
index ff53f0edb..3f79debf4 100644
--- a/python/rmm/rmm.py
+++ b/python/rmm/rmm.py
@@ -18,6 +18,7 @@
 
 import rmm
 from rmm import _lib as librmm
+from rmm._cuda.stream import Stream
 
 
 # Utility Functions
@@ -193,7 +194,8 @@ def rmm_cupy_allocator(nbytes):
     if cupy is None:
         raise ModuleNotFoundError("No module named 'cupy'")
 
-    buf = librmm.device_buffer.DeviceBuffer(size=nbytes)
+    stream = Stream(obj=cupy.cuda.get_current_stream())
+    buf = librmm.device_buffer.DeviceBuffer(size=nbytes, stream=stream)
     dev_id = -1 if buf.ptr else cupy.cuda.device.get_device_id()
     mem = cupy.cuda.UnownedMemory(
         ptr=buf.ptr, size=buf.size, owner=buf, device_id=dev_id
diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py
index 5406bc265..d0b1fbe9d 100644
--- a/python/rmm/tests/test_rmm.py
+++ b/python/rmm/tests/test_rmm.py
@@ -280,6 +280,17 @@ def test_rmm_device_buffer_pickle_roundtrip(hb):
         assert hb3 == hb
 
 
+@pytest.mark.parametrize("stream", [cuda.default_stream(), cuda.stream()])
+def test_rmm_pool_numba_stream(stream):
+    rmm.reinitialize(pool_allocator=True)
+    # Wrap the parametrized stream in RMM's Stream and allocate on it.
+    stream = rmm._cuda.stream.Stream(stream)
+    a = rmm._lib.device_buffer.DeviceBuffer(size=3, stream=stream)
+    # The buffer must report the requested size and a non-null pointer.
+    assert a.size == 3
+    assert a.ptr != 0
+
+
 def test_rmm_cupy_allocator():
     cupy = pytest.importorskip("cupy")
 
@@ -298,6 +309,54 @@ def test_rmm_cupy_allocator():
     assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)
 
 
+@pytest.mark.parametrize("stream", ["null", "async"])
+def test_rmm_pool_cupy_allocator_with_stream(stream):
+    cupy = pytest.importorskip("cupy")
+    # Route CuPy allocations through RMM's pool allocator.
+    rmm.reinitialize(pool_allocator=True)
+    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
+    # Map the parameter to CuPy's null stream or a newly created stream.
+    if stream == "null":
+        stream = cupy.cuda.stream.Stream.null
+    else:
+        stream = cupy.cuda.stream.Stream()
+    # The with-block makes `stream` CuPy's current stream.
+    with stream:
+        m = rmm.rmm_cupy_allocator(42)
+        assert m.mem.size == 42
+        assert m.mem.ptr != 0
+        assert isinstance(m.mem._owner, rmm.DeviceBuffer)
+        # A zero-byte request is expected to yield a null pointer.
+        m = rmm.rmm_cupy_allocator(0)
+        assert m.mem.size == 0
+        assert m.mem.ptr == 0
+        assert isinstance(m.mem._owner, rmm.DeviceBuffer)
+        # Arrays created through CuPy must also be backed by RMM.
+        a = cupy.arange(10)
+        assert isinstance(a.data.mem._owner, rmm.DeviceBuffer)
+
+    # Deleting all allocations known by the RMM pool is required
+    # before rmm.reinitialize(), otherwise it may segfault.
+    del a
+
+    rmm.reinitialize()
+
+
+def test_rmm_pool_cupy_allocator_stream_lifetime():
+    cupy = pytest.importorskip("cupy")
+    # Route CuPy allocations through RMM's pool allocator.
+    rmm.reinitialize(pool_allocator=True)
+    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
+    # NOTE(review): nothing below restores the null stream or non-pool RMM.
+    stream = cupy.cuda.stream.Stream()
+    # Make the stream current so the array below is allocated on it.
+    stream.use()
+    x = cupy.arange(10)
+    del stream  # drop the test's own reference before the array
+    # Passes if freeing the array after that does not raise or crash.
+    del x
+
+
 @pytest.mark.parametrize("dtype", _dtypes)
 @pytest.mark.parametrize("nelem", _nelems)
 @pytest.mark.parametrize("alloc", _allocs)