From 3846764160a938b492c174f94a81654b12fe8fe6 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Fri, 5 Mar 2021 10:59:21 -0500 Subject: [PATCH 1/9] Add Cython definitions for device_uvector --- python/rmm/_lib/device_uvector.pxd | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 python/rmm/_lib/device_uvector.pxd diff --git a/python/rmm/_lib/device_uvector.pxd b/python/rmm/_lib/device_uvector.pxd new file mode 100644 index 000000000..243c50809 --- /dev/null +++ b/python/rmm/_lib/device_uvector.pxd @@ -0,0 +1,24 @@ +from rmm._lib.device_buffer cimport device_buffer +from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm._lib.memory_resource cimport device_memory_resource + + +cdef extern from "rmm/device_uvector.hpp" namespace "rmm" nogil: + cdef cppclass device_uvector[T]: + device_uvector(size_t size, cuda_stream_view stream) except + + T* element_ptr(size_t index) + void set_element(size_t element_index, const T& v, cuda_stream_view s) + void set_element_async( + size_t element_index, + const T& v, + cuda_stream_view s + ) except + + T front_element(cuda_stream_view s) except + + T back_element(cuda_stream_view s) except + + void resize(size_t new_size, cuda_stream_view stream) except + + void shrink_to_fit(cuda_stream_view stream) except + + device_buffer release() + size_t capacity() + T* data() + size_t size() + device_memory_resource* memory_resource() From 87a1a9a4ae10b5dfdc4a7b05725b1e053703043a Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Tue, 9 Mar 2021 10:17:48 -0500 Subject: [PATCH 2/9] Clarify log file name behaviour --- python/rmm/_lib/memory_resource.pyx | 20 +++++++++++++++----- python/rmm/rmm.py | 3 +++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 0c8764ce6..1291b474e 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -387,11 +387,6 @@ cdef class 
LoggingResourceAdaptor(UpstreamResourceAdaptor): "log_file_name= argument or RMM_LOG_FILE " "environment variable" ) - # Append the device ID before the file extension - log_file_name = _append_id( - log_file_name, getDevice() - ) - _log_file_name = log_file_name self.c_obj.reset( @@ -504,6 +499,11 @@ cpdef void _initialize( setDevice(device) if logging: + # Append the device ID before the file extension + log_file_name = _append_id( + log_file_name, device + ) + mr = LoggingResourceAdaptor( typ(*args, **kwargs), log_file_name @@ -628,6 +628,13 @@ cpdef _flush_logs(): def enable_logging(log_file_name=None): """ Enable logging of run-time events. + + log_file_name: str, optional + Name of the log file. If not specified, the environment variable + RMM_LOG_FILE is used. A TypeError is thrown if neither is available. + A separate log file is produced for each device, + and the suffix `".dev{id}"` is automatically added to the log file + name. """ global _per_device_mrs @@ -636,6 +643,9 @@ def enable_logging(log_file_name=None): for device in devices: each_mr = _per_device_mrs[device] if not isinstance(each_mr, LoggingResourceAdaptor): + log_file_name = _append_id( + log_file_name, device + ) set_per_device_resource( device, LoggingResourceAdaptor(each_mr, log_file_name) diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py index 3f79debf4..c5a041148 100644 --- a/python/rmm/rmm.py +++ b/python/rmm/rmm.py @@ -66,6 +66,9 @@ def reinitialize( log_file_name : str Name of the log file. If not specified, the environment variable RMM_LOG_FILE is used. A TypeError is thrown if neither is available. + A separate log file is produced for each device, + and the suffix `".dev{id}"` is automatically added to the log file + name. 
""" rmm.mr._initialize( pool_allocator=pool_allocator, From ea6c41401f3f92fba709fc0ce1596c4d33b5fd06 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 10 Mar 2021 15:16:24 -0500 Subject: [PATCH 3/9] Add a get_log_filenames API to get the filename per-device. More doc --- python/rmm/__init__.py | 5 ++-- python/rmm/_lib/memory_resource.pyx | 45 ++++++++++++++++++++++------- python/rmm/mr.py | 2 ++ python/rmm/rmm.py | 11 +++++++ 4 files changed, 50 insertions(+), 13 deletions(-) diff --git a/python/rmm/__init__.py b/python/rmm/__init__.py index 8c753828e..bdb7f7c56 100644 --- a/python/rmm/__init__.py +++ b/python/rmm/__init__.py @@ -15,7 +15,8 @@ from rmm import mr from rmm._lib.device_buffer import DeviceBuffer -from rmm.mr import disable_logging, enable_logging +from rmm._version import get_versions +from rmm.mr import disable_logging, enable_logging, get_log_filenames from rmm.rmm import ( RMMError, RMMNumbaManager, @@ -25,6 +26,4 @@ rmm_cupy_allocator, ) -from rmm._version import get_versions - __version__ = get_versions()["version"] diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 1291b474e..a7f8e6925 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -387,7 +387,12 @@ cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): "log_file_name= argument or RMM_LOG_FILE " "environment variable" ) - _log_file_name = log_file_name + # Append the device ID before the file extension + log_file_name = _append_id( + log_file_name, getDevice() + ) + + self._log_file_name = log_file_name self.c_obj.reset( new logging_resource_adaptor[device_memory_resource]( @@ -499,11 +504,6 @@ cpdef void _initialize( setDevice(device) if logging: - # Append the device ID before the file extension - log_file_name = _append_id( - log_file_name, device - ) - mr = LoggingResourceAdaptor( typ(*args, **kwargs), log_file_name @@ -627,14 +627,27 @@ cpdef _flush_logs(): def 
enable_logging(log_file_name=None): """ - Enable logging of run-time events. + Enable logging of run-time events for all devices. + Parameters + ---------- log_file_name: str, optional Name of the log file. If not specified, the environment variable RMM_LOG_FILE is used. A TypeError is thrown if neither is available. A separate log file is produced for each device, and the suffix `".dev{id}"` is automatically added to the log file name. + + Notes + ----- + Note that if you use the environment variable CUDA_VISIBLE_DEVICES + with logging enabled, the suffix may not be what you expect. For + example, if you set CUDA_VISIBLE_DEVICES=1, the log file produced + will still have suffix `0`. Similarly, if you set + CUDA_VISIBLE_DEVICES=1,0 and use devices 0 and 1, the log file + with suffix `0` will correspond to the GPU with device ID `1`. + Use `rmm.get_log_filenames()` to get the log file names + corresponding to each device. """ global _per_device_mrs @@ -643,9 +656,6 @@ def enable_logging(log_file_name=None): for device in devices: each_mr = _per_device_mrs[device] if not isinstance(each_mr, LoggingResourceAdaptor): - log_file_name = _append_id( - log_file_name, device - ) set_per_device_resource( device, LoggingResourceAdaptor(each_mr, log_file_name) @@ -661,3 +671,18 @@ def disable_logging(): for i, each_mr in _per_device_mrs.items(): if isinstance(each_mr, LoggingResourceAdaptor): set_per_device_resource(i, each_mr.get_upstream()) + + +def get_log_filenames(): + """ + Returns the log filename (or `None` if not writing logs) + for each device in use. 
+ """ + global _per_device_mrs + + return [ + each_mr.get_file_name() + if isinstance(each_mr, LoggingResourceAdaptor) + else None + for each_mr in _per_device_mrs.values() + ] diff --git a/python/rmm/mr.py b/python/rmm/mr.py index 44be50318..15cec90b0 100644 --- a/python/rmm/mr.py +++ b/python/rmm/mr.py @@ -14,6 +14,7 @@ enable_logging, get_current_device_resource, get_current_device_resource_type, + get_log_filenames, get_per_device_resource, get_per_device_resource_type, is_initialized, @@ -40,5 +41,6 @@ "get_current_device_resource", "get_per_device_resource_type", "get_current_device_resource_type", + "get_log_filenames", "is_initialized", ] diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py index c5a041148..20c33b9bc 100644 --- a/python/rmm/rmm.py +++ b/python/rmm/rmm.py @@ -69,6 +69,17 @@ def reinitialize( A separate log file is produced for each device, and the suffix `".dev{id}"` is automatically added to the log file name. + + Notes + ----- + Note that if you use the environment variable CUDA_VISIBLE_DEVICES + with logging enabled, the suffix may not be what you expect. For + example, if you set CUDA_VISIBLE_DEVICES=1, the log file produced + will still have suffix `0`. Similarly, if you set + CUDA_VISIBLE_DEVICES=1,0 and use devices 0 and 1, the log file + with suffix `0` will correspond to the GPU with device ID `1`. + Use `rmm.get_log_filenames()` to get the log file names + corresponding to each device. 
""" rmm.mr._initialize( pool_allocator=pool_allocator, From 9db06ea2a4bfa620ca77c87bd8d5d9df0dec49db Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 10 Mar 2021 16:34:30 -0500 Subject: [PATCH 4/9] Return a mapping in get_log_filenames instead --- python/rmm/_lib/memory_resource.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index a7f8e6925..aff669563 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -680,9 +680,9 @@ def get_log_filenames(): """ global _per_device_mrs - return [ - each_mr.get_file_name() + return { + i: each_mr.get_file_name() if isinstance(each_mr, LoggingResourceAdaptor) else None - for each_mr in _per_device_mrs.values() - ] + for i, each_mr in _per_device_mrs.items() + } From 06886c69180496f05f421a821fbe983971403667 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Fri, 12 Mar 2021 18:31:51 -0500 Subject: [PATCH 5/9] Type->ValueError --- python/rmm/_lib/memory_resource.pyx | 4 ++-- python/rmm/rmm.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index aff669563..078a107e7 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -382,7 +382,7 @@ cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): if log_file_name is None: log_file_name = os.getenv("RMM_LOG_FILE") if not log_file_name: - raise TypeError( + raise ValueError( "RMM log file must be specified either using " "log_file_name= argument or RMM_LOG_FILE " "environment variable" @@ -633,7 +633,7 @@ def enable_logging(log_file_name=None): ---------- log_file_name: str, optional Name of the log file. If not specified, the environment variable - RMM_LOG_FILE is used. A TypeError is thrown if neither is available. + RMM_LOG_FILE is used. A ValueError is thrown if neither is available. 
A separate log file is produced for each device, and the suffix `".dev{id}"` is automatically added to the log file name. diff --git a/python/rmm/rmm.py b/python/rmm/rmm.py index 20c33b9bc..9c57c6ac6 100644 --- a/python/rmm/rmm.py +++ b/python/rmm/rmm.py @@ -65,7 +65,7 @@ def reinitialize( This has significant performance impact. log_file_name : str Name of the log file. If not specified, the environment variable - RMM_LOG_FILE is used. A TypeError is thrown if neither is available. + RMM_LOG_FILE is used. A ValueError is thrown if neither is available. A separate log file is produced for each device, and the suffix `".dev{id}"` is automatically added to the log file name. From 6cf09feded30584b0a5acb1c7a7e7d8d79f3a5bc Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 18 Mar 2021 11:14:55 -0400 Subject: [PATCH 6/9] Use abspath --- python/rmm/_lib/memory_resource.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 078a107e7..df6e1036d 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -387,11 +387,12 @@ cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): "log_file_name= argument or RMM_LOG_FILE " "environment variable" ) + # Append the device ID before the file extension log_file_name = _append_id( log_file_name, getDevice() ) - + log_file_name = os.path.abspath(log_file_name) self._log_file_name = log_file_name self.c_obj.reset( From c82f5bdb831d9dbf1bb50b10a182c07ced740761 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 18 Mar 2021 11:17:19 -0400 Subject: [PATCH 7/9] Add example --- python/rmm/_lib/memory_resource.pyx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index df6e1036d..acd24c91f 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -678,6 +678,14 @@ def 
get_log_filenames(): """ Returns the log filename (or `None` if not writing logs) for each device in use. + + Examples + -------- + >>> import rmm + >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log") + >>> rmm.mr.get_log_filenames() + {0: '/home/ashwin/workspace/rapids/rmm/python/rmm.dev0.log', + 1: '/home/ashwin/workspace/rapids/rmm/python/rmm.dev1.log'} """ global _per_device_mrs From 5e0a2d78ab7800834739eac89a2aae51c7c4b99d Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 18 Mar 2021 11:18:06 -0400 Subject: [PATCH 8/9] Fix example --- python/rmm/_lib/memory_resource.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index acd24c91f..b0e045a5f 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -683,7 +683,7 @@ def get_log_filenames(): -------- >>> import rmm >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log") - >>> rmm.mr.get_log_filenames() + >>> rmm.get_log_filenames() {0: '/home/ashwin/workspace/rapids/rmm/python/rmm.dev0.log', 1: '/home/ashwin/workspace/rapids/rmm/python/rmm.dev1.log'} """ From 1660b6471e059a16ac8c8b4c773ad7aec791787a Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 18 Mar 2021 11:18:47 -0400 Subject: [PATCH 9/9] Change ashwin->user --- python/rmm/_lib/memory_resource.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index b0e045a5f..fb06b53f5 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -684,8 +684,8 @@ def get_log_filenames(): >>> import rmm >>> rmm.reinitialize(devices=[0, 1], logging=True, log_file_name="rmm.log") >>> rmm.get_log_filenames() - {0: '/home/ashwin/workspace/rapids/rmm/python/rmm.dev0.log', - 1: '/home/ashwin/workspace/rapids/rmm/python/rmm.dev1.log'} + {0: 
'/home/user/workspace/rapids/rmm/python/rmm.dev0.log', + 1: '/home/user/workspace/rapids/rmm/python/rmm.dev1.log'} """ global _per_device_mrs