Skip to content

Commit

Permalink
Check for 'CUDA error: out of memory' with auto-microbatching
Browse files: Browse the repository at this point in the history
  • Loading branch information
JAEarly committed Jun 13, 2024
1 parent 832f17d commit a74bb8e
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion composer/trainer/trainer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -307,7 +307,7 @@ def _get_initial_device_train_microbatch_size(

def _is_cuda_oom(e: RuntimeError):
"""Determines if error is CUDA Out of Memory and if auto_microbatching is enabled."""
if 'CUDA out of memory' in str(e):
if any(s in str(e) for s in ['CUDA out of memory', 'CUDA error: out of memory']):
return True
# With batch_norm, large batch sizes sometimes result in cuDNN instead of Cuda OOMs.
if 'cuDNN error: CUDNN_STATUS_NOT_SUPPORTED. This error may appear if you passed in a non-contiguous input.' in str(
Expand Down

0 comments on commit a74bb8e

Please sign in to comment.