Skip to content

Commit

Permalink
[MIGraphX EP] Fix CopyTensorAsync and add guards for stream sync Copy…
Browse files Browse the repository at this point in the history
…Tensors (#16787)

Add compile guards to gate functionality based on MIGRAPHX_STREAM_SYNC
for adding the following

- remove excess hipStreamSyncronize to nullstream on CopyTensor calls
- Add proper call for stream synchronized CopyTensorAsync for
DeviceToHost case

Without this change subsequent CopyTensorAsync() calls will fail for
cards that don't use pinned memory thus causing hipMemcpy() calls to
occur before certain kernel operations occur.

![image](https://github.com/microsoft/onnxruntime/assets/107195283/4915c18a-fb2d-40c9-a50e-a7c6613c324b)

becomes

![image](https://github.com/microsoft/onnxruntime/assets/107195283/f661acf4-e2af-4c9a-b26a-30fca339cf1d)

---------

Co-authored-by: Ted Themistokleous <tthemist@amd.com>
  • Loading branch information
2 people authored and jchen351 committed Aug 12, 2023
1 parent 6c3e915 commit 707b543
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions onnxruntime/core/providers/migraphx/gpu_data_transfer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,14 @@ common::Status GPUDataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const
// Copy only if the two addresses are different.
if (dst_data != src_data) {
HIP_CALL_THROW(hipMemcpy(dst_data, src_data, bytes, hipMemcpyDeviceToDevice));
HIP_CALL_THROW(hipStreamSynchronize(nullptr));
}
} else {
// copy from other CPU memory to GPU, this is blocking
HIP_CALL_THROW(hipMemcpy(dst_data, src_data, bytes, hipMemcpyHostToDevice));
HIP_CALL_THROW(hipStreamSynchronize(nullptr)); // TODO: still need stream sync? since already blocking
}
} else if (src_device.Type() == OrtDevice::GPU) {
// copying from GPU to CPU memory, this is blocking
HIP_CALL_THROW(hipMemcpy(dst_data, src_data, bytes, hipMemcpyDeviceToHost));
HIP_CALL_THROW(hipStreamSynchronize(nullptr)); // TODO: still need stream sync? since already blocking
} else {
// copying between cpu memory
memcpy(dst_data, src_data, bytes);
Expand Down Expand Up @@ -63,13 +60,17 @@ common::Status GPUDataTransfer::CopyTensorAsync(const Tensor& src, Tensor& dst,
HIP_CALL_THROW(hipMemcpy(dst_data, src_data, bytes, hipMemcpyHostToDevice));
}
} else if (src_device.Type() == OrtDevice::GPU) {
#ifndef MIGRAPHX_STREAM_SYNC
if (dst_device.Type() == OrtDevice::CPU && dst_device.MemType() == OrtDevice::MemType::HIP_PINNED) {
// copying from GPU to pinned memory, this is non-blocking
HIP_CALL_THROW(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToHost, static_cast<hipStream_t>(stream.GetHandle())));
} else {
// copying from GPU to CPU memory, this is blocking
HIP_CALL_THROW(hipMemcpy(dst_data, src_data, bytes, hipMemcpyDeviceToHost));
}
#else
HIP_CALL_THROW(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToHost, static_cast<hipStream_t>(stream.GetHandle())));
#endif
} else {
// copying between cpu memory
memcpy(dst_data, src_data, bytes);
Expand Down

0 comments on commit 707b543

Please sign in to comment.