Skip to content

Commit

Permalink
Sync cuda stream before updating stats
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Tuttle <[email protected]>
  • Loading branch information
quic-mtuttle authored Oct 17, 2023
1 parent 2be0b4e commit 9890b66
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions TrainingExtensions/onnx/src/QcQuantizeOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ void QcQuantizeKernel::Compute(OrtKernelContext* context)
{
allocator = &cudaAllocator;
stream = api_.KernelContext_GetGPUComputeStream(context);
if ((opMode == DlQuantization::TensorQuantizerOpMode::updateStats) ||
(opMode == DlQuantization::TensorQuantizerOpMode::oneShotQuantizeDequantize))
{
// updateStats doesn't use the CUDA stream, so synchronize first to ensure the input buffer is populated
cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream));
}
}
#endif

Expand Down

0 comments on commit 9890b66

Please sign in to comment.