Skip to content
This repository has been archived by the owner on Jan 26, 2024. It is now read-only.

Commit

Permalink
SWDEV-359548 - Detect fence optimization
Browse files Browse the repository at this point in the history
- Use a dirty flag to determine fence optimization
- If the fence is dirty, submit a marker at the top level to sync.

Change-Id: I53fb19b5bb05b7c7b37c41637a6c7aaf870b639a
(cherry picked from commit 0c4e328)
  • Loading branch information
saleelk authored and zhang2amd committed Oct 28, 2022
1 parent 3a96b01 commit 1e7d894
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 1 deletion.
3 changes: 3 additions & 0 deletions device/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1263,6 +1263,9 @@ class VirtualDevice : public amd::HeapObject {
//! Returns the status of the queue handler callback
//! (presumably true while a callback is still pending - confirm against the backend overrides)
virtual bool isHandlerPending() const = 0;

//! Returns whether the fence of this virtual device is flagged dirty.
//! Pure virtual: each backend's override decides when the flag is set and cleared.
virtual bool isFenceDirty() const = 0;

private:
//! Disable copy assignment: operator= is declared in the private section
VirtualDevice& operator=(const VirtualDevice&);
Expand Down
2 changes: 2 additions & 0 deletions device/pal/palvirtual.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ class VirtualGPU : public device::VirtualDevice {

//! This backend never reports a pending queue handler callback
bool isHandlerPending() const {
  return false;
}

//! This backend never reports the fence as dirty
bool isFenceDirty() const {
  return false;
}

//! Read-only accessor for gpuDevice_, the GPU device object held by this VirtualGPU
const Device& dev() const {
  return gpuDevice_;
}

Expand Down
7 changes: 6 additions & 1 deletion device/rocm/rocvirtual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,7 @@ bool VirtualGPU::dispatchGenericAqlPacket(
if (fence_state_ == amd::Device::kCacheStateSystem &&
expected_fence_state == amd::Device::kCacheStateSystem) {
header = dispatchPacketHeader_;
fence_dirty_ = true;
}

fence_state_ = static_cast<Device::CacheState>(expected_fence_state);
Expand Down Expand Up @@ -991,6 +992,9 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal,
barrier_packet_.completion_signal = signal;
}

// Reset fence_dirty_ flag if we submit a barrier
fence_dirty_ = false;

while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= queueMask);
hsa_barrier_and_packet_t* aql_loc =
&(reinterpret_cast<hsa_barrier_and_packet_t*>(gpu_queue_->base_address))[index & queueMask];
Expand Down Expand Up @@ -1074,7 +1078,8 @@ VirtualGPU::VirtualGPU(Device& device, bool profiling, bool cooperative,
cuMask_(cuMask),
priority_(priority),
copy_command_type_(0),
fence_state_(Device::CacheState::kCacheStateInvalid)
fence_state_(Device::CacheState::kCacheStateInvalid),
fence_dirty_(false)
{
index_ = device.numOfVgpus_++;
gpu_device_ = device.getBackendDevice();
Expand Down
2 changes: 2 additions & 0 deletions device/rocm/rocvirtual.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ class VirtualGPU : public device::VirtualDevice {
//! Forwards the pending-handler query to barriers_
bool isHandlerPending() const {
  return barriers_.IsHandlerPending();
}

//! Declaration only; the definition is not visible here.
//! Presumably returns kernel-argument storage of `size` bytes honoring `alignment` - TODO confirm
void* allocKernArg(size_t size, size_t alignment);
//! Returns the current value of the fence_dirty_ flag
bool isFenceDirty() const {
  return fence_dirty_;
}
// } roc OpenCL integration
private:
//! Dispatches a barrier with blocking HSA signals
Expand Down Expand Up @@ -535,5 +536,6 @@ class VirtualGPU : public device::VirtualDevice {
//!< but ROC profiler expects D2H or H2D detection
int fence_state_; //!< Fence scope
//!< kUnknown/kFlushedToDevice/kFlushedToSystem
bool fence_dirty_; //!< Fence modified flag
};
}

0 comments on commit 1e7d894

Please sign in to comment.