Skip to content

Commit

Permalink
Fix misaligned stores in buffer (#1063)
Browse files Browse the repository at this point in the history
* Fix misaligned read/write to buffer

- causes undefined behavior

* Update run-ci.py

- fix spurious CDash submission failure warning

* Improve run-ci.py support for UBSan

* Relax rocprofv3 summary stats count expectation

* Update CHANGELOG
  • Loading branch information
jrmadsen authored Sep 10, 2024
1 parent 7a9af10 commit 37e0d7e
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 29 deletions.
30 changes: 20 additions & 10 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/

## ROCprofiler-SDK for AFAR I

## Additions
### Additions

- HSA API Tracing
- Kernel Dispatch Tracing
Expand All @@ -14,7 +14,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/

## ROCprofiler-SDK for AFAR II

## Additions
### Additions

- HIP API Tracing
- ROCTx Tracing
Expand All @@ -25,7 +25,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/

## ROCprofiler-SDK for AFAR III

## Additions
### Additions

- Kernel Dispatch Counter Collection – (includes serialization and multidimensional instances)
- Kernel serialization
Expand All @@ -43,7 +43,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/

## ROCprofiler-SDK for AFAR IV

## Additions
### Additions

- Page Migration Reporting (API)
- Scratch Memory Reporting (API)
Expand All @@ -55,7 +55,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/

## ROCprofiler-SDK for AFAR V

## Additions
### Additions

- Agent/Device Counter Collection (API)
- Single JSON output format support (tool)
Expand All @@ -67,17 +67,17 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/
- ROCProf V3 Multi-GPU Support:
- Multi-process (multiple files)

## Fixes
### Fixes

- SQ_ACCUM_PREV and SQ_ACCUM_PREV_HIRE overwriting issue

## Changes
### Changes

- rocprofv3 tool now needs `--` in front of application. For detailed uses, please [Click Here](source/docs/rocprofv3.md)

## ROCprofiler-SDK for AFAR VI

## Additions
### Additions

- OTF2 Tool Support
- Kernel and Range Filtering
Expand All @@ -90,6 +90,16 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/
- Dispatch_Id
- Added CSV column for counter_collection

## Fixes
### Fixes

- Miscellaneous bug fixes

## ROCprofiler-SDK for AFAR VII

### Additions

### Fixes

### Changes

- Fix misaligned stores (undefined behavior) for buffer records
6 changes: 4 additions & 2 deletions source/lib/common/container/record_header_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,15 @@ record_header_buffer::emplace(uint64_t _hash, Tp& _v)
if(m_headers.empty()) return false;

constexpr auto request_size = sizeof(Tp);
constexpr auto align_size = alignof(Tp);

// notify there was a request
m_requested.fetch_add(1);

// in theory, we shouldn't need to lock here but the thread sanitizer says there is a race.
// the lock will be short-lived so hopefully, it will scale fine
write_lock();
auto* _addr = m_buffer.request(request_size, false);
auto* _addr = m_buffer.request(request_size, align_size, false);
write_unlock();

read_lock();
Expand Down Expand Up @@ -277,14 +278,15 @@ record_header_buffer::emplace(uint32_t _category, uint32_t _kind, Tp& _v)
if(m_headers.empty()) return false;

constexpr auto request_size = sizeof(Tp);
constexpr auto align_size = alignof(Tp);

// notify there was a request
m_requested.fetch_add(1);

// in theory, we shouldn't need to lock here but the thread sanitizer says there is a race.
// the lock will be short-lived so hopefully, it will scale fine
write_lock();
auto* _addr = m_buffer.request(request_size, false);
auto* _addr = m_buffer.request(request_size, align_size, false);
write_unlock();

read_lock();
Expand Down
13 changes: 9 additions & 4 deletions source/lib/common/container/ring_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,30 +134,35 @@ ring_buffer::as_string() const
//

void*
ring_buffer::request(size_t _length, bool _wrap)
ring_buffer::request(size_t _length, size_t _align, bool _wrap)
{
if(m_ptr == nullptr || m_size == 0) return nullptr;

if(is_full()) return (_wrap) ? retrieve(_length) : nullptr;

LOG_IF(FATAL, _align == 0) << "alignment must be non-zero";

// if write count is at the tail of buffer, bump to the end of buffer
size_t _write_count = 0;
size_t _offset = 0;
size_t _write_pos = 0;
do
{
// Make sure we don't put in more than there's room for, by writing no
// more than there is free.
if(_length > free()) return nullptr;

_offset = 0;
_write_count = m_write_count.load(std::memory_order_acquire);
auto _modulo = m_size - (_write_count % m_size);
if(_modulo < _length) _offset = _modulo;
auto _align_modulo = (_write_count % _align);
auto _align_offset = (_align_modulo > 0) ? (_align - _align_modulo) : 0;
_write_pos = _write_count + _align_offset;
} while(!m_write_count.compare_exchange_strong(
_write_count, _write_count + _length + _offset, std::memory_order_seq_cst));
_write_count, _write_pos + _length + _offset, std::memory_order_seq_cst));

// pointer in buffer
void* _out = write_ptr(_write_count);
void* _out = write_ptr(_write_pos);

return _out;
}
Expand Down
14 changes: 8 additions & 6 deletions source/lib/common/container/ring_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct ring_buffer
void destroy();

/// Request a pointer for writing at least \param n bytes.
void* request(size_t n, bool wrap = true);
void* request(size_t n, size_t align, bool wrap = true);

/// Retrieve a pointer for reading at least \param n bytes.
void* retrieve(size_t n) const;
Expand Down Expand Up @@ -174,8 +174,9 @@ ring_buffer::write(Tp* in, std::enable_if_t<std::is_class<Tp>::value, int>)
{
if(in == nullptr || m_ptr == nullptr) return {0, nullptr};

auto _length = sizeof(Tp);
void* _out_p = request(_length);
constexpr auto _length = sizeof(Tp);
constexpr auto _align = alignof(Tp);
void* _out_p = request(_length, _align, true);

if(_out_p == nullptr) return {0, nullptr};

Expand All @@ -194,8 +195,9 @@ ring_buffer::write(Tp* in, std::enable_if_t<!std::is_class<Tp>::value, int>)
{
if(in == nullptr || m_ptr == nullptr) return {0, nullptr};

auto _length = sizeof(Tp);
void* _out_p = request(_length);
constexpr auto _length = sizeof(Tp);
constexpr auto _align = alignof(Tp);
void* _out_p = request(_length, _align, true);

if(_out_p == nullptr) return {0, nullptr};

Expand All @@ -214,7 +216,7 @@ ring_buffer::request(bool wrap)
{
if(m_ptr == nullptr) return nullptr;

return reinterpret_cast<Tp*>(request(sizeof(Tp), wrap));
return reinterpret_cast<Tp*>(request(sizeof(Tp), alignof(Tp), wrap));
}
//
template <typename Tp>
Expand Down
3 changes: 2 additions & 1 deletion source/lib/tests/buffering/buffering-parallel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ validate(const std::vector<rocprofiler_record_header_t*>& _headers)
if(itr->hash == typeid(data_type).hash_code())
{
auto* _data = static_cast<data_type*>(itr->payload);
EXPECT_EQ(_ref_data, *_data);
EXPECT_EQ(_ref_data, *_data)
<< "\nREF: " << _ref_data.to_string() << "\nINP: " << _data->to_string();
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion source/lib/tests/buffering/buffering-save-load.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ TEST(buffering, save_load)

// verify the data, at a high-level is correct
EXPECT_EQ(_buffer.size(), num_variants);
EXPECT_EQ(_buffer.count(), data_size);
EXPECT_GE(_buffer.count(), data_size);
EXPECT_GE(_buffer.free(), 0);
EXPECT_GE(_buffer.capacity(), data_size);
EXPECT_FALSE(_buffer.is_empty());
Expand Down
19 changes: 15 additions & 4 deletions source/scripts/run-ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,17 @@ def option_in_args(_key, _args):
os.environ.get("TSAN_OPTIONS", ""),
]
)
elif MEMCHECK_TYPE == "UndefinedBehaviorSanitizer":
MEMCHECK_SUPPRESSION_FILE = (
f"{SOURCE_DIR}/source/scripts/undef-behavior-sanitizer-suppr.txt"
)
os.environ["UBSAN_OPTIONS"] = " ".join(
[
"print_stacktrace=1",
f"suppressions={SOURCE_DIR}/source/scripts/undef-behavior-sanitizer-suppr.txt",
os.environ.get("UBSAN_OPTIONS", ""),
]
)

codecov_exclude = [
"/usr/.*",
Expand Down Expand Up @@ -235,12 +246,12 @@ def generate_dashboard_script(args):
RETRY_DELAY 10
CAPTURE_CMAKE_ERROR _cdash_submit_err)
if("{STRICT_SUBMIT}" GREATER 0)
if(NOT _cdash_submit_err EQUAL 0)
if(NOT _cdash_submit_err EQUAL 0)
if("{STRICT_SUBMIT}" GREATER 0)
message(FATAL_ERROR "CDash submission failed: {SUBMIT_ERR}")
else()
message(AUTHOR_WARNING "CDash submission failure ignored due to absence of --require-cdash-submission")
endif()
else()
message(AUTHOR_WARNING "CDash submission failure ignored due to absence of --require-cdash-submission")
endif()
endif()
endmacro()
Expand Down
Empty file.
2 changes: 1 addition & 1 deletion tests/rocprofv3/summary/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def test_summary_data(json_data):
)
assert oitr.value.count == 2
elif itr.domain == "HIP_API":
assert itr.stats.count == 2135
assert itr.stats.count >= 2130 and itr.stats.count <= 2140
elif itr.domain == "MEMORY_COPY":
assert itr.stats.count == 12
elif itr.domain == "MARKER_API":
Expand Down

0 comments on commit 37e0d7e

Please sign in to comment.