Skip to content

Commit

Permalink
GH-985 Correctly monitor outstanding compiles
Browse files Browse the repository at this point in the history
  • Loading branch information
heifner committed Nov 12, 2024
1 parent 2619f9e commit 98b3061
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ class code_cache_base {
std::mutex _mtx;
queued_compilies_t _queued_compiles; // protected by _mtx
std::unordered_map<code_tuple, bool> _outstanding_compiles_and_poison; // protected by _mtx
std::atomic<size_t> _outstanding_compiles{0};

size_t _free_bytes_eviction_threshold;
void check_eviction_threshold(size_t free_bytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ void code_cache_async::wait_on_compile_monitor_message() {
return;
}

--_outstanding_compiles;

const auto& msg = std::get<wasm_compilation_result_message>(message);
_result_queue.push(msg);

Expand All @@ -92,10 +94,11 @@ void code_cache_async::wait_on_compile_monitor_message() {
//called from non-main thread
void code_cache_async::process_queued_compiles() {
std::lock_guard g(_mtx);
while (_outstanding_compiles_and_poison.size() < _threads && !_queued_compiles.empty()) {
while (_outstanding_compiles < _threads && !_queued_compiles.empty()) {
auto nextup = _queued_compiles.begin();

_outstanding_compiles_and_poison.emplace(nextup->code(), false);
++_outstanding_compiles;
FC_ASSERT(write_message_with_fds(_compile_monitor_write_socket, nextup->msg, nextup->fds_to_pass), "EOS VM failed to communicate to OOP manager");

_queued_compiles.erase(nextup);
Expand Down Expand Up @@ -212,6 +215,7 @@ code_cache_async::get_descriptor_for_code(mode m, uint64_t executing_action_id,
}

_outstanding_compiles_and_poison.emplace(ct, false);
++_outstanding_compiles;
write_message_with_fds(_compile_monitor_write_socket, msg, fds_to_pass);
failure = get_cd_failure::temporary; // Compile might not be done yet
return nullptr;
Expand Down
8 changes: 4 additions & 4 deletions unittests/eosvmoc_interrupt_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ BOOST_AUTO_TEST_CASE( wasm_interrupt_test ) { try {
auto pre_id = t.control->get_wasm_interface().get_executing_action_id();

// Use an infinite executing action. When oc compile completes it will kill the action and restart it under
// eosvmoc. That action will then fail when it hits the 75000ms deadline.
// 75000ms has to be long enough for oc compile to complete and kill the non-oc executing transaction
// eosvmoc. That action will then fail when it hits the 5000ms deadline.
// 5000ms has to be long enough for oc compile to complete and kill the non-oc executing transaction
BOOST_CHECK_THROW( push_trx( t, test_api_action<WASM_TEST_ACTION("test_checktime", "checktime_failure")>{},
0, 150, 75000, true, fc::raw::pack(10000000000000000000ULL) ),
0, 150, 5000, true, fc::raw::pack(10000000000000000000ULL) ),
deadline_exception );

auto post_id = t.control->get_wasm_interface().get_executing_action_id();

// each action uses 1 id, 2 if retried because of oc compile completion interruption
// if post_id == pre_id + 1, then it is likely that the 75000ms above was not long enough for the oc compile to complete
// if post_id == pre_id + 1, then it is likely that the 5000ms above was not long enough for the oc compile to complete
BOOST_TEST(post_id == pre_id + 2);

BOOST_REQUIRE_EQUAL( t.validate(), true );
Expand Down

0 comments on commit 98b3061

Please sign in to comment.