diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp
index 0f7fdc8b02..0eea0e53ce 100644
--- a/libraries/chain/controller.cpp
+++ b/libraries/chain/controller.cpp
@@ -1288,7 +1288,7 @@ struct controller_impl {
      thread_pool(),
      my_finalizers(cfg.finalizers_dir / config::safety_filename),
      main_thread_timer(timer), // assumes constructor is called from main thread
-     wasmif( conf.wasm_runtime, conf.eosvmoc_tierup, db, conf.state_dir, conf.eosvmoc_config, !conf.profile_accounts.empty() )
+     wasmif( conf.wasm_runtime, conf.eosvmoc_tierup, db, main_thread_timer, conf.state_dir, conf.eosvmoc_config, !conf.profile_accounts.empty() )
   {
      assert(cfg.chain_thread_pool_size > 0);
      thread_pool.start( cfg.chain_thread_pool_size, [this]( const fc::exception& e ) {
diff --git a/libraries/chain/include/eosio/chain/wasm_interface.hpp b/libraries/chain/include/eosio/chain/wasm_interface.hpp
index 72f5cd3f23..7adfce6966 100644
--- a/libraries/chain/include/eosio/chain/wasm_interface.hpp
+++ b/libraries/chain/include/eosio/chain/wasm_interface.hpp
@@ -7,7 +7,9 @@
 namespace eosio { namespace chain {
 
-   class apply_context;
+   struct platform_timer;
+
+   class apply_context;
    class wasm_runtime_interface;
    class controller;
    namespace eosvmoc { struct config; }
@@ -48,7 +50,7 @@ namespace eosio { namespace chain {
 
         inline static bool test_disable_tierup = false; // set by unittests to test tierup failing
 
-        wasm_interface(vm_type vm, vm_oc_enable eosvmoc_tierup, const chainbase::database& d, const std::filesystem::path data_dir, const eosvmoc::config& eosvmoc_config, bool profile);
+        wasm_interface(vm_type vm, vm_oc_enable eosvmoc_tierup, const chainbase::database& d, platform_timer& main_thread_timer, const std::filesystem::path data_dir, const eosvmoc::config& eosvmoc_config, bool profile);
        ~wasm_interface();
 
 #ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
@@ -57,6 +59,9 @@ namespace eosio { namespace chain {
 
        // returns true if EOS VM OC is enabled
        bool is_eos_vm_oc_enabled() const;
+
+       // return internal executing action id, used for testing
+       uint64_t get_executing_action_id() const;
 #endif
 
        //call before dtor to skip what can be minutes of dtor overhead with some runtimes; can cause leaks
@@ -82,7 +87,6 @@ namespace eosio { namespace chain {
        std::function<bool(const digest_type& code_hash, uint8_t vm_type, uint8_t vm_version, apply_context& context)> substitute_apply;
 
      private:
-       vm_oc_enable eosvmoc_tierup;
        unique_ptr<struct wasm_interface_impl> my;
    };
diff --git a/libraries/chain/include/eosio/chain/wasm_interface_private.hpp b/libraries/chain/include/eosio/chain/wasm_interface_private.hpp
index 961b6dd27e..a5024c6d26 100644
--- a/libraries/chain/include/eosio/chain/wasm_interface_private.hpp
+++ b/libraries/chain/include/eosio/chain/wasm_interface_private.hpp
@@ -48,8 +48,9 @@ namespace eosio { namespace chain {
 #ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
 struct eosvmoc_tier {
    // Called from main thread
-   eosvmoc_tier(const std::filesystem::path& d, const eosvmoc::config& c, const chainbase::database& db)
-      : cc(d, c, db) {
+   eosvmoc_tier(const std::filesystem::path& d, const eosvmoc::config& c, const chainbase::database& db,
+                eosvmoc::code_cache_async::compile_complete_callback cb)
+      : cc(d, c, db, std::move(cb)) {
       // Construct exec and mem for the main thread
       exec = std::make_unique<eosvmoc::executor>(cc);
       mem = std::make_unique<eosvmoc::memory>(wasm_constraints::maximum_linear_memory/wasm_constraints::wasm_page_size);
@@ -70,9 +71,12 @@ struct eosvmoc_tier {
 #endif
 
    wasm_interface_impl(wasm_interface::vm_type vm, wasm_interface::vm_oc_enable eosvmoc_tierup, const chainbase::database& d,
-                       const std::filesystem::path data_dir, const eosvmoc::config& eosvmoc_config, bool profile)
+                       platform_timer& main_thread_timer, const std::filesystem::path data_dir,
+                       const eosvmoc::config& eosvmoc_config, bool profile)
      : db(d)
+     , main_thread_timer(main_thread_timer)
      , wasm_runtime_time(vm)
+     , eosvmoc_tierup(eosvmoc_tierup)
    {
 #ifdef EOSIO_EOS_VM_RUNTIME_ENABLED
      if(vm == wasm_interface::vm_type::eos_vm)
@@ -96,13 +100,93 @@ struct eosvmoc_tier {
 #ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
      if(eosvmoc_tierup != wasm_interface::vm_oc_enable::oc_none) {
        EOS_ASSERT(vm != wasm_interface::vm_type::eos_vm_oc, wasm_exception, "You can't use EOS VM OC as the base runtime when tier up is activated");
-       eosvmoc = std::make_unique<eosvmoc_tier>(data_dir, eosvmoc_config, d);
+       eosvmoc = std::make_unique<eosvmoc_tier>(data_dir, eosvmoc_config, d, [this](uint64_t executing_action_id, fc::time_point queued_time) {
+          async_compile_complete(executing_action_id, queued_time);
+       });
      }
 #endif
    }
 
    ~wasm_interface_impl() = default;
 
+#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
+   // called from async thread
+   void async_compile_complete(uint64_t exec_action_id, fc::time_point queued_time) {
+      if (exec_action_id == executing_action_id) { // is action still executing?
+         ilog("EOS VM OC tier up compile complete ${t}ms, interrupting non-oc execution id: ${id}",
+              ("t", (fc::time_point::now() - queued_time).count()/1000)("id", exec_action_id));
+         eos_vm_oc_compile_interrupt = true;
+         main_thread_timer.expire_now();
+      }
+   }
+#endif
+
+   void apply( const digest_type& code_hash, const uint8_t& vm_type, const uint8_t& vm_version, apply_context& context ) {
+      while (true) {
+         bool attempt_tierup = false;
+#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
+         attempt_tierup = eosvmoc && (eosvmoc_tierup == wasm_interface::vm_oc_enable::oc_all || context.should_use_eos_vm_oc());
+         ++executing_action_id;
+         auto ex = fc::make_scoped_exit([&]() {
+            eos_vm_oc_compile_interrupt = false;
+            ++executing_action_id; // indicate no longer executing
+         });
+         if (attempt_tierup) {
+            const chain::eosvmoc::code_descriptor* cd = nullptr;
+            chain::eosvmoc::code_cache_base::get_cd_failure failure = chain::eosvmoc::code_cache_base::get_cd_failure::temporary;
+            try {
+               // Ideally all validator nodes would switch to using oc before block producer nodes so that validators
+               // are never overwhelmed. Compile whitelisted account contracts first on non-produced blocks. This makes
+               // it more likely that validators will switch to the oc compiled contract before the block producer runs
+               // an action for the contract with oc.
+               chain::eosvmoc::code_cache_async::mode m;
+               m.whitelisted = context.is_eos_vm_oc_whitelisted();
+               m.high_priority = m.whitelisted && context.is_applying_block();
+               m.write_window = context.control.is_write_window();
+               auto timer_pause = fc::make_scoped_exit([&](){
+                  context.trx_context.resume_billing_timer();
+               });
+               context.trx_context.pause_billing_timer();
+               cd = eosvmoc->cc.get_descriptor_for_code(m, executing_action_id, code_hash, vm_version, failure);
+               if (wasm_interface::test_disable_tierup)
+                  cd = nullptr;
+            } catch (...) {
+               // swallow errors here, if EOS VM OC has gone in to the weeds we shouldn't bail: continue to try and run baseline
+               // In the future, consider moving bits of EOS VM that can fire exceptions and such out of this call path
+               static bool once_is_enough;
+               if (!once_is_enough)
+                  elog("EOS VM OC has encountered an unexpected failure");
+               once_is_enough = true;
+            }
+            if (cd) {
+               if (!context.is_applying_block()) // read_only_trx_test.py looks for this log statement
+                  tlog("${a} speculatively executing ${i} ${h} with eos vm oc", ("a", context.get_receiver())("h", code_hash));
+               eosvmoc->exec->execute(*cd, *eosvmoc->mem, context);
+               break;
+            }
+         }
+#endif
+         auto start = fc::time_point::now();
+         try {
+            get_instantiated_module(code_hash, vm_type, vm_version, context.trx_context)->apply(context);
+         } catch (const interrupt_exception& e) {
+            if (attempt_tierup && context.is_applying_block() && eos_vm_oc_compile_interrupt) {
+               wlog("EOS VM OC compile complete id: ${id}, interrupt of ${r} <= ${a}::${act} after ${t}ms code ${h}",
+                    ("id", executing_action_id.load())("r", context.get_receiver())("a", context.get_action().account)
+                    ("act", context.get_action().name)("t", (fc::time_point::now()-start).count()/1000)("h", code_hash));
+               // currently no time limit on apply block, however timer does need to be reset
+               context.trx_context.resume_billing_timer(start);
+               continue; // attempt tierup again now that compile is complete
+            }
+            throw;
+         }
+         break;
+      }
+   }
+
+   // used for testing
+   uint64_t get_executing_action_id() const { return executing_action_id; }
+
    bool is_code_cached(const digest_type& code_hash, const uint8_t& vm_type, const uint8_t& vm_version) const {
       // This method is only called from tests; performance is not critical.
       // No need for an additional check if we should lock or not.
@@ -212,7 +296,11 @@ struct eosvmoc_tier {
    wasm_cache_index wasm_instantiation_cache;
 
    const chainbase::database& db;
+   platform_timer& main_thread_timer;
    const wasm_interface::vm_type wasm_runtime_time;
+   const wasm_interface::vm_oc_enable eosvmoc_tierup;
+   std::atomic<uint64_t> executing_action_id{0}; // monotonic increasing for each action apply, doesn't matter if it wraps
+   std::atomic<bool> eos_vm_oc_compile_interrupt{false};
 
 #ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
    std::unique_ptr<struct eosvmoc_tier> eosvmoc{nullptr}; // used by all threads
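The retry loop in wasm_interface_impl::apply() and async_compile_complete() above form the whole interrupt handshake: the monitor-reply thread compares the id captured when the compile was queued against the currently executing action id, and only on a match sets the interrupt flag and force-expires the main thread's deadline timer; the interpreter surfaces that as an interrupt_exception, which apply() turns into a retry that now finds the OC-compiled code in the cache. Incrementing the id both on entry and on exit means a late callback can never match an idle or subsequent action, which is also why wrap-around is harmless. A minimal standalone sketch of the handshake under those assumptions (hypothetical names throughout; platform_timer and the eos-vm module are reduced to atomics and a polling loop):

// Minimal standalone sketch of the interrupt handshake (hypothetical names,
// not the leap types): platform_timer::expire_now() is reduced to an atomic
// flag and run_interpreter() stands in for the eos-vm module's apply().
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

struct handshake_sketch {
   std::atomic<uint64_t> executing_action_id{0};
   std::atomic<bool>     compile_interrupt{false};
   std::atomic<bool>     timer_expired{false}; // stand-in for platform_timer

   // main thread: analogous to wasm_interface_impl::apply()
   void apply() {
      while (true) {
         ++executing_action_id;                      // odd value: action executing
         bool interrupted = run_interpreter();       // true if the timer fired
         bool retry = interrupted && compile_interrupt.exchange(false);
         ++executing_action_id;                      // even value: idle again
         if (!retry)
            break;                                   // finished, or a real deadline hit
         std::puts("retrying with the OC-compiled contract");
      }
   }

   // async thread: analogous to async_compile_complete()
   void compile_complete(uint64_t id_at_queue_time) {
      if (id_at_queue_time == executing_action_id) { // same action still running?
         compile_interrupt = true;
         timer_expired = true;                       // platform_timer::expire_now()
      }
   }

   bool run_interpreter() {
      // a real interpreter checks its deadline timer; here we just poll the flag
      for (int i = 0; i < 1000 && !timer_expired; ++i)
         std::this_thread::yield();
      return timer_expired.exchange(false);
   }
};

int main() {
   handshake_sketch s;
   std::thread async([&] { s.compile_complete(1); }); // compile queued by action id 1 finishes
   s.apply();
   async.join();
}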
diff --git a/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/code_cache.hpp b/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/code_cache.hpp
index 1097229574..e3f650a7e7 100644
--- a/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/code_cache.hpp
+++ b/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/code_cache.hpp
@@ -14,7 +14,6 @@
 #include <...>
 #include <...>
-#include <...>
 
 namespace std {
@@ -47,6 +46,9 @@ using allocator_t = bip::rbtree_best_fit<...>
 
+size_t hash_value(const code_tuple& ct) {
+   return std::hash<code_tuple>()(ct);
+}
 
 class code_cache_base {
 public:
@@ -92,22 +94,30 @@ class code_cache_base {
    std::filesystem::path _cache_file_path;
    int _cache_fd;
+   std::atomic<uint64_t> _executing_id{0}; // id of executing action
 
    io_context _ctx;
-   local::datagram_protocol::socket _compile_monitor_write_socket{_ctx};
+   local::datagram_protocol::socket _compile_monitor_write_socket{_ctx}; // protected by _mtx for async
    local::datagram_protocol::socket _compile_monitor_read_socket{_ctx};
 
+   struct queued_compile_entry {
+      compile_wasm_message msg;
+      std::vector<wrapped_fd> fds_to_pass;
+
+      const code_tuple& code() const { return msg.code; }
+   };
    //these are really only useful to the async code cache, but keep them here so free_code can be shared
    using queued_compilies_t = boost::multi_index_container<
-      compile_wasm_message,
+      queued_compile_entry,
      indexed_by<
         sequenced<>,
         hashed_unique<tag<by_hash>,
-           member<compile_wasm_message, code_tuple, &compile_wasm_message::code>>
+           const_mem_fun<queued_compile_entry, const code_tuple&, &queued_compile_entry::code>>
      >
   >;
-   queued_compilies_t _queued_compiles;
-   std::unordered_map<code_tuple, bool> _outstanding_compiles_and_poison;
+   std::mutex _mtx;
+   queued_compilies_t _queued_compiles; // protected by _mtx
+   std::unordered_map<code_tuple, bool> _outstanding_compiles_and_poison; // protected by _mtx
 
    size_t _free_bytes_eviction_threshold;
    void check_eviction_threshold(size_t free_bytes);
@@ -121,21 +131,30 @@ class code_cache_base {
 
 class code_cache_async : public code_cache_base {
 public:
-   code_cache_async(const std::filesystem::path& data_dir, const eosvmoc::config& eosvmoc_config, const chainbase::database& db);
+   // called from async thread, provides executing_action_id of any compiles spawned by get_descriptor_for_code
+   using compile_complete_callback = std::function<void(uint64_t, fc::time_point)>;
+
+   code_cache_async(const std::filesystem::path& data_dir, const eosvmoc::config& eosvmoc_config,
+                    const chainbase::database& db, compile_complete_callback cb);
    ~code_cache_async();
 
    //If code is in cache: returns pointer & bumps to front of MRU list
    //If code is not in cache, and not blacklisted, and not currently compiling: return nullptr and kick off compile
    //otherwise: return nullptr
-   const code_descriptor* const get_descriptor_for_code(mode m, const digest_type& code_id, const uint8_t& vm_version, get_cd_failure& failure);
+   const code_descriptor* const
+   get_descriptor_for_code(mode m, uint64_t executing_action_id,
+                           const digest_type& code_id, const uint8_t& vm_version, get_cd_failure& failure);
 
 private:
+   compile_complete_callback _compile_complete_func; // called from async thread, provides executing_action_id
    std::thread _monitor_reply_thread;
    boost::lockfree::spsc_queue<wasm_compilation_result_message> _result_queue;
-   void wait_on_compile_monitor_message();
-   std::tuple<size_t, size_t> consume_compile_thread_queue();
    std::unordered_set<code_tuple> _blacklist;
    size_t _threads;
+
+   void wait_on_compile_monitor_message();
+   std::tuple<size_t, size_t> consume_compile_thread_queue();
+   void process_queued_compiles();
 };
 
 class code_cache_sync : public code_cache_base {
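Storing queued_compile_entry instead of a bare compile_wasm_message lets the queue own both the message and the memfds that will later be passed to the monitor, and the hashed index is re-keyed through const_mem_fun so duplicate-compile lookups still work against the embedded message's code_tuple. A compilable sketch of the same container shape (illustrative only; the real index is keyed on code_tuple rather than std::string, and is tagged by_hash):

// Compilable sketch of the _queued_compiles container shape (illustrative
// only: std::string stands in for code_tuple, entry for queued_compile_entry).
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/mem_fun.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <cassert>
#include <string>

namespace bmi = boost::multi_index;

struct entry {
   std::string msg; // stands in for compile_wasm_message (plus its fds)
   const std::string& key() const { return msg; } // queued_compile_entry::code()
};

using queue_t = bmi::multi_index_container<
   entry,
   bmi::indexed_by<
      bmi::sequenced<>,     // index 0: queue order; push at front for high priority
      bmi::hashed_unique<   // index 1: O(1) "already queued?" lookups
         bmi::const_mem_fun<entry, const std::string&, &entry::key>>
   >
>;

int main() {
   queue_t q;
   q.push_back(entry{"low"});
   q.push_front(entry{"high"}); // high-priority compiles jump the line
   assert(q.begin()->msg == "high");
   assert(q.get<1>().find(std::string("low")) != q.get<1>().end());
}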
diff --git a/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/ipc_protocol.hpp b/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/ipc_protocol.hpp
index 93ce749fad..52bbfb3d26 100644
--- a/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/ipc_protocol.hpp
+++ b/libraries/chain/include/eosio/chain/webassembly/eos-vm-oc/ipc_protocol.hpp
@@ -22,6 +22,8 @@ struct code_tuple {
 
 struct compile_wasm_message {
    code_tuple code;
+   uint64_t executing_action_id{0}; // action id that initiated the compilation
+   fc::time_point queued_time; // when compilation was queued to begin
    std::optional<subjective_compile_limits> limits;
    //Two sent fd: 1) communication socket for result, 2) the wasm to compile
 };
@@ -35,6 +37,8 @@ struct code_compilation_result_message {
    unsigned apply_offset;
    int starting_memory_pages;
    unsigned initdata_prologue_size;
+   uint64_t executing_action_id{0}; // action id that initiated the compilation
+   fc::time_point queued_time; // when compilation was queued to begin
    //Two sent fds: 1) wasm code, 2) initial memory snapshot
 };
@@ -50,6 +54,8 @@ struct wasm_compilation_result_message {
    code_tuple code;
    wasm_compilation_result result;
    size_t cache_free_bytes;
+   uint64_t executing_action_id{0}; // action id that initiated the compilation, copied from compile_wasm_message
+   fc::time_point queued_time; // when compilation was queued to begin, copied from compile_wasm_message
 };
 
 using eosvmoc_message = std::variant<...>;
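queued_time is stamped in get_descriptor_for_code when the compile is queued and rides the full round trip, compile_wasm_message to code_compilation_result_message to wasm_compilation_result_message, so both the ilog in async_compile_complete and the wlog in apply can report end-to-end compile latency; since these messages cross process boundaries, the new fields presumably also need to appear in this file's FC_REFLECT serialization lists, which the surviving hunks do not show. fc::time_point subtraction yields fc::microseconds, whose count() is a raw microsecond tally, hence /1000 for whole milliseconds. The same arithmetic with std::chrono standing in for fc:

// Equivalent latency arithmetic with std::chrono standing in for fc:
#include <chrono>
#include <cstdio>

int main() {
   using clock = std::chrono::system_clock;
   auto queued_time = clock::now(); // stamped when the compile is queued
   auto elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(clock::now() - queued_time);
   std::printf("EOS VM OC tier up compile complete %lldms\n",
               static_cast<long long>(elapsed_us.count() / 1000)); // matches ${t} in the logs
}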
diff --git a/libraries/chain/wasm_interface.cpp b/libraries/chain/wasm_interface.cpp
--- a/libraries/chain/wasm_interface.cpp
+++ b/libraries/chain/wasm_interface.cpp
@@ ... @@
-#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
-      if(my->eosvmoc && (eosvmoc_tierup == wasm_interface::vm_oc_enable::oc_all || context.should_use_eos_vm_oc())) {
-         const chain::eosvmoc::code_descriptor* cd = nullptr;
-         chain::eosvmoc::code_cache_base::get_cd_failure failure = chain::eosvmoc::code_cache_base::get_cd_failure::temporary;
-         try {
-            // Ideally all validator nodes would switch to using oc before block producer nodes so that validators
-            // are never overwhelmed. Compile whitelisted account contracts first on non-produced blocks. This makes
-            // it more likely that validators will switch to the oc compiled contract before the block producer runs
-            // an action for the contract with oc.
-            chain::eosvmoc::code_cache_async::mode m;
-            m.whitelisted = context.is_eos_vm_oc_whitelisted();
-            m.high_priority = m.whitelisted && context.is_applying_block();
-            m.write_window = context.control.is_write_window();
-            cd = my->eosvmoc->cc.get_descriptor_for_code(m, code_hash, vm_version, failure);
-            if (test_disable_tierup)
-               cd = nullptr;
-         } catch (...) {
-            // swallow errors here, if EOS VM OC has gone in to the weeds we shouldn't bail: continue to try and run baseline
-            // In the future, consider moving bits of EOS VM that can fire exceptions and such out of this call path
-            static bool once_is_enough;
-            if (!once_is_enough)
-               elog("EOS VM OC has encountered an unexpected failure");
-            once_is_enough = true;
-         }
-         if (cd) {
-            if (!context.is_applying_block()) // read_only_trx_test.py looks for this log statement
-               tlog("${a} speculatively executing ${h} with eos vm oc", ("a", context.get_receiver())("h", code_hash));
-            my->eosvmoc->exec->execute(*cd, *my->eosvmoc->mem, context);
-            return;
-         }
-      }
-#endif
-
-      my->get_instantiated_module(code_hash, vm_type, vm_version, context.trx_context)->apply(context);
+      my->apply( code_hash, vm_type, vm_version, context );
    }
 
    bool wasm_interface::is_code_cached(const digest_type& code_hash, const uint8_t& vm_type, const uint8_t& vm_version) const {
@@ -131,6 +99,10 @@
    bool wasm_interface::is_eos_vm_oc_enabled() const {
      return my->is_eos_vm_oc_enabled();
   }
+
+   uint64_t wasm_interface::get_executing_action_id() const {
+      return my->get_executing_action_id();
+   }
 #endif
 
    wasm_instantiated_module_interface::~wasm_instantiated_module_interface() = default;
manager"); + + _queued_compiles.erase(nextup); + } +} +//called from main thread //number processed, bytes available (only if number processed > 0) std::tuple code_cache_async::consume_compile_thread_queue() { + std::unique_lock g(_mtx); + auto outstanding_compiles = _outstanding_compiles_and_poison; // will always be small, <= _threads + g.unlock(); + + std::vector erased; + erased.reserve(outstanding_compiles.size()); size_t bytes_remaining = 0; size_t gotsome = _result_queue.consume_all([&](const wasm_compilation_result_message& result) { - if(_outstanding_compiles_and_poison[result.code] == false) { + if(outstanding_compiles[result.code] == false) { std::visit(overloaded { [&](const code_descriptor& cd) { _cache_index.push_front(cd); @@ -94,43 +123,33 @@ std::tuple code_cache_async::consume_compile_thread_queue() { _blacklist.emplace(result.code); }, [&](const compilation_result_toofull&) { - run_eviction_round(); + run_eviction_round(); // call without mutex lock } }, result.result); } - _outstanding_compiles_and_poison.erase(result.code); + erased.push_back(result.code); bytes_remaining = result.cache_free_bytes; }); + g.lock(); + for (const auto& e : erased) + _outstanding_compiles_and_poison.erase(e); + g.unlock(); + return {gotsome, bytes_remaining}; } -const code_descriptor* const code_cache_async::get_descriptor_for_code(mode m, const digest_type& code_id, const uint8_t& vm_version, get_cd_failure& failure) { +const code_descriptor* const +code_cache_async::get_descriptor_for_code(mode m, uint64_t executing_action_id, const digest_type& code_id, const uint8_t& vm_version, get_cd_failure& failure) { //if there are any outstanding compiles, process the result queue now //When app is in write window, all tasks are running sequentially and read-only threads //are not running. Safe to update cache entries. 
- if(m.write_window && _outstanding_compiles_and_poison.size()) { + if(m.write_window) { auto [count_processed, bytes_remaining] = consume_compile_thread_queue(); if(count_processed) check_eviction_threshold(bytes_remaining); - - while(count_processed && _queued_compiles.size()) { - auto nextup = _queued_compiles.begin(); - - //it's not clear this check is required: if apply() was called for code then it existed in the code_index; and then - // if we got notification of it no longer existing we would have removed it from queued_compiles - const code_object* const codeobject = _db.find(boost::make_tuple(nextup->code.code_id, 0, nextup->code.vm_version)); - if(codeobject) { - _outstanding_compiles_and_poison.emplace(nextup->code, false); - std::vector fds_to_pass; - fds_to_pass.emplace_back(memfd_for_bytearray(codeobject->code)); - FC_ASSERT(write_message_with_fds(_compile_monitor_write_socket, *nextup, fds_to_pass), "EOS VM failed to communicate to OOP manager"); - --count_processed; - } - _queued_compiles.erase(nextup); - } } //check for entry in cache @@ -155,25 +174,17 @@ const code_descriptor* const code_cache_async::get_descriptor_for_code(mode m, c // whitelisted, remove from blacklist and allow to try compile again _blacklist.erase(ct); } + std::unique_lock g(_mtx); if(auto it = _outstanding_compiles_and_poison.find(ct); it != _outstanding_compiles_and_poison.end()) { failure = get_cd_failure::temporary; // Compile might not be done yet it->second = false; return nullptr; } - if(auto it = _queued_compiles.get().find(ct); it != _queued_compiles.get().end()) { - failure = get_cd_failure::temporary; // Compile might not be done yet - return nullptr; - } - - auto msg = compile_wasm_message{ ct, !m.whitelisted ? _eosvmoc_config.non_whitelisted_limits : std::optional{} }; - if(_outstanding_compiles_and_poison.size() >= _threads) { - if (m.high_priority) - _queued_compiles.push_front(msg); - else - _queued_compiles.push_back(msg); + if(_queued_compiles.get().contains(ct)) { failure = get_cd_failure::temporary; // Compile might not be done yet return nullptr; } + g.unlock(); const code_object* const codeobject = _db.find(boost::make_tuple(code_id, 0, vm_version)); if(!codeobject) { //should be impossible right? @@ -181,9 +192,26 @@ const code_descriptor* const code_cache_async::get_descriptor_for_code(mode m, c return nullptr; } - _outstanding_compiles_and_poison.emplace(ct, false); + auto msg = compile_wasm_message{ + .code = ct, + .executing_action_id = executing_action_id, + .queued_time = fc::time_point::now(), + .limits = !m.whitelisted ? 
_eosvmoc_config.non_whitelisted_limits : std::optional{} + }; std::vector fds_to_pass; fds_to_pass.emplace_back(memfd_for_bytearray(codeobject->code)); + + g.lock(); + if(_outstanding_compiles_and_poison.size() >= _threads) { + if (m.high_priority) + _queued_compiles.emplace_front(std::move(msg), std::move(fds_to_pass)); + else + _queued_compiles.emplace_back(std::move(msg), std::move(fds_to_pass)); + failure = get_cd_failure::temporary; // Compile might not be done yet + return nullptr; + } + + _outstanding_compiles_and_poison.emplace(ct, false); write_message_with_fds(_compile_monitor_write_socket, msg, fds_to_pass); failure = get_cd_failure::temporary; // Compile might not be done yet return nullptr; @@ -216,7 +244,12 @@ const code_descriptor* const code_cache_sync::get_descriptor_for_code_sync(mode std::vector fds_to_pass; fds_to_pass.emplace_back(memfd_for_bytearray(codeobject->code)); - auto msg = compile_wasm_message{ {code_id, vm_version}, !m.whitelisted ? _eosvmoc_config.non_whitelisted_limits : std::optional{} }; + auto msg = compile_wasm_message{ + .code = {code_id, vm_version}, + .executing_action_id = 0, + .queued_time = fc::time_point{}, // could use now() if compile time measurement desired + .limits = !m.whitelisted ? _eosvmoc_config.non_whitelisted_limits : std::optional{} + }; write_message_with_fds(_compile_monitor_write_socket, msg, fds_to_pass); auto [success, message, fds] = read_message_with_fds(_compile_monitor_read_socket); EOS_ASSERT(success, wasm_execution_error, "failed to read response from monitor process"); @@ -394,8 +427,11 @@ code_cache_base::~code_cache_base() { } +// called from main thread void code_cache_base::free_code(const digest_type& code_id, const uint8_t& vm_version) { code_cache_index::index::type::iterator it = _cache_index.get().find(boost::make_tuple(code_id, vm_version)); + + std::lock_guard g(_mtx); if(it != _cache_index.get().end()) { write_message_with_fds(_compile_monitor_write_socket, evict_wasms_message{ {*it} }); _cache_index.get().erase(it); @@ -414,15 +450,18 @@ void code_cache_base::free_code(const digest_type& code_id, const uint8_t& vm_ve compiling_it->second = true; } +// called from main thread void code_cache_base::run_eviction_round() { evict_wasms_message evict_msg; for(unsigned int i = 0; i < 25 && _cache_index.size() > 1; ++i) { evict_msg.codes.emplace_back(_cache_index.back()); _cache_index.pop_back(); } + std::lock_guard g(_mtx); write_message_with_fds(_compile_monitor_write_socket, evict_msg); } +// called from main thread void code_cache_base::check_eviction_threshold(size_t free_bytes) { if(free_bytes < _free_bytes_eviction_threshold) run_eviction_round(); diff --git a/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp b/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp index 9b160cf03d..c811dfce7e 100644 --- a/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp +++ b/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp @@ -71,7 +71,7 @@ struct compile_monitor_session { connection_dead_signal(); return; } - kick_compile_off(compile.code, compile.limits, std::move(fds[0])); + kick_compile_off(compile, std::move(fds[0])); }, [&](const evict_wasms_message& evict) { for(const code_descriptor& cd : evict.codes) { @@ -90,7 +90,7 @@ struct compile_monitor_session { }); } - void kick_compile_off(const code_tuple& code_id, const std::optional& limits, wrapped_fd&& wasm_code) { + void kick_compile_off(const compile_wasm_message& msg, wrapped_fd&& wasm_code) { 
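consume_compile_thread_queue() above snapshots the outstanding-compile map under _mtx, drains the lock-free result queue with no lock held — the visitor may call run_eviction_round(), which itself takes _mtx to write to the monitor socket, so holding the mutex across consume_all would self-deadlock — and only afterwards commits the erases. A reduced sketch of that copy-then-commit pattern (hypothetical types; int stands in for code_tuple):

// Reduced sketch of the copy-then-commit pattern (hypothetical types; int
// stands in for code_tuple). Snapshot the shared map under the lock, drain
// the lock-free queue unlocked, then commit the erases under the lock.
#include <boost/lockfree/spsc_queue.hpp>
#include <mutex>
#include <unordered_map>
#include <vector>

std::mutex mtx;
std::unordered_map<int, bool> outstanding;   // _outstanding_compiles_and_poison
boost::lockfree::spsc_queue<int> results{8}; // _result_queue

size_t consume() {
   std::unique_lock g(mtx);
   auto snapshot = outstanding;              // small: bounded by compile thread count
   g.unlock();

   std::vector<int> erased;
   size_t n = results.consume_all([&](int code) {
      if (snapshot[code] == false) {         // not poisoned: accept the result
         // install compiled code / blacklist / evict here, without holding mtx
      }
      erased.push_back(code);
   });

   g.lock();                                 // commit the erases
   for (int code : erased)
      outstanding.erase(code);
   return n;
}

int main() {
   outstanding.emplace(42, false);
   results.push(42);
   return consume() == 1 && outstanding.empty() ? 0 : 1;
}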
diff --git a/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp b/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp
index 9b160cf03d..c811dfce7e 100644
--- a/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp
+++ b/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_monitor.cpp
@@ -71,7 +71,7 @@ struct compile_monitor_session {
                  connection_dead_signal();
                  return;
               }
-               kick_compile_off(compile.code, compile.limits, std::move(fds[0]));
+               kick_compile_off(compile, std::move(fds[0]));
            },
            [&](const evict_wasms_message& evict) {
               for(const code_descriptor& cd : evict.codes) {
@@ -90,7 +90,7 @@ struct compile_monitor_session {
      });
   }
 
-   void kick_compile_off(const code_tuple& code_id, const std::optional<subjective_compile_limits>& limits, wrapped_fd&& wasm_code) {
+   void kick_compile_off(const compile_wasm_message& msg, wrapped_fd&& wasm_code) {
      //prepare a requst to go out to the trampoline
      int socks[2];
      socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, socks);
@@ -100,14 +100,14 @@ struct compile_monitor_session {
      fds_pass_to_trampoline.emplace_back(socks[1]);
      fds_pass_to_trampoline.emplace_back(std::move(wasm_code));
 
-      eosvmoc_message trampoline_compile_request = compile_wasm_message{code_id, limits};
+      eosvmoc_message trampoline_compile_request = msg;
      if(write_message_with_fds(_trampoline_socket, trampoline_compile_request, fds_pass_to_trampoline) == false) {
-         wasm_compilation_result_message reply{code_id, compilation_result_unknownfailure{}, _allocator->get_free_memory()};
+         wasm_compilation_result_message reply{msg.code, compilation_result_unknownfailure{}, _allocator->get_free_memory(), msg.executing_action_id, msg.queued_time};
        write_message_with_fds(_nodeos_instance_socket, reply);
        return;
      }
 
-      current_compiles.emplace_front(code_id, std::move(response_socket));
+      current_compiles.emplace_front(msg.code, std::move(response_socket));
      read_message_from_compile_task(current_compiles.begin());
   }
@@ -124,7 +124,7 @@ struct compile_monitor_session {
      auto& [code, socket] = *current_compile_it;
      auto [success, message, fds] = read_message_with_fds(socket);
 
-      wasm_compilation_result_message reply{code, compilation_result_unknownfailure{}, _allocator->get_free_memory()};
+      wasm_compilation_result_message reply{code, compilation_result_unknownfailure{}, _allocator->get_free_memory(), 0, fc::time_point{}};
 
      void* code_ptr = nullptr;
      void* mem_ptr = nullptr;
@@ -143,6 +143,8 @@ struct compile_monitor_session {
            copy_memfd_contents_to_pointer(code_ptr, fds[0]);
            copy_memfd_contents_to_pointer(mem_ptr, fds[1]);
 
+            reply.executing_action_id = result.executing_action_id;
+            reply.queued_time = result.queued_time;
            reply.result = code_descriptor {
               code.code_id,
               code.vm_version,
diff --git a/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_trampoline.cpp b/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_trampoline.cpp
index e136dae981..f84471635c 100644
--- a/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_trampoline.cpp
+++ b/libraries/chain/webassembly/runtimes/eos-vm-oc/compile_trampoline.cpp
@@ -17,7 +17,8 @@ using namespace IR;
 
 namespace eosio { namespace chain { namespace eosvmoc {
 
-void run_compile(wrapped_fd&& response_sock, wrapped_fd&& wasm_code, uint64_t stack_size_limit, size_t generated_code_size_limit) noexcept { //noexcept; we'll just blow up if anything tries to cross this boundry
+void run_compile(wrapped_fd&& response_sock, wrapped_fd&& wasm_code, uint64_t stack_size_limit,
+                 size_t generated_code_size_limit, uint64_t executing_action_id, fc::time_point queued_time) noexcept { //noexcept; we'll just blow up if anything tries to cross this boundry
    std::vector<uint8_t> wasm = vector_for_memfd(wasm_code);
 
    //ideally we catch exceptions and sent them upstream as strings for easier reporting
@@ -33,6 +34,8 @@ void run_compile(wrapped_fd&& response_sock, wrapped_fd&& wasm_code, uint64_t stack_size_limit, size_t generated_code_size_limit) noexcept {
    instantiated_code code = LLVMJIT::instantiateModule(module, stack_size_limit, generated_code_size_limit);
 
    code_compilation_result_message result_message;
+   result_message.executing_action_id = executing_action_id;
+   result_message.queued_time = queued_time;
 
    const std::map<unsigned, uintptr_t>& function_to_offsets = code.function_offsets;
@@ -166,7 +169,8 @@ void run_compile_trampoline(int fd) {
      prctl(PR_SET_NAME, "oc-compile");
      prctl(PR_SET_PDEATHSIG, SIGKILL);
 
-      const auto& limits = std::get<compile_wasm_message>(message).limits;
+      const auto& msg = std::get<compile_wasm_message>(message);
+      const auto& limits = msg.limits;
 
      uint64_t stack_size = std::numeric_limits<uint64_t>::max();
      uint64_t generated_code_size_limit = std::numeric_limits<uint64_t>::max();
@@ -193,7 +197,7 @@ void run_compile_trampoline(int fd) {
      struct rlimit core_limits = {0u, 0u};
      setrlimit(RLIMIT_CORE, &core_limits);
 
-      run_compile(std::move(fds[0]), std::move(fds[1]), stack_size, generated_code_size_limit);
+      run_compile(std::move(fds[0]), std::move(fds[1]), stack_size, generated_code_size_limit, msg.executing_action_id, msg.queued_time);
 
      _exit(0);
   }
   else if(pid == -1)