From 5c4a35a23e787c7c8a4237ad98e16f73487e9bc5 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Fri, 8 Nov 2024 07:53:30 -0600 Subject: [PATCH 1/3] GH-985 For an interrupt exception do not remove block from fork db --- libraries/chain/controller.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index a88b647c2d..b01d44b80d 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -4462,6 +4462,7 @@ struct controller_impl { } catch (const fc::exception& e) { if (e.code() == interrupt_exception::code_value) { ilog("interrupt while applying block ${bn} : ${id}", ("bn", bsp->block_num())("id", bsp->id())); + throw; // do not want to remove block from fork_db } else { elog("exception thrown while applying block ${bn} : ${id}, previous ${p}, error: ${e}", ("bn", bsp->block_num())("id", bsp->id())("p", bsp->previous())("e", e.to_detail_string())); @@ -4478,18 +4479,20 @@ struct controller_impl { // Remove the block that threw and all forks built off it. fork_db.remove( (*ritr)->id() ); - // pop all blocks from the bad fork, discarding their transactions - // ritr base is a forward itr to the last block successfully applied - auto applied_itr = ritr.base(); - for( auto itr = applied_itr; itr != new_head_branch.end(); ++itr ) { - pop_block(); - } - EOS_ASSERT( !switch_fork || chain_head.id() == old_head_branch.back()->header.previous, fork_database_exception, - "loss of sync between fork_db and chainbase during fork switch reversal" ); // _should_ never fail + if (switch_fork) { + // pop all blocks from the bad fork, discarding their transactions + // ritr base is a forward itr to the last block successfully applied + auto applied_itr = ritr.base(); + for( auto itr = applied_itr; itr != new_head_branch.end(); ++itr ) { + pop_block(); + } + EOS_ASSERT( chain_head.id() == old_head_branch.back()->header.previous, fork_database_exception, + "loss of sync between fork_db and chainbase during fork switch reversal" ); // _should_ never fail - // re-apply good blocks - for( auto ritr = old_head_branch.rbegin(); ritr != old_head_branch.rend(); ++ritr ) { - apply_block( *ritr, controller::block_status::validated /* we previously validated these blocks*/, trx_lookup ); + // re-apply good blocks + for( auto ritr = old_head_branch.rbegin(); ritr != old_head_branch.rend(); ++ritr ) { + apply_block( *ritr, controller::block_status::validated /* we previously validated these blocks*/, trx_lookup ); + } } std::rethrow_exception(except); } // end if exception From 0b3825d849c707d4629a23c5417134a8a0216d34 Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 11 Nov 2024 08:51:33 -0600 Subject: [PATCH 2/3] GH-985 Remove block from forkdb on interrupt if it has been running longer than 2 block intervals --- libraries/chain/controller.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/libraries/chain/controller.cpp b/libraries/chain/controller.cpp index b01d44b80d..70e245ed93 100644 --- a/libraries/chain/controller.cpp +++ b/libraries/chain/controller.cpp @@ -4436,8 +4436,9 @@ struct controller_impl { } } - auto start = fc::time_point::now(); + const auto start_apply_blocks_loop = fc::time_point::now(); for( auto ritr = new_head_branch.rbegin(); ritr != new_head_branch.rend(); ++ritr ) { + const auto start_apply_block = fc::time_point::now(); auto except = std::exception_ptr{}; const auto& bsp = *ritr; try { @@ -4450,7 +4451,7 @@ struct controller_impl { } // Break every ~500ms to allow other tasks (e.g. get_info, SHiP) opportunity to run. User expected // to call apply_blocks again if this returns incomplete. - if (!replaying && fc::time_point::now() - start > fc::milliseconds(500)) { + if (!replaying && fc::time_point::now() - start_apply_blocks_loop > fc::milliseconds(500)) { result = controller::apply_blocks_result::incomplete; break; } @@ -4461,8 +4462,11 @@ struct controller_impl { throw; } catch (const fc::exception& e) { if (e.code() == interrupt_exception::code_value) { - ilog("interrupt while applying block ${bn} : ${id}", ("bn", bsp->block_num())("id", bsp->id())); - throw; // do not want to remove block from fork_db + if (fc::time_point::now() - start_apply_block < fc::milliseconds(2 * config::block_interval_ms)) { + ilog("interrupt while applying block ${bn} : ${id}", ("bn", bsp->block_num())("id", bsp->id())); + throw; // do not want to remove block from fork_db if not interrupting a long, maybe infinite, block + } + ilog("interrupt while applying block, removing block ${bn} : ${id}", ("bn", bsp->block_num())("id", bsp->id())); } else { elog("exception thrown while applying block ${bn} : ${id}, previous ${p}, error: ${e}", ("bn", bsp->block_num())("id", bsp->id())("p", bsp->previous())("e", e.to_detail_string())); From af67e0d84c1f4c97401bf90c2b8ddd9bf4373e0b Mon Sep 17 00:00:00 2001 From: Kevin Heifner Date: Mon, 11 Nov 2024 08:51:56 -0600 Subject: [PATCH 3/3] GH-985 Cleanup interrupt test --- tests/interrupt_trx_test.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/interrupt_trx_test.py b/tests/interrupt_trx_test.py index b39172f4c1..e680f49a79 100755 --- a/tests/interrupt_trx_test.py +++ b/tests/interrupt_trx_test.py @@ -71,12 +71,6 @@ trans=prodNode.pushMessage(contract, action, data, opts, silentErrors=True) assert trans and not trans[0], "push doitforever action did not fail as expected" - prodNode.processUrllibRequest("test_control", "swap_action", {"from": "doitslow", "to": "doitforever"}) - - action="doitslow" - trans=prodNode.pushMessage(contract, action, data, opts) - assert trans and trans[0], "Failed to push doitslow action" - prodNode.waitForProducer("defproducera") prodNode.processUrllibRequest("test_control", "swap_action", @@ -84,6 +78,10 @@ "trx_priv_key":EOSIO_ACCT_PRIVATE_DEFAULT_KEY, "blk_priv_key":cluster.defproduceraAccount.activePrivateKey}) + action="doitslow" + trans=prodNode.pushMessage(contract, action, data, opts) + assert trans and trans[0], "Failed to push doitslow action" + assert not prodNode.waitForHeadToAdvance(3), f"prodNode did advance head after doitforever action" prodNode.interruptAndVerifyExitStatus()