From 9bffe3b6f059e3b90290ec7033ce35b1f66470c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= <3044353+pwojcikdev@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:40:22 +0200 Subject: [PATCH] Database scan improvements (#4734) * Rename to `database_scan` * More efficient database scan iteration * Increase batch size * Comments --- nano/node/CMakeLists.txt | 4 +- .../bootstrap_ascending/database_scan.cpp | 167 ++++++++++++++++++ .../bootstrap_ascending/database_scan.hpp | 61 +++++++ nano/node/bootstrap_ascending/iterators.cpp | 128 -------------- nano/node/bootstrap_ascending/iterators.hpp | 62 ------- nano/node/bootstrap_ascending/service.cpp | 7 +- nano/node/bootstrap_ascending/service.hpp | 4 +- nano/store/pending.hpp | 3 + 8 files changed, 239 insertions(+), 197 deletions(-) create mode 100644 nano/node/bootstrap_ascending/database_scan.cpp create mode 100644 nano/node/bootstrap_ascending/database_scan.hpp delete mode 100644 nano/node/bootstrap_ascending/iterators.cpp delete mode 100644 nano/node/bootstrap_ascending/iterators.hpp diff --git a/nano/node/CMakeLists.txt b/nano/node/CMakeLists.txt index 4cafc54bde..419e00a72c 100644 --- a/nano/node/CMakeLists.txt +++ b/nano/node/CMakeLists.txt @@ -49,8 +49,8 @@ add_library( bootstrap_ascending/throttle.cpp bootstrap_ascending/account_sets.hpp bootstrap_ascending/account_sets.cpp - bootstrap_ascending/iterators.hpp - bootstrap_ascending/iterators.cpp + bootstrap_ascending/database_scan.hpp + bootstrap_ascending/database_scan.cpp bootstrap_ascending/peer_scoring.hpp bootstrap_ascending/peer_scoring.cpp bootstrap_ascending/service.hpp diff --git a/nano/node/bootstrap_ascending/database_scan.cpp b/nano/node/bootstrap_ascending/database_scan.cpp new file mode 100644 index 0000000000..7a7b287fc0 --- /dev/null +++ b/nano/node/bootstrap_ascending/database_scan.cpp @@ -0,0 +1,167 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * database_scan + */ + 
+nano::bootstrap_ascending::database_scan::database_scan (nano::ledger & ledger_a) :
+	ledger{ ledger_a },
+	accounts_iterator{ ledger },
+	pending_iterator{ ledger }
+{
+}
+
+nano::account nano::bootstrap_ascending::database_scan::next (std::function const & filter)
+{
+	if (queue.empty ()) // Refill only once the previous batches are fully drained
+	{
+		fill ();
+	}
+
+	while (!queue.empty ())
+	{
+		auto result = queue.front ();
+		queue.pop_front (); // Entries rejected by the filter are dropped, not requeued
+
+		if (filter (result))
+		{
+			return result;
+		}
+	}
+
+	return { 0 }; // Zero account: no queued entry passed the filter
+}
+
+void nano::bootstrap_ascending::database_scan::fill ()
+{
+	auto transaction = ledger.store.tx_begin_read (); // One read transaction shared by both table scans
+
+	auto set1 = accounts_iterator.next_batch (transaction, batch_size);
+	auto set2 = pending_iterator.next_batch (transaction, batch_size);
+
+	queue.insert (queue.end (), set1.begin (), set1.end ());
+	queue.insert (queue.end (), set2.begin (), set2.end ());
+}
+
+bool nano::bootstrap_ascending::database_scan::warmed_up () const
+{
+	return accounts_iterator.warmed_up () && pending_iterator.warmed_up (); // Both tables must have been fully scanned at least once
+}
+
+std::unique_ptr nano::bootstrap_ascending::database_scan::collect_container_info (std::string const & name) const
+{
+	auto composite = std::make_unique (name);
+	composite->add_component (std::make_unique (container_info{ "accounts_iterator", accounts_iterator.completed, 0 }));
+	composite->add_component (std::make_unique (container_info{ "pending_iterator", pending_iterator.completed, 0 }));
+	return composite;
+}
+
+/*
+ * account_database_iterator
+ */
+
+nano::bootstrap_ascending::account_database_iterator::account_database_iterator (nano::ledger & ledger_a) :
+	ledger{ ledger_a }
+{
+}
+
+std::deque nano::bootstrap_ascending::account_database_iterator::next_batch (nano::store::transaction & transaction, size_t batch_size)
+{
+	std::deque result;
+
+	auto it = ledger.store.account.begin (transaction, next);
+	auto const end = ledger.store.account.end ();
+
+	for (size_t count = 0; it != end && count < batch_size; ++it, ++count)
+	{
+		auto const & account = it->first;
+		result.push_back (account);
+		next = account.number () + 1; // Starting key for the following batch
+	}
+
+	if (it == end)
+	{
+		// End of the account table reached: reset for the next ledger iteration
+		next = { 0 };
+		++completed;
+	}
+
+	return result;
+}
+
+bool nano::bootstrap_ascending::account_database_iterator::warmed_up () const
+{
+	return completed > 0; // True once the end of the table has been reached at least once
+}
+
+/*
+ * pending_database_iterator
+ */
+
+nano::bootstrap_ascending::pending_database_iterator::pending_database_iterator (nano::ledger & ledger_a) :
+	ledger{ ledger_a }
+{
+}
+
+std::deque nano::bootstrap_ascending::pending_database_iterator::next_batch (nano::store::transaction & transaction, size_t batch_size)
+{
+	std::deque result;
+
+	auto it = ledger.store.pending.begin (transaction, next);
+	auto const end = ledger.store.pending.end ();
+
+	// TODO: This pending iteration heuristic should be encapsulated in a pending_iterator class and reused across other components
+	// The heuristic is to advance the iterator sequentially until we reach a new account or perform a fresh lookup if the account has too many pending blocks
+	// This is to avoid the overhead of performing a fresh lookup for every pending account as majority of accounts have only a few pending blocks
+	auto advance_iterator = [&] () {
+		auto const starting_account = it->first.account;
+
+		// For RocksDB, sequential access is ~10x faster than performing a fresh lookup (tested on my machine)
+		const size_t sequential_attempts = 10;
+
+		// First try advancing sequentially, stopping as soon as a different account is reached
+		for (size_t count = 0; count < sequential_attempts && it != end; ++count, ++it)
+		{
+			if (it->first.account != starting_account)
+			{
+				break;
+			}
+		}
+
+		// If we are still on the starting account after the sequential attempts, perform a fresh lookup past it
+		if (it != end && it->first.account == starting_account)
+		{
+			it = ledger.store.pending.begin (transaction, { starting_account.number () + 1, 0 });
+		}
+
+		debug_assert (it == end || it->first.account != starting_account);
+	};
+
+	for (size_t count = 0; it != end && count < batch_size; advance_iterator (), ++count)
+	{
+		auto const & account = it->first.account; // One entry per account, regardless of how many pending keys it has
+		result.push_back (account);
+		next = { account.number () + 1, 0 }; // Starting key for the following batch
+	}
+
+	if (it == end)
+	{
+		// End of the pending table reached: reset for the next ledger iteration
+		next = { 0, 0 };
+		++completed;
+	}
+
+	return result;
+}
+
+bool nano::bootstrap_ascending::pending_database_iterator::warmed_up () const
+{
+	return completed > 0; // True once the end of the table has been reached at least once
+}
\ No newline at end of file
diff --git a/nano/node/bootstrap_ascending/database_scan.hpp b/nano/node/bootstrap_ascending/database_scan.hpp
new file mode 100644
index 0000000000..3c61be9a67
--- /dev/null
+++ b/nano/node/bootstrap_ascending/database_scan.hpp
@@ -0,0 +1,61 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include
+
+namespace nano::bootstrap_ascending
+{
+struct account_database_iterator
+{
+	explicit account_database_iterator (nano::ledger &);
+
+	std::deque next_batch (nano::store::transaction &, size_t batch_size);
+	bool warmed_up () const;
+
+	nano::ledger & ledger;
+	nano::account next{ 0 }; // Key the next batch starts from
+	size_t completed{ 0 }; // Number of times the end of the table has been reached
+};
+
+struct pending_database_iterator
+{
+	explicit pending_database_iterator (nano::ledger &);
+
+	std::deque next_batch (nano::store::transaction &, size_t batch_size);
+	bool warmed_up () const;
+
+	nano::ledger & ledger;
+	nano::pending_key next{ 0, 0 }; // Key the next batch starts from
+	size_t completed{ 0 }; // Number of times the end of the table has been reached
+};
+
+class database_scan
+{
+public:
+	explicit database_scan (nano::ledger &);
+
+	nano::account next (std::function const & filter);
+
+	// Indicates if a full ledger iteration has taken place e.g.
warmed up + bool warmed_up () const; + + std::unique_ptr collect_container_info (std::string const & name) const; + +private: // Dependencies + nano::ledger & ledger; + +private: + void fill (); + +private: + account_database_iterator accounts_iterator; + pending_database_iterator pending_iterator; + + std::deque queue; + + static size_t constexpr batch_size = 512; +}; +} diff --git a/nano/node/bootstrap_ascending/iterators.cpp b/nano/node/bootstrap_ascending/iterators.cpp deleted file mode 100644 index 47812801fd..0000000000 --- a/nano/node/bootstrap_ascending/iterators.cpp +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * database_iterator - */ - -nano::bootstrap_ascending::database_iterator::database_iterator (nano::ledger & ledger, table_type table_a) : - ledger{ ledger }, - table{ table_a } -{ -} - -nano::account nano::bootstrap_ascending::database_iterator::operator* () const -{ - return current; -} - -void nano::bootstrap_ascending::database_iterator::next (secure::transaction & tx) -{ - switch (table) - { - case table_type::account: - { - auto item = ledger.store.account.begin (tx, current.number () + 1); - if (item != ledger.store.account.end ()) - { - current = item->first; - } - else - { - current = { 0 }; - } - break; - } - case table_type::pending: - { - auto item = ledger.any.receivable_upper_bound (tx, current); - if (item != ledger.any.receivable_end ()) - { - current = item->first.account; - } - else - { - current = { 0 }; - } - break; - } - } -} - -/* - * buffered_iterator - */ - -nano::bootstrap_ascending::buffered_iterator::buffered_iterator (nano::ledger & ledger) : - ledger{ ledger }, - accounts_iterator{ ledger, database_iterator::table_type::account }, - pending_iterator{ ledger, database_iterator::table_type::pending } -{ -} - -nano::account nano::bootstrap_ascending::buffered_iterator::operator* () const -{ - return !buffer.empty () ? 
buffer.front () : nano::account{ 0 }; -} - -nano::account nano::bootstrap_ascending::buffered_iterator::next (std::function const & filter) -{ - if (buffer.empty ()) - { - fill (); - } - - while (!buffer.empty ()) - { - auto result = buffer.front (); - buffer.pop_front (); - - if (filter (result)) - { - return result; - } - } - - return { 0 }; -} - -bool nano::bootstrap_ascending::buffered_iterator::warmup () const -{ - return warmup_m; -} - -void nano::bootstrap_ascending::buffered_iterator::fill () -{ - debug_assert (buffer.empty ()); - - // Fill half from accounts table and half from pending table - auto transaction = ledger.tx_begin_read (); - - for (int n = 0; n < size / 2; ++n) - { - accounts_iterator.next (transaction); - if (!(*accounts_iterator).is_zero ()) - { - buffer.push_back (*accounts_iterator); - } - } - - for (int n = 0; n < size / 2; ++n) - { - pending_iterator.next (transaction); - if (!(*pending_iterator).is_zero ()) - { - buffer.push_back (*pending_iterator); - } - else - { - warmup_m = false; - } - } -} diff --git a/nano/node/bootstrap_ascending/iterators.hpp b/nano/node/bootstrap_ascending/iterators.hpp deleted file mode 100644 index e5404098ef..0000000000 --- a/nano/node/bootstrap_ascending/iterators.hpp +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once - -#include - -#include - -namespace nano -{ -class ledger; -} - -namespace nano::secure -{ -class transaction; -} - -namespace nano::bootstrap_ascending -{ -class database_iterator -{ -public: - enum class table_type - { - account, - pending - }; - - explicit database_iterator (nano::ledger & ledger, table_type); - nano::account operator* () const; - void next (secure::transaction & tx); - -private: - nano::ledger & ledger; - nano::account current{ 0 }; - const table_type table; -}; - -class buffered_iterator -{ -public: - explicit buffered_iterator (nano::ledger & ledger); - - nano::account operator* () const; - nano::account next (std::function const & filter); - - // Indicates if a full ledger 
iteration has taken place e.g. warmed up - bool warmup () const; - -private: - void fill (); - -private: - nano::ledger & ledger; - std::deque buffer; - bool warmup_m{ true }; - - database_iterator accounts_iterator; - database_iterator pending_iterator; - - static std::size_t constexpr size = 1024; -}; -} // nano::bootstrap_ascending diff --git a/nano/node/bootstrap_ascending/service.cpp b/nano/node/bootstrap_ascending/service.cpp index c82e91c51e..450767f032 100644 --- a/nano/node/bootstrap_ascending/service.cpp +++ b/nano/node/bootstrap_ascending/service.cpp @@ -28,7 +28,7 @@ nano::bootstrap_ascending::service::service (nano::node_config const & node_conf stats{ stat_a }, logger{ logger_a }, accounts{ config.account_sets, stats }, - iterator{ ledger }, + database_scan{ ledger }, throttle{ compute_throttle_size () }, scoring{ config, node_config_a.network_params.network }, database_limiter{ config.database_rate_limit, 1.0 } @@ -345,7 +345,7 @@ nano::account nano::bootstrap_ascending::service::next_database (bool should_thr return { 0 }; } - auto account = iterator.next ([this] (nano::account const & account) { + auto account = database_scan.next ([this] (nano::account const & account) { return count_tags (account, query_source::database) == 0; }); @@ -512,7 +512,7 @@ void nano::bootstrap_ascending::service::run_database () while (!stopped) { // Avoid high churn rate of database requests - bool should_throttle = !iterator.warmup () && throttle.throttled (); + bool should_throttle = !database_scan.warmed_up () && throttle.throttled (); lock.unlock (); stats.inc (nano::stat::type::bootstrap_ascending, nano::stat::detail::loop_database); run_one_database (should_throttle); @@ -839,6 +839,7 @@ std::unique_ptr nano::bootstrap_ascending::servi composite->add_component (std::make_unique (container_info{ "throttle", throttle.size (), 0 })); composite->add_component (std::make_unique (container_info{ "throttle_successes", throttle.successes (), 0 })); 
composite->add_component (accounts.collect_container_info ("accounts")); + composite->add_component (database_scan.collect_container_info ("database_scan")); return composite; } diff --git a/nano/node/bootstrap_ascending/service.hpp b/nano/node/bootstrap_ascending/service.hpp index a31461a7f1..4f3f3668dd 100644 --- a/nano/node/bootstrap_ascending/service.hpp +++ b/nano/node/bootstrap_ascending/service.hpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -155,7 +155,7 @@ namespace bootstrap_ascending private: nano::bootstrap_ascending::account_sets accounts; - nano::bootstrap_ascending::buffered_iterator iterator; + nano::bootstrap_ascending::database_scan database_scan; nano::bootstrap_ascending::throttle throttle; nano::bootstrap_ascending::peer_scoring scoring; diff --git a/nano/store/pending.hpp b/nano/store/pending.hpp index 49f2b8a31c..23272cba73 100644 --- a/nano/store/pending.hpp +++ b/nano/store/pending.hpp @@ -20,6 +20,9 @@ namespace nano::store */ class pending { +public: + using iterator = store::iterator; + public: virtual void put (store::write_transaction const &, nano::pending_key const &, nano::pending_info const &) = 0; virtual void del (store::write_transaction const &, nano::pending_key const &) = 0;