From 7c0a644fb2d72d64d3a6157dead043c43527b2f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Wed, 27 Nov 2024 13:54:56 +0100 Subject: [PATCH 01/28] Changed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 1f2359478..1e91d4cf9 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. - PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From d9a333481c3b62b4fc2757e008e14ee35618a759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 12:03:59 +0100 Subject: [PATCH 02/28] Fixed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 1e91d4cf9..94a50a9d9 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. - SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From 1fc0f2f3182ab1e82c478960591ca9fe7dba5a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 12:14:48 +0100 Subject: [PATCH 03/28] Reverting changes for tests --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 94a50a9d9..4b170025b 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. - SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + // SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + //SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From cae4fb990fa729652bd29d8ab13528187af5e4e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 12:52:58 +0100 Subject: [PATCH 04/28] Fixed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 4b170025b..928f8fd37 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. - // SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + SWARN("Error processing message, reconnecting", { + {"peer", peer ? peer->name : "unknown"}, + {"message", !message.methodLine.empty() ? message.methodLine : ""}, + {"reason", e.what()} + }); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1717,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - //SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + SWARN("Error processing message, reconnecting", { + {"peer", peer ? peer->name : "unknown"}, + {"message", !message.methodLine.empty() ? message.methodLine : ""}, + {"reason", e.what()} + }); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From 30dde06fa45f25f8b570a73564c194592e97687b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 13:31:56 +0100 Subject: [PATCH 05/28] Fixed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 0b4b69184..5e5984670 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1523,7 +1523,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // This is the same handling as at the bottom of _onMESSAGE. SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.methodLine.empty() ? message.methodLine : ""}, + {"message", !message.empty()? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); @@ -1755,7 +1755,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } catch (const SException& e) { SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.methodLine.empty() ? message.methodLine : ""}, + {"message", !message.empty() ? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); From d15d30074e69cfa6c1ed056de81434b67d1eafac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Tue, 3 Dec 2024 12:44:42 +0100 Subject: [PATCH 06/28] Added logging parameters to whitelist --- libstuff/SLog.cpp | 2 ++ sqlitecluster/SQLiteNode.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index e87c92398..1cadbafbe 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -74,8 +74,10 @@ static const set PARAMS_WHITELIST = { "lastIP", "logParam", "nvpName", + "peer", "policyAccountID", "policyID", + "reason", "reportID", "requestID", "requestTimestamp", diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 5e5984670..0b911c9b0 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1523,7 +1523,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // This is the same handling as at the bottom of _onMESSAGE. SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.empty()? message.methodLine : ""}, + {"errorMessage", !message.empty()? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); @@ -1755,7 +1755,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } catch (const SException& e) { SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.empty() ? message.methodLine : ""}, + {"errorMessage", !message.empty() ? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); From a962a8d5e04ef3fcfffd2835c1919d353ba987b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Tue, 3 Dec 2024 12:55:34 +0100 Subject: [PATCH 07/28] Added logging parameters to whitelist --- libstuff/SLog.cpp | 3 +++ sqlitecluster/SQLiteNode.cpp | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index bb3943082..a963ed277 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -49,6 +49,9 @@ static set PARAMS_WHITELIST = { "indexName", "isUnique", "logParam", + "message", + "peer", + "reason", "requestID", "status", "userID", diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 53884d297..662b3476f 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1522,7 +1522,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // This is the same handling as at the bottom of _onMESSAGE. SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"errorMessage", !message.empty()? message.methodLine : ""}, + {"message", !message.empty()? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); @@ -1754,8 +1754,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } catch (const SException& e) { SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"errorMessage", !message.empty() ? message.methodLine : ""}, - {"reason", e.what()} + {"message", !message.empty() ? message.methodLine : ""}, + {"reason", e.what()} }); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); From c03124a1ee7edb689c86cb33660f9822afbad29a Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 12:19:00 -0800 Subject: [PATCH 08/28] Allow first message to be either LOGIN or NODE_LOGIN to deprecate old message --- sqlitecluster/SQLiteNode.cpp | 38 +++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 79be57fed..3ea870cd7 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -56,6 +56,20 @@ // dbCountAtStart: The highest committed transaction in the DB at the start of this transaction on leader, for // optimizing replication. +// On LOGIN vs NODE_LOGIN. +// _onConnect sends a LOGIN message. +// _onConnect is called in exctly two places: +// 1. In response to a NODE_LOGIN message received on a newly connected socket on the sync port. It's expected when +// establishing a connection, a node sends this NODE_LOGIN as its first message. +// 2. Immediately following establishing a TCP connection to another node and sending a NODE_LOGIN message. In the case that +// we are the initiating node, we immediately queue three messages: +// 1. NODE_LOGIN +// 2. PING +// 3. LOGIN +// +// When we receive a NODE_LOGIN, we immediately respond with a PING followed by a LOGIN (by calling _onConnect). +// We can cobine all of these into a single login message. + #undef SLOGPREFIX #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} " @@ -1284,6 +1298,9 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->latency = max(STimeNow() - message.calc64("Timestamp"), 1ul); SINFO("Received PONG from peer '" << peer->name << "' (" << peer->latency/1000 << "ms latency)"); return; + } else if (SIEquals(message.methodLine, "NODE_LOGIN")) { + // Do nothing, this keeps this code from warning until NODE_LOGIN is deprecated. + return; } // Every other message broadcasts the current state of the node @@ -1357,6 +1374,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("you're *not* supposed to be a 0-priority permafollower"); } + // Validate hash here, mark node as forked if found. + // It's an error to have to peers configured with the same priority, except 0 and -1 SASSERT(_priority == -1 || _priority == 0 || message.calc("Priority") != _priority); PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" @@ -1745,7 +1764,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } } else { - STHROW("unrecognized message"); + STHROW("unrecognized message: " + message.methodLine); } } catch (const SException& e) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); @@ -1759,13 +1778,15 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { void SQLiteNode::_onConnect(SQLitePeer* peer) { SASSERT(peer); SASSERTWARN(!peer->loggedIn); - // Send the LOGIN - PINFO("Sending LOGIN"); SData login("LOGIN"); + login["Name"] = _name; login["Priority"] = to_string(_priority); login["State"] = stateName(_state); login["Version"] = _version; login["Permafollower"] = _originalPriority ? "false" : "true"; + PINFO("Sending " << login.serialize()); + + // NOTE: the following call adds CommitCount, Hash, and commandAddress fields. _sendToPeer(peer, login); } @@ -2565,12 +2586,14 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { int messageSize = message.deserialize(socket->recvBuffer); if (messageSize) { socket->recvBuffer.consumeFront(messageSize); - if (SIEquals(message.methodLine, "NODE_LOGIN")) { + // Allow either LOGIN or NODE_LOGIN until we deprecate NODE_LOGIN. + if (SIEquals(message.methodLine, "NODE_LOGIN") || SIEquals(message.methodLine, "LOGIN")) { SQLitePeer* peer = getPeerByName(message["Name"]); if (peer) { if (peer->setSocket(socket)) { - _sendPING(peer); _onConnect(peer); + _sendPING(peer); + _onMESSAGE(peer, message); // Connected OK, don't need in _unauthenticatedIncomingSockets anymore. socketsToRemove.push_back(socket); @@ -2589,7 +2612,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { STHROW("Unauthenticated node '" + message["Name"] + "' attempted to connected, rejecting."); } } else { - STHROW("expecting NODE_LOGIN"); + STHROW("expecting LOGIN or NODE_LOGIN"); } } else if (STimeNow() > socket->lastRecvTime + 5'000'000) { STHROW("Incoming socket didn't send a message for over 5s, closing."); @@ -2612,11 +2635,12 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { switch (result) { case SQLitePeer::PeerPostPollStatus::JUST_CONNECTED: { + // When NODE_LOGIN is deprecated, we can remove the next 3 lines. SData login("NODE_LOGIN"); login["Name"] = _name; peer->sendMessage(login.serialize()); - _sendPING(peer); _onConnect(peer); + _sendPING(peer); } break; case SQLitePeer::PeerPostPollStatus::SOCKET_ERROR: From e5659c7e4eab52a257b76f6e9d301be589948743 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 14:22:36 -0800 Subject: [PATCH 09/28] add known bad peer, refuse to talk to it --- sqlitecluster/SQLiteNode.cpp | 21 ++++++++++++++++----- sqlitecluster/SQLitePeer.cpp | 2 ++ sqlitecluster/SQLitePeer.h | 3 +++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 3ea870cd7..5f8e7d893 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1303,6 +1303,12 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { return; } + // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles. + if (peer->knownBad) { + PINFO("Received message " << message.methodLine << " from known bad peer, ignoring."); + return; + } + // Every other message broadcasts the current state of the node if (!message.isSet("CommitCount")) { STHROW("missing CommitCount"); @@ -1374,17 +1380,22 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("you're *not* supposed to be a 0-priority permafollower"); } - // Validate hash here, mark node as forked if found. - // It's an error to have to peers configured with the same priority, except 0 and -1 SASSERT(_priority == -1 || _priority == 0 || message.calc("Priority") != _priority); - PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" - << message["CommitCount"] << " (" << message["Hash"] << ")"); peer->priority = message.calc("Priority"); - peer->loggedIn = true; peer->version = message["Version"]; peer->state = stateFromName(message["State"]); + // Validate hash here, mark node as forked if found. + if (false) { + PINFO("Peer is forked, marking as bad, will ignore."); + peer->knownBad = true; + } else { + PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" + << message["CommitCount"] << " (" << message["Hash"] << ")"); + peer->loggedIn = true; + } + // If the peer is already standing up, go ahead and approve or deny immediately. if (peer->state == SQLiteNodeState::STANDINGUP) { _sendStandupResponse(peer, message); diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index 82c2de8a2..4daf2c785 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -26,6 +26,7 @@ SQLitePeer::SQLitePeer(const string& name_, const string& host_, const STable& p transactionResponse(Response::NONE), version(), lastPingTime(0), + knownBad(false), hash() { } @@ -79,6 +80,7 @@ void SQLitePeer::reset() { version = ""; lastPingTime = 0, setCommit(0, ""); + knownBad = false; } void SQLitePeer::shutdownSocket() { diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h index 0eb984656..296a11ed8 100644 --- a/sqlitecluster/SQLitePeer.h +++ b/sqlitecluster/SQLitePeer.h @@ -91,6 +91,9 @@ class SQLitePeer { atomic version; atomic lastPingTime; + // Set to true when this peer is known to be unusable, I.e., when it has a database that is forked from us. + atomic knownBad; + private: // For initializing the permafollower value from the params list. static bool isPermafollower(const STable& params); From ccdc5cd82f1b933381600cdec0900a5e21374192 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 15:51:51 -0800 Subject: [PATCH 10/28] Stop communicating on fork. Test don't pass yet. --- sqlitecluster/SQLiteNode.cpp | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 5f8e7d893..3b95b3833 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1386,14 +1386,23 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->version = message["Version"]; peer->state = stateFromName(message["State"]); - // Validate hash here, mark node as forked if found. - if (false) { - PINFO("Peer is forked, marking as bad, will ignore."); - peer->knownBad = true; - } else { + uint64_t peerCommitCount; + string peerCommitHash; + bool hashesMatch = true; + peer->getCommit(peerCommitCount, peerCommitHash); + if (!peerCommitHash.empty() && peerCommitCount <= getCommitCount()) { + string query, hash; + _db.getCommit(peerCommitCount, query, hash); + hashesMatch = (peerCommitHash == hash); + } + + if (hashesMatch) { PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" << message["CommitCount"] << " (" << message["Hash"] << ")"); peer->loggedIn = true; + } else { + PINFO("Peer is forked, marking as bad, will ignore."); + peer->knownBad = true; } // If the peer is already standing up, go ahead and approve or deny immediately. @@ -1775,7 +1784,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } } else { - STHROW("unrecognized message: " + message.methodLine); + SINFO("unrecognized message: " + message.methodLine); } } catch (const SException& e) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); @@ -1902,6 +1911,9 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { // We can treat this whole function as atomic and thread-safe as it sends data to a peer with it's own atomic // `sendMessage` and the peer itself (assuming it's something from _peerList, which, if not, don't do that) is // const and will exist without changing until destruction. + if (peer->knownBad) { + PINFO("Skipping message " << message.methodLine << " to known bad peer."); + } peer->sendMessage(_addPeerHeaders(message).serialize()); } @@ -1910,6 +1922,9 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { // Loop across all connected peers and send the message. _peerList is const so this is thread-safe. for (auto peer : _peerList) { + if (peer->knownBad) { + PINFO("Skipping message " << message.methodLine << " to known bad peer."); + } // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. if (!subscribedOnly || peer->subscribed) { From 466725411ae8838f1e77e9451b1638cbfeb2b2c3 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:18:36 -0800 Subject: [PATCH 11/28] Rename variable, remove _forkedFrom list --- sqlitecluster/SQLiteNode.cpp | 53 ++++++++++++++++++++++-------------- sqlitecluster/SQLiteNode.h | 7 ----- sqlitecluster/SQLitePeer.cpp | 4 +-- sqlitecluster/SQLitePeer.h | 2 +- 4 files changed, 36 insertions(+), 30 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 3b95b3833..705e29a29 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -639,7 +639,7 @@ bool SQLiteNode::update() { // Find the freshest non-broken peer (including permafollowers). if (peer->loggedIn) { loggedInPeers.push_back(peer->name); - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { SWARN("Hash mismatch. Forked from peer " << peer->name << " so not considering it." << _getLostQuorumLogMessage()); continue; } @@ -757,7 +757,7 @@ bool SQLiteNode::update() { continue; } - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { // Forked nodes are treated as ineligible for leader, etc. SHMMM("Not counting forked peer " << peer->name << " for freshest, highestPriority, or currentLeader."); continue; @@ -1304,8 +1304,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles. - if (peer->knownBad) { - PINFO("Received message " << message.methodLine << " from known bad peer, ignoring."); + if (peer->forked) { + PINFO("Received message " << message.methodLine << " from forked peer, ignoring."); return; } @@ -1402,7 +1402,13 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->loggedIn = true; } else { PINFO("Peer is forked, marking as bad, will ignore."); - peer->knownBad = true; + + // Send it a message before we mark it as forked, as we'll refuse afterward. + SData forked("FORKED"); + _sendToPeer(peer, forked); + + // And mark it dead until it reconnects. + peer->forked = true; } // If the peer is already standing up, go ahead and approve or deny immediately. @@ -1579,14 +1585,21 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { SQResult result; uint64_t commitNum = SToUInt64(message["hashMismatchNumber"]); _db.getCommits(commitNum, commitNum, result); - _forkedFrom.insert(peer->name); + peer->forked = true; + + size_t forkedCount = 0; + for (const auto& p : _peerList) { + if (p->forked) { + forkedCount++; + } + } SALERT("Hash mismatch. Peer " << peer->name << " and I have forked at commit " << message["hashMismatchNumber"] - << ". I have forked from " << _forkedFrom.size() << " other nodes. I am " << stateName(_state) + << ". I have forked from " << forkedCount << " other nodes. I am " << stateName(_state) << " and have hash " << result[0][0] << " for that commit. Peer has hash " << message["hashMismatchValue"] << "." << _getLostQuorumLogMessage()); - if (_forkedFrom.size() > ((_peerList.size() + 1) / 2)) { + if (forkedCount > ((_peerList.size() + 1) / 2)) { SERROR("Hash mismatch. I have forked from over half the cluster. This is unrecoverable." << _getLostQuorumLogMessage()); } @@ -1783,8 +1796,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { << e.what() << "', ignoring."); } } + } else if (SIEquals(message.methodLine, "FORKED")) { + peer->forked = true; + PINFO("Peer said we're forked, beleiving them."); } else { - SINFO("unrecognized message: " + message.methodLine); + PINFO("unrecognized message: " + message.methodLine); } } catch (const SException& e) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); @@ -1911,8 +1927,8 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { // We can treat this whole function as atomic and thread-safe as it sends data to a peer with it's own atomic // `sendMessage` and the peer itself (assuming it's something from _peerList, which, if not, don't do that) is // const and will exist without changing until destruction. - if (peer->knownBad) { - PINFO("Skipping message " << message.methodLine << " to known bad peer."); + if (peer->forked) { + PINFO("Skipping message " << message.methodLine << " to forked peer."); } peer->sendMessage(_addPeerHeaders(message).serialize()); } @@ -1922,8 +1938,8 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { // Loop across all connected peers and send the message. _peerList is const so this is thread-safe. for (auto peer : _peerList) { - if (peer->knownBad) { - PINFO("Skipping message " << message.methodLine << " to known bad peer."); + if (peer->forked) { + PINFO("Skipping message " << message.methodLine << " to forked peer."); } // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. @@ -2014,16 +2030,12 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance _leadPeer = nullptr; } - if (newState >= SQLiteNodeState::STANDINGUP) { - // Not forked from anyone. Note that this includes both LEADING and FOLLOWING. - _forkedFrom.clear(); - } - // Re-enable commits if they were disabled during a previous stand-down. if (newState != SQLiteNodeState::SEARCHING) { _db.setCommitEnabled(true); } +#if 0 // If we're going searching and have forked from at least 1 peer, sleep for a second. This is intended to prevent thousands of lines of log spam when this happens in an infinite // loop. It's entirely possible that we do this for valid reasons - it may be the peer that has the bad database and not us, and there are plenty of other reasons we could switch to // SEARCHING, but in those cases, we just wait an extra second before trying again. @@ -2031,6 +2043,7 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance SWARN("Going searching while forked peers present, sleeping 1 second." << _getLostQuorumLogMessage()); sleep(1); } +#endif // Additional logic for some new states if (newState == SQLiteNodeState::LEADING) { @@ -2250,7 +2263,7 @@ void SQLiteNode::_updateSyncPeer() continue; } - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { SWARN("Hash mismatch. Can't choose peer " << peer->name << " due to previous hash mismatch."); continue; } @@ -2821,7 +2834,7 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { return; } - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { PHMMM("Forked from peer, can't approve standup."); response["Response"] = "abstain"; response["Reason"] = "We are forked"; diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 7c14667d4..3826ba0bd 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -393,13 +393,6 @@ class SQLiteNode : public STCPManager { // This can be removed once we've figured out why replication falls behind. See this issue: https://github.com/Expensify/Expensify/issues/210528 atomic _concurrentReplicateTransactions = 0; - // We keep a set of strings that are the names of nodes we've forked from, in the case we ever receive a hash mismatch while trying to synchronize. - // Whenever we become LEADING or FOLLOWING this is cleared. This resets the case where one node has forked, we attempt to synchronize from it, and fail, - // but later synchronize from someone else. Once we've come up completely, we no longer "hold a grudge" against this node, which will likely get fixed - // while we're online. - // In the event that this list becomes longer than half the cluster size, the node kills itself and logs that it's in an unrecoverable state. - set _forkedFrom; - // A pointer to a SQLite instance that is passed to plugin's stateChanged function. This prevents plugins from operating on the same handle that // the sync node is when they run queries in stateChanged. SQLite* pluginDB; diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index 4daf2c785..35146de52 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -26,7 +26,7 @@ SQLitePeer::SQLitePeer(const string& name_, const string& host_, const STable& p transactionResponse(Response::NONE), version(), lastPingTime(0), - knownBad(false), + forked(false), hash() { } @@ -80,7 +80,7 @@ void SQLitePeer::reset() { version = ""; lastPingTime = 0, setCommit(0, ""); - knownBad = false; + forked = false; } void SQLitePeer::shutdownSocket() { diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h index 296a11ed8..d6d0a8030 100644 --- a/sqlitecluster/SQLitePeer.h +++ b/sqlitecluster/SQLitePeer.h @@ -92,7 +92,7 @@ class SQLitePeer { atomic lastPingTime; // Set to true when this peer is known to be unusable, I.e., when it has a database that is forked from us. - atomic knownBad; + atomic forked; private: // For initializing the permafollower value from the params list. From 0689a931a2845e03f5c2ebb64a5efc2728d4e579 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:42:58 -0800 Subject: [PATCH 12/28] Fix fork test --- sqlitecluster/SQLiteNode.cpp | 48 +++++++++++++++++------- sqlitecluster/SQLiteNode.h | 2 + test/clustertest/tests/ForkCheckTest.cpp | 5 +++ 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 705e29a29..8478678fa 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1409,6 +1409,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // And mark it dead until it reconnects. peer->forked = true; + _dieIfForkedFromCluster(); } // If the peer is already standing up, go ahead and approve or deny immediately. @@ -1587,22 +1588,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { _db.getCommits(commitNum, commitNum, result); peer->forked = true; - size_t forkedCount = 0; - for (const auto& p : _peerList) { - if (p->forked) { - forkedCount++; - } - } - SALERT("Hash mismatch. Peer " << peer->name << " and I have forked at commit " << message["hashMismatchNumber"] - << ". I have forked from " << forkedCount << " other nodes. I am " << stateName(_state) - << " and have hash " << result[0][0] << " for that commit. Peer has hash " << message["hashMismatchValue"] << "." - << _getLostQuorumLogMessage()); - - if (forkedCount > ((_peerList.size() + 1) / 2)) { - SERROR("Hash mismatch. I have forked from over half the cluster. This is unrecoverable." << _getLostQuorumLogMessage()); - } + << ". I am " << stateName(_state) << " and have hash " << result[0][0] << " for that commit. Peer has hash " + << message["hashMismatchValue"] << "." << _getLostQuorumLogMessage()); + _dieIfForkedFromCluster(); STHROW("Hash mismatch"); } if (!_syncPeer) { @@ -1799,6 +1789,23 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } else if (SIEquals(message.methodLine, "FORKED")) { peer->forked = true; PINFO("Peer said we're forked, beleiving them."); + _dieIfForkedFromCluster(); + + // If leader said we're forked from it, we need a new leader. + if (peer == _leadPeer) { + _leadPeer = nullptr; + _changeState(SQLiteNodeState::SEARCHING); + } + + // If our sync peer said we're forked from it, and we're currently synchronizing, we need a new sync peer. + // However, if we're not currently syncing, then we don't need to change states, this peer could have been chosen + // hours or days ago. + if (peer == _syncPeer) { + _syncPeer = nullptr; + if (_state == SQLiteNodeState::SYNCHRONIZING) { + _changeState(SQLiteNodeState::SEARCHING); + } + } } else { PINFO("unrecognized message: " + message.methodLine); } @@ -2908,3 +2915,16 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { } _sendToPeer(peer, response); } + +void SQLiteNode::_dieIfForkedFromCluster() { + size_t forkedCount = 0; + for (const auto& p : _peerList) { + if (p->forked) { + forkedCount++; + } + } + + if (forkedCount > ((_peerList.size() + 1) / 2)) { + SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage()); + } +} \ No newline at end of file diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 3826ba0bd..354aef7c9 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -280,6 +280,8 @@ class SQLiteNode : public STCPManager { // commitCount that we do, this will return null. void _updateSyncPeer(); + void _dieIfForkedFromCluster(); + const string _commandAddress; const string _name; const vector _peerList; diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp index 6e0c0d262..330c8735f 100644 --- a/test/clustertest/tests/ForkCheckTest.cpp +++ b/test/clustertest/tests/ForkCheckTest.cpp @@ -119,5 +119,10 @@ struct ForkCheckTest : tpunit::TestFixture { // And that signal should have been ABORT. ASSERT_EQUAL(SIGABRT, WTERMSIG(status)); + + // We call stopServer on the forked leader because it crashed, but the cluster tester doesn't realize, so shutting down + // normally will time out after a minute. Calling `stopServer` explicitly will clear the server PID, and we won't need + // to wait for this timeout. + tester.getTester(0).stopServer(); } } __ForkCheckTest; From 002a9373aa6b08cde04fbbfd2f0b0c029a2e50b9 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:46:01 -0800 Subject: [PATCH 13/28] remove test that tests condition that's no longer used. --- .../tests/ForkedNodeApprovalTest.cpp | 161 ------------------ 1 file changed, 161 deletions(-) delete mode 100644 test/clustertest/tests/ForkedNodeApprovalTest.cpp diff --git a/test/clustertest/tests/ForkedNodeApprovalTest.cpp b/test/clustertest/tests/ForkedNodeApprovalTest.cpp deleted file mode 100644 index 197b883cc..000000000 --- a/test/clustertest/tests/ForkedNodeApprovalTest.cpp +++ /dev/null @@ -1,161 +0,0 @@ - -#include - -#include -#include -#include -#include - -struct ForkedNodeApprovalTest : tpunit::TestFixture { - ForkedNodeApprovalTest() - : tpunit::TestFixture("ForkedNodeApproval", TEST(ForkedNodeApprovalTest::test)) {} - - pair getMaxJournalCommit(BedrockTester& tester, bool online = true) { - SQResult journals; - tester.readDB("SELECT name FROM sqlite_schema WHERE type ='table' AND name LIKE 'journal%';", journals, online); - uint64_t maxJournalCommit = 0; - string maxJournalTable; - for (auto& row : journals.rows) { - string maxID = tester.readDB("SELECT MAX(id) FROM " + row[0] + ";", online); - try { - uint64_t maxCommitNum = stoull(maxID); - if (maxCommitNum > maxJournalCommit) { - maxJournalCommit = maxCommitNum; - maxJournalTable = row[0]; - } - } catch (const invalid_argument& e) { - // do nothing, skip this journal with no entries. - continue; - } - } - return make_pair(maxJournalCommit, maxJournalTable); - } - - void test() { - // Create a cluster, wait for it to come up. - BedrockClusterTester tester(ClusterSize::THREE_NODE_CLUSTER); - - // We'll tell the threads to stop when they're done. - atomic stop(false); - - // We want to not spam a stopped leader. - atomic leaderIsUp(true); - - // Just use a bunch of copies of the same command. - SData spamCommand("idcollision"); - - // In a vector. - const vector commands(100, spamCommand); - - // Now create 9 threads spamming 100 commands at a time, each. 9 cause we have three nodes. - vector threads; - for (size_t i = 0; i < 9; i++) { - threads.emplace_back([&tester, i, &commands, &stop, &leaderIsUp](){ - while (!stop) { - // Pick a tester, send, don't care about the result. - size_t testerNum = i % 3; - if (testerNum == 0 && !leaderIsUp) { - // If we're looking for leader and it's down, wait a second to avoid pegging the CPU. - sleep(1); - } else { - // If we're not leader or leader is up, spam away! - tester.getTester(testerNum).executeWaitMultipleData(commands); - } - } - }); - } - - // Let them spam for a second. - sleep(1); - - // We can try and stop the leader. - leaderIsUp = false; - tester.getTester(0).stopServer(); - - // Spam a few more commands and then we can stop. - sleep(1); - stop = true; - for (auto& t : threads) { - t.join(); - } - - // Fetch the latest journal commits on leader and follower - auto result = getMaxJournalCommit(tester.getTester(0), false); - - uint64_t leaderMaxCommit = result.first; - string leaderMaxCommitJournal = result.second; - result = getMaxJournalCommit(tester.getTester(1)); - uint64_t followerMaxCommit = result.first; - - // Make sure the follower got farther than the leader. - ASSERT_GREATER_THAN(followerMaxCommit, leaderMaxCommit); - - // We need to release any DB that the tester is holding. - tester.getTester(0).freeDB(); - tester.getTester(1).freeDB(); - - // Break leader. - { - string filename = tester.getTester(0).getArg("-db"); - string query = "UPDATE " + leaderMaxCommitJournal + " SET hash = 'abcdef123456' WHERE id = " + to_string(leaderMaxCommit) + ";"; - - sqlite3* db = nullptr; - sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_NOMUTEX, NULL); - char* errMsg = nullptr; - sqlite3_exec(db, query.c_str(), 0, 0, &errMsg); - if (errMsg) { - cout << "Error updating db: " << errMsg << endl; - ASSERT_TRUE(false); - } - sqlite3_close_v2(db); - } - - // Stop the second follower. - tester.getTester(2).stopServer(); - - // Start the broken leader back up. - tester.getTester(0).startServer(false); - - // We should not get a leader, the primary leader needs to synchronize, but can't because it's forked. - // The secondary leader should go leading, but can't, because it only receives `abstain` responses to standup requests. - // It's possible for the secondary leader to go leading once, but it should quickly fall out of leading when the fork is detected and primary leader reconnects. - // After that, it should not go leading again, primary leader should abstain from participation. - auto start = chrono::steady_clock::now(); - bool abstainDetected = false; - while (true) { - if (chrono::steady_clock::now() - start > 30s) { - cout << "It's been 30 seconds." << endl; - break; - } - SData command("Status"); - auto responseJson = tester.getTester(1).executeWaitMultipleData({command}, 1, true)[0].content; - - auto json = SParseJSONObject(responseJson); - auto peers = SParseJSONArray(json["peerList"]); - for (auto& peer : peers) { - auto peerJSON = SParseJSONObject(peer); - if (peerJSON["name"] == "cluster_node_0" && peerJSON["standupResponse"] == "ABSTAIN") { - abstainDetected = true; - break; - } - } - if (abstainDetected) { - break; - } - - // try again. - usleep(50'000); - } - - ASSERT_TRUE(abstainDetected); - - // Ok, now we can start the second follower back up and secondary leader should be able to lead. - tester.getTester(2).startServer(false); - ASSERT_TRUE(tester.getTester(1).waitForState("LEADING")); - - // We call stopServer on the forked leader because it crashed, but the cluster tester doesn't realize, so shutting down - // normally will time out after a minute. Calling `stopServer` explicitly will clear the server PID, and we won't need - // to wait for this timeout. - tester.getTester(0).stopServer(); - } -} __ForkedNodeApprovalTest; From f50aec04744ff9fb9cfb44e18b5c378bff3f1e2a Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:54:03 -0800 Subject: [PATCH 14/28] Remove ABSTAIN --- sqlitecluster/SQLiteNode.cpp | 25 ------------------------- sqlitecluster/SQLitePeer.cpp | 3 --- sqlitecluster/SQLitePeer.h | 3 +-- 3 files changed, 1 insertion(+), 30 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 8478678fa..7374ac0f3 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -861,7 +861,6 @@ bool SQLiteNode::update() { size_t numFullPeers = 0; size_t numLoggedInFullPeers = 0; size_t approveCount = 0; - size_t abstainCount = 0; if (_isShuttingDown) { SINFO("Shutting down while standing up, setting state to SEARCHING"); _changeState(SQLiteNodeState::SEARCHING); @@ -879,9 +878,6 @@ bool SQLiteNode::update() { // Has it responded yet? if (peer->standupResponse == SQLitePeer::Response::NONE) { // This peer hasn't yet responded. We do nothing with it in this case, maybe it will have responded by the next check. - } else if (peer->standupResponse == SQLitePeer::Response::ABSTAIN) { - PHMMM("Peer abstained from participation in quorum"); - abstainCount++; } else if (peer->standupResponse == SQLitePeer::Response::DENY) { // It responeded, but didn't approve -- abort PHMMM("Refused our STANDUP, cancel and RE-SEARCH"); @@ -894,16 +890,6 @@ bool SQLiteNode::update() { } } - // If the majority of full peers responds with abstain, then re-search. - const bool majorityAbstained = abstainCount * 2 > numFullPeers; - if (majorityAbstained) { - // Majority abstained, meaning we're probably forked, - // so we go back to searching so we can go back to synchronizing and see if we're forked. - SHMMM("Majority of full peers abstained; re-SEARCHING."); - _changeState(SQLiteNodeState::SEARCHING); - return true; // Re-update - } - // If everyone's responded with approval and we form a majority, then finish standup. bool majorityConnected = numLoggedInFullPeers * 2 >= numFullPeers; bool quorumApproved = approveCount * 2 >= numFullPeers; @@ -1531,9 +1517,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { if (SIEquals(message["Response"], "approve")) { PINFO("Received standup approval"); peer->standupResponse = SQLitePeer::Response::APPROVE; - } else if (SIEquals(message["Response"], "abstain")) { - PINFO("Received standup abstain"); - peer->standupResponse = SQLitePeer::Response::ABSTAIN; } else { PHMMM("Received standup denial: reason='" << message["Reason"] << "'"); peer->standupResponse = SQLitePeer::Response::DENY; @@ -2841,14 +2824,6 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { return; } - if (peer->forked) { - PHMMM("Forked from peer, can't approve standup."); - response["Response"] = "abstain"; - response["Reason"] = "We are forked"; - _sendToPeer(peer, response); - return; - } - // What's our state if (SWITHIN(SQLiteNodeState::STANDINGUP, _state, SQLiteNodeState::STANDINGDOWN)) { // Oh crap, it's trying to stand up while we're leading. Who is higher priority? diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index 35146de52..c93589520 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -207,9 +207,6 @@ string SQLitePeer::responseName(Response response) { case Response::DENY: return "DENY"; break; - case Response::ABSTAIN: - return "ABSTAIN"; - break; default: return ""; } diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h index d6d0a8030..48bc3433c 100644 --- a/sqlitecluster/SQLitePeer.h +++ b/sqlitecluster/SQLitePeer.h @@ -8,8 +8,7 @@ class SQLitePeer { enum class Response { NONE, APPROVE, - DENY, - ABSTAIN + DENY }; enum class PeerPostPollStatus { From 0e828eb21f325f189642a47678a6d3ef562f9495 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 17:01:06 -0800 Subject: [PATCH 15/28] Comment cleanup --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 7374ac0f3..7f779d43d 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -56,6 +56,7 @@ // dbCountAtStart: The highest committed transaction in the DB at the start of this transaction on leader, for // optimizing replication. +// NOTE: This comment as well as NODE_LOGIN should be removed after https://github.com/Expensify/Bedrock/pull/1999 is deployed. // On LOGIN vs NODE_LOGIN. // _onConnect sends a LOGIN message. // _onConnect is called in exctly two places: @@ -68,7 +69,6 @@ // 3. LOGIN // // When we receive a NODE_LOGIN, we immediately respond with a PING followed by a LOGIN (by calling _onConnect). -// We can cobine all of these into a single login message. #undef SLOGPREFIX #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} " @@ -2902,4 +2902,4 @@ void SQLiteNode::_dieIfForkedFromCluster() { if (forkedCount > ((_peerList.size() + 1) / 2)) { SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage()); } -} \ No newline at end of file +} From 2d6b73adc2ca09cc8fe42249a826584da282ddb5 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 17:19:39 -0800 Subject: [PATCH 16/28] Standardize use of _sendToPeer --- sqlitecluster/SQLiteNode.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 7f779d43d..d6c03bb70 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1277,7 +1277,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { SINFO("Received PING from peer '" << peer->name << "'. Sending PONG."); SData pong("PONG"); pong["Timestamp"] = message["Timestamp"]; - peer->sendMessage(pong.serialize()); + peer->sendMessage(pong); return; } else if (SIEquals(message.methodLine, "PONG")) { // Latency must be > 0 because we treat 0 as "not connected". @@ -1531,7 +1531,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { SINFO("Asked to help SYNCHRONIZE but shutting down."); SData response("SYNCHRONIZE_RESPONSE"); response["ShuttingDown"] = "true"; - peer->sendMessage(response); + _sendToPeer(peer, response); } else { _pendingSynchronizeResponses++; static atomic synchronizeCount(0); @@ -1546,13 +1546,13 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // The following two lines are copied from `_sendToPeer`. response["CommitCount"] = to_string(db.getCommitCount()); response["Hash"] = db.getCommittedHash(); - peer->sendMessage(response); + _sendToPeer(peer, response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); - peer->sendMessage(reconnect.serialize()); + _sendToPeer(peer, reconnect); peer->shutdownSocket(); } @@ -1796,7 +1796,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); - peer->sendMessage(reconnect.serialize()); + _sendToPeer(peer, reconnect); peer->shutdownSocket(); } } @@ -1920,11 +1920,11 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { if (peer->forked) { PINFO("Skipping message " << message.methodLine << " to forked peer."); } - peer->sendMessage(_addPeerHeaders(message).serialize()); + peer->sendMessage(_addPeerHeaders(message)); } void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { - const string serializedMessage = _addPeerHeaders(message).serialize(); + const SData messageWithHeaders = _addPeerHeaders(message); // Loop across all connected peers and send the message. _peerList is const so this is thread-safe. for (auto peer : _peerList) { @@ -1934,7 +1934,7 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. if (!subscribedOnly || peer->subscribed) { - peer->sendMessage(serializedMessage); + peer->sendMessage(messageWithHeaders); } } } @@ -2667,7 +2667,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { // When NODE_LOGIN is deprecated, we can remove the next 3 lines. SData login("NODE_LOGIN"); login["Name"] = _name; - peer->sendMessage(login.serialize()); + _sendToPeer(peer, login); _onConnect(peer); _sendPING(peer); } @@ -2676,7 +2676,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { { SData reconnect("RECONNECT"); reconnect["Reason"] = "socket error"; - peer->sendMessage(reconnect.serialize()); + _sendToPeer(peer, reconnect); peer->shutdownSocket(); } break; @@ -2734,7 +2734,7 @@ void SQLiteNode::_sendPING(SQLitePeer* peer) { SASSERT(peer); SData ping("PING"); ping["Timestamp"] = SToStr(STimeNow()); - peer->sendMessage(ping.serialize()); + peer->sendMessage(ping); } SQLitePeer* SQLiteNode::getPeerByName(const string& name) const { From 859ba9f500361ef02dbb79505d0abf1f6f1333b1 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 18:37:00 -0800 Subject: [PATCH 17/28] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index d6c03bb70..38f676c10 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1930,6 +1930,7 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { for (auto peer : _peerList) { if (peer->forked) { PINFO("Skipping message " << message.methodLine << " to forked peer."); + continue; } // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. From 12cdd008b05440e883e8dbde799e9411446242f6 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 18:37:25 -0800 Subject: [PATCH 18/28] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 38f676c10..a26c84f71 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1919,6 +1919,7 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { // const and will exist without changing until destruction. if (peer->forked) { PINFO("Skipping message " << message.methodLine << " to forked peer."); + return; } peer->sendMessage(_addPeerHeaders(message)); } From d8c495fe8bdb16dae2226d3baa0dd1229226e339 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 09:42:34 -0800 Subject: [PATCH 19/28] Refactor test to make multiple tests easier to run --- test/clustertest/tests/ForkCheckTest.cpp | 61 ++++++++++++++---------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp index 330c8735f..631df5264 100644 --- a/test/clustertest/tests/ForkCheckTest.cpp +++ b/test/clustertest/tests/ForkCheckTest.cpp @@ -1,3 +1,4 @@ +#include "test/lib/BedrockTester.h" #include #include @@ -7,7 +8,9 @@ struct ForkCheckTest : tpunit::TestFixture { ForkCheckTest() - : tpunit::TestFixture("ForkCheck", TEST(ForkCheckTest::test)) {} + : tpunit::TestFixture("ForkCheck", + TEST(ForkCheckTest::forkAtShutDown), + TEST(ForkCheckTest::forkAtCrash)) {} pair getMaxJournalCommit(BedrockTester& tester, bool online = true) { SQResult journals; @@ -30,40 +33,42 @@ struct ForkCheckTest : tpunit::TestFixture { return make_pair(maxJournalCommit, maxJournalTable); } - void test() { - // Create a cluster, wait for it to come up. - BedrockClusterTester tester(ClusterSize::FIVE_NODE_CLUSTER); - - // We'll tell the threads to stop when they're done. - atomic stop(false); - - // We want to not spam a stopped leader. - atomic leaderIsUp(true); - + vector createThreads(size_t num, BedrockClusterTester& tester, atomic& stop, atomic& leaderIsUp) { // Just use a bunch of copies of the same command. - SData spamCommand("idcollision"); - - // In a vector. - const vector commands(100, spamCommand); - - // Now create 9 threads spamming 100 commands at a time, each. 9 cause we have three nodes. vector threads; - for (size_t i = 0; i < 9; i++) { - threads.emplace_back([&tester, i, &commands, &stop, &leaderIsUp](){ + for (size_t num = 0; num < 9; num++) { + threads.emplace_back([&tester, num, &stop, &leaderIsUp](){ + const vector commands(100, SData("idcollision")); while (!stop) { // Pick a tester, send, don't care about the result. - size_t testerNum = i % 3; + size_t testerNum = num % 5; if (testerNum == 0 && !leaderIsUp) { - // If we're looking for leader and it's down, wait a second to avoid pegging the CPU. - sleep(1); - } else { - // If we're not leader or leader is up, spam away! - tester.getTester(testerNum).executeWaitMultipleData(commands); + // If leader's off, don't use it. + testerNum = 1; } + tester.getTester(testerNum).executeWaitMultipleData(commands); } }); } + return threads; + } + + // This primary test here checks that a node that is forked will not be able to rejoin the cluster when reconnecting. + // This is a reasonable test for a fork that happens at shutdown. + void forkAtShutDown() { + // Create a cluster, wait for it to come up. + BedrockClusterTester tester(ClusterSize::FIVE_NODE_CLUSTER); + + // We'll tell the threads to stop when they're done. + atomic stop(false); + + // We want to not spam a stopped leader. + atomic leaderIsUp(true); + + // Now create 15 threads spamming 100 commands at a time, each. 15 cause we have five nodes. + vector threads = createThreads(15, tester, stop, leaderIsUp); + // Let them spam for a second. sleep(1); @@ -71,7 +76,7 @@ struct ForkCheckTest : tpunit::TestFixture { leaderIsUp = false; tester.getTester(0).stopServer(); - // Spam a few more commands and then we can stop. + // Spam a few more commands so thar the follower is ahead of the stopped leader, and then we can stop. sleep(1); stop = true; for (auto& t : threads) { @@ -125,4 +130,8 @@ struct ForkCheckTest : tpunit::TestFixture { // to wait for this timeout. tester.getTester(0).stopServer(); } + + void forkAtCrash() { + + } } __ForkCheckTest; From ac2aa66647d2fe6c331393bf2058ae62935bb2e9 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 10:05:00 -0800 Subject: [PATCH 20/28] Remove second test --- test/clustertest/tests/ForkCheckTest.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp index 631df5264..835cc76cd 100644 --- a/test/clustertest/tests/ForkCheckTest.cpp +++ b/test/clustertest/tests/ForkCheckTest.cpp @@ -4,13 +4,13 @@ #include #include #include +#include #include struct ForkCheckTest : tpunit::TestFixture { ForkCheckTest() : tpunit::TestFixture("ForkCheck", - TEST(ForkCheckTest::forkAtShutDown), - TEST(ForkCheckTest::forkAtCrash)) {} + TEST(ForkCheckTest::forkAtShutDown)) {} pair getMaxJournalCommit(BedrockTester& tester, bool online = true) { SQResult journals; @@ -66,7 +66,7 @@ struct ForkCheckTest : tpunit::TestFixture { // We want to not spam a stopped leader. atomic leaderIsUp(true); - // Now create 15 threads spamming 100 commands at a time, each. 15 cause we have five nodes. + // Now create 15 threads spamming 100 commands at a time, each. 15 because we have five nodes. vector threads = createThreads(15, tester, stop, leaderIsUp); // Let them spam for a second. @@ -130,8 +130,4 @@ struct ForkCheckTest : tpunit::TestFixture { // to wait for this timeout. tester.getTester(0).stopServer(); } - - void forkAtCrash() { - - } } __ForkCheckTest; From e6774554997c399f2626d3e5f417de3c8d382c71 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 13:24:01 -0800 Subject: [PATCH 21/28] Fix last instance of _forkedFrom --- sqlitecluster/SQLiteNode.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index a26c84f71..5042e2e3e 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2027,15 +2027,17 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance _db.setCommitEnabled(true); } -#if 0 // If we're going searching and have forked from at least 1 peer, sleep for a second. This is intended to prevent thousands of lines of log spam when this happens in an infinite // loop. It's entirely possible that we do this for valid reasons - it may be the peer that has the bad database and not us, and there are plenty of other reasons we could switch to // SEARCHING, but in those cases, we just wait an extra second before trying again. - if (newState == SQLiteNodeState::SEARCHING && _forkedFrom.size()) { + bool forkedPeers = false; + for (const auto p : _peerList) { + forkedPeers = forkedPeers || p->forked; + } + if (newState == SQLiteNodeState::SEARCHING && forkedPeers) { SWARN("Going searching while forked peers present, sleeping 1 second." << _getLostQuorumLogMessage()); sleep(1); } -#endif // Additional logic for some new states if (newState == SQLiteNodeState::LEADING) { From 54cdcbc1ca8e549387338781da27933e9336fab1 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 19:44:30 -0800 Subject: [PATCH 22/28] Code review feedback --- sqlitecluster/SQLiteNode.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 2bd99cd2f..e7c1a3703 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1720,7 +1720,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } else if (SIEquals(message.methodLine, "FORKED")) { peer->forked = true; - PINFO("Peer said we're forked, beleiving them."); + PINFO("Peer said we're forked, believing them."); _dieIfForkedFromCluster(); // If leader said we're forked from it, we need a new leader. @@ -2825,14 +2825,23 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { } void SQLiteNode::_dieIfForkedFromCluster() { - size_t forkedCount = 0; + size_t quorumNodeCount = 0; + size_t forkedFullPeerCount = 0; for (const auto& p : _peerList) { - if (p->forked) { - forkedCount++; + if (!p->permaFollower) { + forkedFullPeerCount++; + if (p->forked) { + forkedFullPeerCount++; + } } } - if (forkedCount > ((_peerList.size() + 1) / 2)) { - SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage()); + // Am *I* a permaFollower? + if (_originalPriority != 0) { + quorumNodeCount++; + } + + if (forkedFullPeerCount >= (quorumNodeCount + 1) / 2) { + SERROR("I have forked from over half the cluster (" << forkedFullPeerCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage()); } } From 7c06bf595122db6b9ae23cdb20914295c7cd926e Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 19:48:43 -0800 Subject: [PATCH 23/28] Revert "Update SQLite with a small optimization + more logs (v15)" --- libstuff/sqlite3.c | 103 ++++++--------------------------------------- libstuff/sqlite3.h | 2 +- 2 files changed, 15 insertions(+), 90 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index e95e639a2..8084ff4a9 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 5f9f6764e9dffef60213bbc9604940ddfc71. +** 853f9cf453d13cf826443b0d27331e1f4e9e. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e" +#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18229,19 +18229,10 @@ struct sqlite3 { #define SCHEMA_TIME_BEFORE_PREPARE 8 #define SCHEMA_TIME_BEFORE_FINALIZE 9 #define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10 +#define SCHEMA_TIME_END_ANALYZE_LOAD 11 +#define SCHEMA_TIME_FINISH 12 -#define SCHEMA_TIME_AFTER_CLEAR_STATS 11 -#define SCHEMA_TIME_AFTER_STAT1 12 -#define SCHEMA_TIME_AFTER_DEFAULTS 13 - -#define SCHEMA_TIME_AFTER_STAT4_Q1 14 -#define SCHEMA_TIME_AFTER_STAT4_Q2 15 -#define SCHEMA_TIME_AFTER_STAT4 16 - -#define SCHEMA_TIME_END_ANALYZE_LOAD 17 -#define SCHEMA_TIME_FINISH 18 - -#define SCHEMA_TIME_N 19 +#define SCHEMA_TIME_N 13 #define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000) @@ -67561,25 +67552,6 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } -/* -** Zero the n byte block indicated by pointer p. n Must be a multiple of -** 8, and p must be aligned to an 8-byte boundary. -*/ -static void zero64(void *p, int n){ - size_t c = n / sizeof(u64); - void *d = p; - - assert( (n & 0x7)==0 ); - assert( EIGHT_BYTE_ALIGNMENT(p) ); - - __asm__ volatile ( - "rep stosq" - : "+D" (d), "+c" (c) - : "a" (0) - : "memory" - ); -} - /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. @@ -67618,10 +67590,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** entire hash table and aPgno[] array before proceeding. */ if( pWal->aCommitTime ) t = sqlite3STimeNow(); - if( idx==1 && sLoc.aPgno[0]!=0 ){ + if( idx==1 ){ int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - assert( nByte>=0 && (nByte & 0x07)==0 ); - zero64((void*)sLoc.aPgno, nByte); + assert( nByte>=0 ); + memset((void*)sLoc.aPgno, 0, nByte); } if( pWal->aCommitTime ){ pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; @@ -67791,7 +67763,6 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; - memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -68920,39 +68891,6 @@ static int walCheckpoint( } } - if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ - /* In wal2 mode, a non-passive checkpoint waits for all readers of - ** the wal file just checkpointed to finish, then zeroes the hash - ** tables associated with that wal file. This is because in some - ** deployments, zeroing the hash tables as they are overwritten within - ** COMMIT commands is a significant performance hit. - ** - ** Currently, both of the "PART" locks are held for the wal file - ** being checkpointed. i.e. if iCkpt==0, then we already hold both - ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an - ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that - ** there are no remaining readers of the (iCkpt==0) wal file. Similar - ** logic, with different locks, is used for (iCkpt==1). - */ - int lockIdx = WAL_READ_LOCK( - iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 - ); - assert( iCkpt==0 || iCkpt==1 ); - rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); - if( rc==SQLITE_OK ){ - int iHash; - for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ - WalHashLoc sLoc; - int nByte; - memset(&sLoc, 0, sizeof(sLoc)); - walHashGet(pWal, iHash, &sLoc); - nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - memset((void*)sLoc.aPgno, 0, nByte); - } - walUnlockExclusive(pWal, lockIdx, 1); - } - } - if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. */ @@ -93469,7 +93407,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=12): (%s)", zStr); sqlite3_free(zStr); } } @@ -93497,7 +93435,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=12): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93513,7 +93451,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ (aSchema[ii]==0 ? 0 : (int)(aSchema[ii] - i1)) ); } - sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (v=12): (%s)", zStr); sqlite3_free(zStr); } } @@ -100339,9 +100277,7 @@ case OP_AutoCommit: { u64 aCommit[COMMIT_TIME_N]; memset(aCommit, 0, sizeof(aCommit)); - if( iRollback==0 ){ - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); - } + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); if( iRollback ){ assert( desiredAutoCommit==1 ); @@ -100388,7 +100324,7 @@ case OP_AutoCommit: { rc = SQLITE_ERROR; } sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH); - if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit); + if( desiredAutoCommit ) sqlite3CommitTimeLog(aCommit); goto vdbe_return; }else{ sqlite3VdbeError(p, @@ -123847,8 +123783,6 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); - while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ @@ -123894,7 +123828,6 @@ static int loadStatTbl( pIdx->nSample++; } rc = sqlite3_finalize(pStmt); - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); return rc; } @@ -123968,8 +123901,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ #endif } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); - /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zDbSName; @@ -123986,8 +123917,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1); - /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ @@ -123995,8 +123924,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS); - /* Load the statistics from the sqlite_stat4 table. */ #ifdef SQLITE_ENABLE_STAT4 if( rc==SQLITE_OK ){ @@ -124011,8 +123938,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } #endif - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4); - if( rc==SQLITE_NOMEM ){ sqlite3OomFault(db); } @@ -257904,7 +257829,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 6287d400b..7d8654612 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e" +#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" /* ** CAPI3REF: Run-Time Library Version Numbers From ff664a7857dc062662a8b4f39661d357c40925da Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Fri, 6 Dec 2024 10:48:02 -0400 Subject: [PATCH 24/28] SQLite fix for non x86 arch --- libstuff/sqlite3.c | 107 +++++++++++++++++++++++++++++++++++++++------ libstuff/sqlite3.h | 2 +- 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 8084ff4a9..f10a739a6 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 853f9cf453d13cf826443b0d27331e1f4e9e. +** 5fa1699e31856a6585cc59183641c4cc99e1. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" +#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18229,10 +18229,19 @@ struct sqlite3 { #define SCHEMA_TIME_BEFORE_PREPARE 8 #define SCHEMA_TIME_BEFORE_FINALIZE 9 #define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10 -#define SCHEMA_TIME_END_ANALYZE_LOAD 11 -#define SCHEMA_TIME_FINISH 12 -#define SCHEMA_TIME_N 13 +#define SCHEMA_TIME_AFTER_CLEAR_STATS 11 +#define SCHEMA_TIME_AFTER_STAT1 12 +#define SCHEMA_TIME_AFTER_DEFAULTS 13 + +#define SCHEMA_TIME_AFTER_STAT4_Q1 14 +#define SCHEMA_TIME_AFTER_STAT4_Q2 15 +#define SCHEMA_TIME_AFTER_STAT4 16 + +#define SCHEMA_TIME_END_ANALYZE_LOAD 17 +#define SCHEMA_TIME_FINISH 18 + +#define SCHEMA_TIME_N 19 #define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000) @@ -67552,6 +67561,29 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } +/* +** Zero the n byte block indicated by pointer p. n Must be a multiple of +** 8, and p must be aligned to an 8-byte boundary. +*/ +static void zero64(void *p, int n){ +#if defined(__x86_64__) + size_t c = n / sizeof(u64); + void *d = p; + + assert( (n & 0x7)==0 ); + assert( EIGHT_BYTE_ALIGNMENT(p) ); + + __asm__ volatile ( + "rep stosq" + : "+D" (d), "+c" (c) + : "a" (0) + : "memory" + ); +#else + memset(p, 0, n); +#endif +} + /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. @@ -67590,10 +67622,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** entire hash table and aPgno[] array before proceeding. */ if( pWal->aCommitTime ) t = sqlite3STimeNow(); - if( idx==1 ){ + if( idx==1 && sLoc.aPgno[0]!=0 ){ int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - assert( nByte>=0 ); - memset((void*)sLoc.aPgno, 0, nByte); + assert( nByte>=0 && (nByte & 0x07)==0 ); + zero64((void*)sLoc.aPgno, nByte); } if( pWal->aCommitTime ){ pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; @@ -67763,6 +67795,7 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; + memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -68891,6 +68924,39 @@ static int walCheckpoint( } } + if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + /* In wal2 mode, a non-passive checkpoint waits for all readers of + ** the wal file just checkpointed to finish, then zeroes the hash + ** tables associated with that wal file. This is because in some + ** deployments, zeroing the hash tables as they are overwritten within + ** COMMIT commands is a significant performance hit. + ** + ** Currently, both of the "PART" locks are held for the wal file + ** being checkpointed. i.e. if iCkpt==0, then we already hold both + ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an + ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that + ** there are no remaining readers of the (iCkpt==0) wal file. Similar + ** logic, with different locks, is used for (iCkpt==1). + */ + int lockIdx = WAL_READ_LOCK( + iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 + ); + assert( iCkpt==0 || iCkpt==1 ); + rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); + if( rc==SQLITE_OK ){ + int iHash; + for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ + WalHashLoc sLoc; + int nByte; + memset(&sLoc, 0, sizeof(sLoc)); + walHashGet(pWal, iHash, &sLoc); + nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); + memset((void*)sLoc.aPgno, 0, nByte); + } + walUnlockExclusive(pWal, lockIdx, 1); + } + } + if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. */ @@ -93407,7 +93473,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=12): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr); sqlite3_free(zStr); } } @@ -93435,7 +93501,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=12): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93451,7 +93517,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ (aSchema[ii]==0 ? 0 : (int)(aSchema[ii] - i1)) ); } - sqlite3_log(SQLITE_WARNING, "slow schema (v=12): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr); sqlite3_free(zStr); } } @@ -100277,7 +100343,9 @@ case OP_AutoCommit: { u64 aCommit[COMMIT_TIME_N]; memset(aCommit, 0, sizeof(aCommit)); - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); + if( iRollback==0 ){ + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); + } if( iRollback ){ assert( desiredAutoCommit==1 ); @@ -100324,7 +100392,7 @@ case OP_AutoCommit: { rc = SQLITE_ERROR; } sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH); - if( desiredAutoCommit ) sqlite3CommitTimeLog(aCommit); + if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit); goto vdbe_return; }else{ sqlite3VdbeError(p, @@ -123783,6 +123851,8 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ @@ -123828,6 +123898,7 @@ static int loadStatTbl( pIdx->nSample++; } rc = sqlite3_finalize(pStmt); + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); return rc; } @@ -123901,6 +123972,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ #endif } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); + /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zDbSName; @@ -123917,6 +123990,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1); + /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ @@ -123924,6 +123999,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS); + /* Load the statistics from the sqlite_stat4 table. */ #ifdef SQLITE_ENABLE_STAT4 if( rc==SQLITE_OK ){ @@ -123938,6 +124015,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } #endif + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4); + if( rc==SQLITE_NOMEM ){ sqlite3OomFault(db); } @@ -257829,7 +257908,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 7d8654612..c79479408 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" +#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" /* ** CAPI3REF: Run-Time Library Version Numbers From 5456a9888fb1903ff29a2f97144df705fa6caaa3 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Fri, 6 Dec 2024 14:05:42 -0400 Subject: [PATCH 25/28] Update SQLite with more logs, v16 --- libstuff/sqlite3.c | 41 +++++++++++++++++++++++++---------------- libstuff/sqlite3.h | 2 +- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index f10a739a6..b3c5eebf7 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 5fa1699e31856a6585cc59183641c4cc99e1. +** 65b753735b8e8fb70d2b522d527426f1eb5c. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" +#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18234,21 +18234,22 @@ struct sqlite3 { #define SCHEMA_TIME_AFTER_STAT1 12 #define SCHEMA_TIME_AFTER_DEFAULTS 13 -#define SCHEMA_TIME_AFTER_STAT4_Q1 14 -#define SCHEMA_TIME_AFTER_STAT4_Q2 15 -#define SCHEMA_TIME_AFTER_STAT4 16 +#define SCHEMA_TIME_STAT4_Q1_BODY 14 +#define SCHEMA_TIME_AFTER_STAT4_Q1 15 +#define SCHEMA_TIME_AFTER_STAT4_Q2 16 +#define SCHEMA_TIME_AFTER_STAT4 17 -#define SCHEMA_TIME_END_ANALYZE_LOAD 17 -#define SCHEMA_TIME_FINISH 18 +#define SCHEMA_TIME_END_ANALYZE_LOAD 18 +#define SCHEMA_TIME_FINISH 19 -#define SCHEMA_TIME_N 19 -#define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000) +#define SCHEMA_TIME_N 20 +#define SCHEMA_TIME_TIMEOUT (0 * 1000 * 1000) #define sqlite3PrepareTimeSet(x,y) sqlite3CommitTimeSet(x,y) SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrepareTime); -SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime); +SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime, const char *zFile); #define PREPARE_TIME_TIMEOUT (2 * 1000 * 1000) /* 2 second timeout */ @@ -93473,7 +93474,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=16): (%s)", zStr); sqlite3_free(zStr); } } @@ -93501,12 +93502,12 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=16): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } } -SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ +SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){ u64 i1 = aSchema[SCHEMA_TIME_START]; assert( SCHEMA_TIME_START==0 && SCHEMA_TIME_FINISH==SCHEMA_TIME_N-1 ); if( aSchema[SCHEMA_TIME_FINISH]>(i1+SCHEMA_TIME_TIMEOUT) ){ @@ -93517,7 +93518,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ (aSchema[ii]==0 ? 0 : (int)(aSchema[ii] - i1)) ); } - sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (%s) (v=16): (%s)", zFile, zStr); sqlite3_free(zStr); } } @@ -123800,6 +123801,8 @@ static int loadStatTbl( tRowcnt *pSpace; /* Available allocated memory space */ u8 *pPtr; /* Available memory as a u8 for easier manipulation */ + u64 t = sqlite3STimeNow(); + zIndex = (char *)sqlite3_column_text(pStmt, 0); if( zIndex==0 ) continue; nSample = sqlite3_column_int(pStmt, 1); @@ -123839,6 +123842,9 @@ static int loadStatTbl( pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol; } assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) ); + if( db->aSchemaTime ){ + db->aSchemaTime[SCHEMA_TIME_STAT4_Q1_BODY] += (sqlite3STimeNow() - t); + } } rc = sqlite3_finalize(pStmt); if( rc ) return rc; @@ -145944,7 +145950,10 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl error_out: db->aSchemaTime = 0; sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_FINISH); - sqlite3SchemaTimeLog(aSchemaTime); + if( rc==SQLITE_OK && iDb==0 ){ + const char *zFile = sqlite3BtreeGetFilename(pDb->pBt); + sqlite3SchemaTimeLog(aSchemaTime, zFile); + } if( rc ){ if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){ sqlite3OomFault(db); @@ -257908,7 +257917,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index c79479408..ff4f7b3f8 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" +#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" /* ** CAPI3REF: Run-Time Library Version Numbers From f4bb1f1a58c2076d75b2b3becbcd3810bfb33bac Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 6 Dec 2024 14:22:41 -0800 Subject: [PATCH 26/28] Fix wrong count being incremented --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 29aad0bcc..e0281c1e1 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2837,7 +2837,7 @@ void SQLiteNode::_dieIfForkedFromCluster() { size_t forkedFullPeerCount = 0; for (const auto& p : _peerList) { if (!p->permaFollower) { - forkedFullPeerCount++; + quorumNodeCount++; if (p->forked) { forkedFullPeerCount++; } From 8d3fad988ae525735cef2fc54b7e0ce4ca87585b Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 6 Dec 2024 14:25:30 -0800 Subject: [PATCH 27/28] Change comment --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index e0281c1e1..2907e7e84 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1273,7 +1273,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { return; } - // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles. + // We ignore everything except PING and PONG from forked nodes, so we can return here in that case. if (peer->forked) { PINFO("Received message " << message.methodLine << " from forked peer, ignoring."); return; From c86f79953454fe2b25ac3f8ad0321fdc03f82ae5 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 6 Dec 2024 14:26:08 -0800 Subject: [PATCH 28/28] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 2907e7e84..5a75611cd 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2844,7 +2844,7 @@ void SQLiteNode::_dieIfForkedFromCluster() { } } - // Am *I* a permaFollower? + // Increase quorumNodeCount if *I* am not a permafollower if (_originalPriority != 0) { quorumNodeCount++; }