From 7c0a644fb2d72d64d3a6157dead043c43527b2f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= <michal@expensify.com>
Date: Wed, 27 Nov 2024 13:54:56 +0100
Subject: [PATCH 01/28] Changed log format for some SQLiteNode warnings

---
 sqlitecluster/SQLiteNode.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 1f2359478..1e91d4cf9 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         peer->sendMessage(response);
                     } catch (const SException& e) {
                         // This is the same handling as at the bottom of _onMESSAGE.
-                        PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting.");
+                        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}});
                         SData reconnect("RECONNECT");
                         reconnect["Reason"] = e.what();
                         peer->sendMessage(reconnect.serialize());
@@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             STHROW("unrecognized message");
         }
     } catch (const SException& e) {
-        PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting.");
+        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}});
         SData reconnect("RECONNECT");
         reconnect["Reason"] = e.what();
         peer->sendMessage(reconnect.serialize());

From d9a333481c3b62b4fc2757e008e14ee35618a759 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= <michal@expensify.com>
Date: Thu, 28 Nov 2024 12:03:59 +0100
Subject: [PATCH 02/28] Fixed log format for some SQLiteNode warnings

---
 sqlitecluster/SQLiteNode.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 1e91d4cf9..94a50a9d9 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         peer->sendMessage(response);
                     } catch (const SException& e) {
                         // This is the same handling as at the bottom of _onMESSAGE.
-                        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}});
+                        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
                         SData reconnect("RECONNECT");
                         reconnect["Reason"] = e.what();
                         peer->sendMessage(reconnect.serialize());
@@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             STHROW("unrecognized message");
         }
     } catch (const SException& e) {
-        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}});
+        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
         SData reconnect("RECONNECT");
         reconnect["Reason"] = e.what();
         peer->sendMessage(reconnect.serialize());

From 1fc0f2f3182ab1e82c478960591ca9fe7dba5a5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= <michal@expensify.com>
Date: Thu, 28 Nov 2024 12:14:48 +0100
Subject: [PATCH 03/28] Reverting changes for tests

---
 sqlitecluster/SQLiteNode.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 94a50a9d9..4b170025b 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         peer->sendMessage(response);
                     } catch (const SException& e) {
                         // This is the same handling as at the bottom of _onMESSAGE.
-                        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
+                        // SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
                         SData reconnect("RECONNECT");
                         reconnect["Reason"] = e.what();
                         peer->sendMessage(reconnect.serialize());
@@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             STHROW("unrecognized message");
         }
     } catch (const SException& e) {
-        SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
+        //SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
         SData reconnect("RECONNECT");
         reconnect["Reason"] = e.what();
         peer->sendMessage(reconnect.serialize());

From cae4fb990fa729652bd29d8ab13528187af5e4e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= <michal@expensify.com>
Date: Thu, 28 Nov 2024 12:52:58 +0100
Subject: [PATCH 04/28] Fixed log format for some SQLiteNode warnings

---
 sqlitecluster/SQLiteNode.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 4b170025b..928f8fd37 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1496,7 +1496,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         peer->sendMessage(response);
                     } catch (const SException& e) {
                         // This is the same handling as at the bottom of _onMESSAGE.
-                        // SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
+                        SWARN("Error processing message, reconnecting", {
+                            {"peer", peer ? peer->name : "unknown"},
+                              {"message", !message.methodLine.empty() ? message.methodLine : ""},
+                              {"reason", e.what()}
+                        });
                         SData reconnect("RECONNECT");
                         reconnect["Reason"] = e.what();
                         peer->sendMessage(reconnect.serialize());
@@ -1713,7 +1717,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             STHROW("unrecognized message");
         }
     } catch (const SException& e) {
-        //SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}});
+        SWARN("Error processing message, reconnecting", {
+            {"peer", peer ? peer->name : "unknown"},
+              {"message", !message.methodLine.empty() ? message.methodLine : ""},
+              {"reason", e.what()}
+        });
         SData reconnect("RECONNECT");
         reconnect["Reason"] = e.what();
         peer->sendMessage(reconnect.serialize());

From 30dde06fa45f25f8b570a73564c194592e97687b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= <michal@expensify.com>
Date: Thu, 28 Nov 2024 13:31:56 +0100
Subject: [PATCH 05/28] Fixed log format for some SQLiteNode warnings

---
 sqlitecluster/SQLiteNode.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 0b4b69184..5e5984670 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1523,7 +1523,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         // This is the same handling as at the bottom of _onMESSAGE.
                         SWARN("Error processing message, reconnecting", {
                             {"peer", peer ? peer->name : "unknown"},
-                              {"message", !message.methodLine.empty() ? message.methodLine : ""},
+                              {"message", !message.empty()? message.methodLine : ""},
                               {"reason", e.what()}
                         });
                         SData reconnect("RECONNECT");
@@ -1755,7 +1755,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
     } catch (const SException& e) {
         SWARN("Error processing message, reconnecting", {
             {"peer", peer ? peer->name : "unknown"},
-              {"message", !message.methodLine.empty() ? message.methodLine : ""},
+              {"message", !message.empty() ? message.methodLine : ""},
               {"reason", e.what()}
         });
         SData reconnect("RECONNECT");

From d15d30074e69cfa6c1ed056de81434b67d1eafac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= <michal@expensify.com>
Date: Tue, 3 Dec 2024 12:44:42 +0100
Subject: [PATCH 06/28] Added logging parameters to whitelist

---
 libstuff/SLog.cpp            | 2 ++
 sqlitecluster/SQLiteNode.cpp | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp
index e87c92398..1cadbafbe 100644
--- a/libstuff/SLog.cpp
+++ b/libstuff/SLog.cpp
@@ -74,8 +74,10 @@ static const set<string> PARAMS_WHITELIST = {
     "lastIP",
     "logParam",
     "nvpName",
+    "peer",
     "policyAccountID",
     "policyID",
+    "reason",
     "reportID",
     "requestID",
     "requestTimestamp",
diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 5e5984670..0b911c9b0 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1523,7 +1523,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         // This is the same handling as at the bottom of _onMESSAGE.
                         SWARN("Error processing message, reconnecting", {
                             {"peer", peer ? peer->name : "unknown"},
-                              {"message", !message.empty()? message.methodLine : ""},
+                              {"errorMessage", !message.empty()? message.methodLine : ""},
                               {"reason", e.what()}
                         });
                         SData reconnect("RECONNECT");
@@ -1755,7 +1755,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
     } catch (const SException& e) {
         SWARN("Error processing message, reconnecting", {
             {"peer", peer ? peer->name : "unknown"},
-              {"message", !message.empty() ? message.methodLine : ""},
+              {"errorMessage", !message.empty() ? message.methodLine : ""},
               {"reason", e.what()}
         });
         SData reconnect("RECONNECT");

From a962a8d5e04ef3fcfffd2835c1919d353ba987b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= <michal@expensify.com>
Date: Tue, 3 Dec 2024 12:55:34 +0100
Subject: [PATCH 07/28] Added logging parameters to whitelist

---
 libstuff/SLog.cpp            | 3 +++
 sqlitecluster/SQLiteNode.cpp | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp
index bb3943082..a963ed277 100644
--- a/libstuff/SLog.cpp
+++ b/libstuff/SLog.cpp
@@ -49,6 +49,9 @@ static set<string> PARAMS_WHITELIST = {
     "indexName",
     "isUnique",
     "logParam",
+    "message",
+    "peer",
+    "reason",
     "requestID",
     "status",
     "userID",
diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 53884d297..662b3476f 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1522,7 +1522,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         // This is the same handling as at the bottom of _onMESSAGE.
                         SWARN("Error processing message, reconnecting", {
                             {"peer", peer ? peer->name : "unknown"},
-                              {"errorMessage", !message.empty()? message.methodLine : ""},
+                              {"message", !message.empty()? message.methodLine : ""},
                               {"reason", e.what()}
                         });
                         SData reconnect("RECONNECT");
@@ -1754,8 +1754,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
     } catch (const SException& e) {
         SWARN("Error processing message, reconnecting", {
             {"peer", peer ? peer->name : "unknown"},
-              {"errorMessage", !message.empty() ? message.methodLine : ""},
-              {"reason", e.what()}
+            {"message", !message.empty() ? message.methodLine : ""},
+            {"reason", e.what()}
         });
         SData reconnect("RECONNECT");
         reconnect["Reason"] = e.what();

From c03124a1ee7edb689c86cb33660f9822afbad29a Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 12:19:00 -0800
Subject: [PATCH 08/28] Allow first message to be either LOGIN or NODE_LOGIN to
 deprecate old message

---
 sqlitecluster/SQLiteNode.cpp | 38 +++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 79be57fed..3ea870cd7 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -56,6 +56,20 @@
 // dbCountAtStart:   The highest committed transaction in the DB at the start of this transaction on leader, for
 //                   optimizing replication.
 
+// On LOGIN vs NODE_LOGIN.
+// _onConnect sends a LOGIN message.
+// _onConnect is called in exctly two places:
+// 1. In response to a NODE_LOGIN message received on a newly connected socket on the sync port. It's expected when
+//    establishing a connection, a node sends this NODE_LOGIN as its first message.
+// 2. Immediately following establishing a TCP connection to another node and sending a NODE_LOGIN message. In the case that
+//    we are the initiating node, we immediately queue three messages:
+//    1. NODE_LOGIN
+//    2. PING
+//    3. LOGIN
+//
+// When we receive a NODE_LOGIN, we immediately respond with a PING followed by a LOGIN (by calling _onConnect).
+// We can cobine all of these into a single login message.
+
 #undef SLOGPREFIX
 #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} "
 
@@ -1284,6 +1298,9 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             peer->latency = max(STimeNow() - message.calc64("Timestamp"), 1ul);
             SINFO("Received PONG from peer '" << peer->name << "' (" << peer->latency/1000 << "ms latency)");
             return;
+        } else if (SIEquals(message.methodLine, "NODE_LOGIN")) {
+            // Do nothing, this keeps this code from warning until NODE_LOGIN is deprecated.
+            return;
         }
 
         // Every other message broadcasts the current state of the node
@@ -1357,6 +1374,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 STHROW("you're *not* supposed to be a 0-priority permafollower");
             }
 
+            // Validate hash here, mark node as forked if found.
+
             // It's an error to have to peers configured with the same priority, except 0 and -1
             SASSERT(_priority == -1 || _priority == 0 || message.calc("Priority") != _priority);
             PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #"
@@ -1745,7 +1764,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 }
             }
         } else {
-            STHROW("unrecognized message");
+            STHROW("unrecognized message: " + message.methodLine);
         }
     } catch (const SException& e) {
         PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting.");
@@ -1759,13 +1778,15 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
 void SQLiteNode::_onConnect(SQLitePeer* peer) {
     SASSERT(peer);
     SASSERTWARN(!peer->loggedIn);
-    // Send the LOGIN
-    PINFO("Sending LOGIN");
     SData login("LOGIN");
+    login["Name"] = _name;
     login["Priority"] = to_string(_priority);
     login["State"] = stateName(_state);
     login["Version"] = _version;
     login["Permafollower"] = _originalPriority ? "false" : "true";
+    PINFO("Sending " << login.serialize());
+
+    // NOTE: the following call adds CommitCount, Hash, and commandAddress fields.
     _sendToPeer(peer, login);
 }
 
@@ -2565,12 +2586,14 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) {
             int messageSize = message.deserialize(socket->recvBuffer);
             if (messageSize) {
                 socket->recvBuffer.consumeFront(messageSize);
-                if (SIEquals(message.methodLine, "NODE_LOGIN")) {
+                // Allow either LOGIN or NODE_LOGIN until we deprecate NODE_LOGIN.
+                if (SIEquals(message.methodLine, "NODE_LOGIN") || SIEquals(message.methodLine, "LOGIN")) {
                     SQLitePeer* peer = getPeerByName(message["Name"]);
                     if (peer) {
                         if (peer->setSocket(socket)) {
-                            _sendPING(peer);
                             _onConnect(peer);
+                            _sendPING(peer);
+                            _onMESSAGE(peer, message);
 
                             // Connected OK, don't need in _unauthenticatedIncomingSockets anymore.
                             socketsToRemove.push_back(socket);
@@ -2589,7 +2612,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) {
                         STHROW("Unauthenticated node '" + message["Name"] + "' attempted to connected, rejecting.");
                     }
                 } else {
-                    STHROW("expecting NODE_LOGIN");
+                    STHROW("expecting LOGIN or NODE_LOGIN");
                 }
             } else if (STimeNow() > socket->lastRecvTime + 5'000'000) {
                 STHROW("Incoming socket didn't send a message for over 5s, closing.");
@@ -2612,11 +2635,12 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) {
         switch (result) {
             case SQLitePeer::PeerPostPollStatus::JUST_CONNECTED:
             {
+                // When NODE_LOGIN is deprecated, we can remove the next 3 lines.
                 SData login("NODE_LOGIN");
                 login["Name"] = _name;
                 peer->sendMessage(login.serialize());
-                _sendPING(peer);
                 _onConnect(peer);
+                _sendPING(peer);
             }
             break;
             case SQLitePeer::PeerPostPollStatus::SOCKET_ERROR:

From e5659c7e4eab52a257b76f6e9d301be589948743 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 14:22:36 -0800
Subject: [PATCH 09/28] add known bad peer, refuse to talk to it

---
 sqlitecluster/SQLiteNode.cpp | 21 ++++++++++++++++-----
 sqlitecluster/SQLitePeer.cpp |  2 ++
 sqlitecluster/SQLitePeer.h   |  3 +++
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 3ea870cd7..5f8e7d893 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1303,6 +1303,12 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             return;
         }
 
+        // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles.
+        if (peer->knownBad) {
+            PINFO("Received message " << message.methodLine << " from known bad peer, ignoring.");
+            return;
+        }
+
         // Every other message broadcasts the current state of the node
         if (!message.isSet("CommitCount")) {
             STHROW("missing CommitCount");
@@ -1374,17 +1380,22 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 STHROW("you're *not* supposed to be a 0-priority permafollower");
             }
 
-            // Validate hash here, mark node as forked if found.
-
             // It's an error to have to peers configured with the same priority, except 0 and -1
             SASSERT(_priority == -1 || _priority == 0 || message.calc("Priority") != _priority);
-            PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #"
-                  << message["CommitCount"] << " (" << message["Hash"] << ")");
             peer->priority = message.calc("Priority");
-            peer->loggedIn = true;
             peer->version = message["Version"];
             peer->state = stateFromName(message["State"]);
 
+            // Validate hash here, mark node as forked if found.
+            if (false) {
+                PINFO("Peer is forked, marking as bad, will ignore.");
+                peer->knownBad = true;
+            } else {
+                PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #"
+                    << message["CommitCount"] << " (" << message["Hash"] << ")");
+                peer->loggedIn = true;
+            }
+
             // If the peer is already standing up, go ahead and approve or deny immediately.
             if (peer->state == SQLiteNodeState::STANDINGUP) {
                 _sendStandupResponse(peer, message);
diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp
index 82c2de8a2..4daf2c785 100644
--- a/sqlitecluster/SQLitePeer.cpp
+++ b/sqlitecluster/SQLitePeer.cpp
@@ -26,6 +26,7 @@ SQLitePeer::SQLitePeer(const string& name_, const string& host_, const STable& p
     transactionResponse(Response::NONE),
     version(),
     lastPingTime(0),
+    knownBad(false),
     hash()
 { }
 
@@ -79,6 +80,7 @@ void SQLitePeer::reset() {
     version = "";
     lastPingTime = 0,
     setCommit(0, "");
+    knownBad = false;
 }
 
 void SQLitePeer::shutdownSocket() {
diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h
index 0eb984656..296a11ed8 100644
--- a/sqlitecluster/SQLitePeer.h
+++ b/sqlitecluster/SQLitePeer.h
@@ -91,6 +91,9 @@ class SQLitePeer {
     atomic<string> version;
     atomic<uint64_t> lastPingTime;
 
+    // Set to true when this peer is known to be unusable, I.e., when it has a database that is forked from us.
+    atomic<bool> knownBad;
+
   private:
     // For initializing the permafollower value from the params list.
     static bool isPermafollower(const STable& params);

From ccdc5cd82f1b933381600cdec0900a5e21374192 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 15:51:51 -0800
Subject: [PATCH 10/28] Stop communicating on fork. Test don't pass yet.

---
 sqlitecluster/SQLiteNode.cpp | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 5f8e7d893..3b95b3833 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1386,14 +1386,23 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             peer->version = message["Version"];
             peer->state = stateFromName(message["State"]);
 
-            // Validate hash here, mark node as forked if found.
-            if (false) {
-                PINFO("Peer is forked, marking as bad, will ignore.");
-                peer->knownBad = true;
-            } else {
+            uint64_t peerCommitCount;
+            string peerCommitHash;
+            bool hashesMatch = true;
+            peer->getCommit(peerCommitCount, peerCommitHash);
+            if (!peerCommitHash.empty() && peerCommitCount <= getCommitCount()) {
+                string query, hash;
+                _db.getCommit(peerCommitCount, query, hash);
+                hashesMatch = (peerCommitHash == hash);
+            }
+
+            if (hashesMatch) {
                 PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #"
                     << message["CommitCount"] << " (" << message["Hash"] << ")");
                 peer->loggedIn = true;
+            } else {
+                PINFO("Peer is forked, marking as bad, will ignore.");
+                peer->knownBad = true;
             }
 
             // If the peer is already standing up, go ahead and approve or deny immediately.
@@ -1775,7 +1784,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 }
             }
         } else {
-            STHROW("unrecognized message: " + message.methodLine);
+            SINFO("unrecognized message: " + message.methodLine);
         }
     } catch (const SException& e) {
         PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting.");
@@ -1902,6 +1911,9 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) {
     // We can treat this whole function as atomic and thread-safe as it sends data to a peer with it's own atomic
     // `sendMessage` and the peer itself (assuming it's something from _peerList, which, if not, don't do that) is
     // const and will exist without changing until destruction.
+    if (peer->knownBad) {
+        PINFO("Skipping message " << message.methodLine << " to known bad peer.");
+    }
     peer->sendMessage(_addPeerHeaders(message).serialize());
 }
 
@@ -1910,6 +1922,9 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) {
 
     // Loop across all connected peers and send the message. _peerList is const so this is thread-safe.
     for (auto peer : _peerList) {
+        if (peer->knownBad) {
+            PINFO("Skipping message " << message.methodLine << " to known bad peer.");
+        }
         // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition
         // around checking subscribed and then sending, as subscribed could technically change.
         if (!subscribedOnly || peer->subscribed) {

From 466725411ae8838f1e77e9451b1638cbfeb2b2c3 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 16:18:36 -0800
Subject: [PATCH 11/28] Rename variable, remove _forkedFrom list

---
 sqlitecluster/SQLiteNode.cpp | 53 ++++++++++++++++++++++--------------
 sqlitecluster/SQLiteNode.h   |  7 -----
 sqlitecluster/SQLitePeer.cpp |  4 +--
 sqlitecluster/SQLitePeer.h   |  2 +-
 4 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 3b95b3833..705e29a29 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -639,7 +639,7 @@ bool SQLiteNode::update() {
             // Find the freshest non-broken peer (including permafollowers).
             if (peer->loggedIn) {
                 loggedInPeers.push_back(peer->name);
-                if (_forkedFrom.count(peer->name)) {
+                if (peer->forked) {
                     SWARN("Hash mismatch. Forked from peer " << peer->name << " so not considering it." << _getLostQuorumLogMessage());
                     continue;
                 }
@@ -757,7 +757,7 @@ bool SQLiteNode::update() {
                 continue;
             }
 
-            if (_forkedFrom.count(peer->name)) {
+            if (peer->forked) {
                 // Forked nodes are treated as ineligible for leader, etc.
                 SHMMM("Not counting forked peer " << peer->name << " for freshest, highestPriority, or currentLeader.");
                 continue;
@@ -1304,8 +1304,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
         }
 
         // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles.
-        if (peer->knownBad) {
-            PINFO("Received message " << message.methodLine << " from known bad peer, ignoring.");
+        if (peer->forked) {
+            PINFO("Received message " << message.methodLine << " from forked peer, ignoring.");
             return;
         }
 
@@ -1402,7 +1402,13 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 peer->loggedIn = true;
             } else {
                 PINFO("Peer is forked, marking as bad, will ignore.");
-                peer->knownBad = true;
+
+                // Send it a message before we mark it as forked, as we'll refuse afterward.
+                SData forked("FORKED");
+                _sendToPeer(peer, forked);
+
+                // And mark it dead until it reconnects.
+                peer->forked = true;
             }
 
             // If the peer is already standing up, go ahead and approve or deny immediately.
@@ -1579,14 +1585,21 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 SQResult result;
                 uint64_t commitNum = SToUInt64(message["hashMismatchNumber"]);
                 _db.getCommits(commitNum, commitNum, result);
-                _forkedFrom.insert(peer->name);
+                peer->forked = true;
+
+                size_t forkedCount = 0;
+                for (const auto& p : _peerList) {
+                    if (p->forked) {
+                        forkedCount++;
+                    }
+                }
 
                 SALERT("Hash mismatch. Peer " << peer->name << " and I have forked at commit " << message["hashMismatchNumber"]
-                       << ". I have forked from " << _forkedFrom.size() << " other nodes. I am " << stateName(_state)
+                       << ". I have forked from " << forkedCount << " other nodes. I am " << stateName(_state)
                        << " and have hash " << result[0][0] << " for that commit. Peer has hash " << message["hashMismatchValue"] << "."
                        << _getLostQuorumLogMessage());
 
-                if (_forkedFrom.size() > ((_peerList.size() + 1) / 2)) {
+                if (forkedCount > ((_peerList.size() + 1) / 2)) {
                     SERROR("Hash mismatch. I have forked from over half the cluster. This is unrecoverable." << _getLostQuorumLogMessage());
                 }
 
@@ -1783,8 +1796,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                           << e.what() << "', ignoring.");
                 }
             }
+        } else if (SIEquals(message.methodLine, "FORKED")) {
+            peer->forked = true;
+            PINFO("Peer said we're forked, beleiving them.");
         } else {
-            SINFO("unrecognized message: " + message.methodLine);
+            PINFO("unrecognized message: " + message.methodLine);
         }
     } catch (const SException& e) {
         PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting.");
@@ -1911,8 +1927,8 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) {
     // We can treat this whole function as atomic and thread-safe as it sends data to a peer with it's own atomic
     // `sendMessage` and the peer itself (assuming it's something from _peerList, which, if not, don't do that) is
     // const and will exist without changing until destruction.
-    if (peer->knownBad) {
-        PINFO("Skipping message " << message.methodLine << " to known bad peer.");
+    if (peer->forked) {
+        PINFO("Skipping message " << message.methodLine << " to forked peer.");
     }
     peer->sendMessage(_addPeerHeaders(message).serialize());
 }
@@ -1922,8 +1938,8 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) {
 
     // Loop across all connected peers and send the message. _peerList is const so this is thread-safe.
     for (auto peer : _peerList) {
-        if (peer->knownBad) {
-            PINFO("Skipping message " << message.methodLine << " to known bad peer.");
+        if (peer->forked) {
+            PINFO("Skipping message " << message.methodLine << " to forked peer.");
         }
         // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition
         // around checking subscribed and then sending, as subscribed could technically change.
@@ -2014,16 +2030,12 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance
             _leadPeer = nullptr;
         }
 
-        if (newState >= SQLiteNodeState::STANDINGUP) {
-            // Not forked from anyone. Note that this includes both LEADING and FOLLOWING.
-            _forkedFrom.clear();
-        }
-
         // Re-enable commits if they were disabled during a previous stand-down.
         if (newState != SQLiteNodeState::SEARCHING) {
             _db.setCommitEnabled(true);
         }
 
+#if 0
         // If we're going searching and have forked from at least 1 peer, sleep for a second. This is intended to prevent thousands of lines of log spam when this happens in an infinite
         // loop. It's entirely possible that we do this for valid reasons - it may be the peer that has the bad database and not us, and there are plenty of other reasons we could switch to
         // SEARCHING, but in those cases, we just wait an extra second before trying again.
@@ -2031,6 +2043,7 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance
             SWARN("Going searching while forked peers present, sleeping 1 second." << _getLostQuorumLogMessage());
             sleep(1);
         }
+#endif
 
         // Additional logic for some new states
         if (newState == SQLiteNodeState::LEADING) {
@@ -2250,7 +2263,7 @@ void SQLiteNode::_updateSyncPeer()
             continue;
         }
 
-        if (_forkedFrom.count(peer->name)) {
+        if (peer->forked) {
             SWARN("Hash mismatch. Can't choose peer " << peer->name << " due to previous hash mismatch.");
             continue;
         }
@@ -2821,7 +2834,7 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) {
         return;
     }
 
-    if (_forkedFrom.count(peer->name)) {
+    if (peer->forked) {
         PHMMM("Forked from peer, can't approve standup.");
         response["Response"] = "abstain";
         response["Reason"] = "We are forked";
diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h
index 7c14667d4..3826ba0bd 100644
--- a/sqlitecluster/SQLiteNode.h
+++ b/sqlitecluster/SQLiteNode.h
@@ -393,13 +393,6 @@ class SQLiteNode : public STCPManager {
     // This can be removed once we've figured out why replication falls behind. See this issue: https://github.com/Expensify/Expensify/issues/210528
     atomic<size_t> _concurrentReplicateTransactions = 0;
 
-    // We keep a set of strings that are the names of nodes we've forked from, in the case we ever receive a hash mismatch while trying to synchronize.
-    // Whenever we become LEADING or FOLLOWING this is cleared. This resets the case where one node has forked, we attempt to synchronize from it, and fail,
-    // but later synchronize from someone else. Once we've come up completely, we no longer "hold a grudge" against this node, which will likely get fixed
-    // while we're online.
-    // In the event that this list becomes longer than half the cluster size, the node kills itself and logs that it's in an unrecoverable state.
-    set<string> _forkedFrom;
-
     // A pointer to a SQLite instance that is passed to plugin's stateChanged function. This prevents plugins from operating on the same handle that
     // the sync node is when they run queries in stateChanged.
     SQLite* pluginDB;
diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp
index 4daf2c785..35146de52 100644
--- a/sqlitecluster/SQLitePeer.cpp
+++ b/sqlitecluster/SQLitePeer.cpp
@@ -26,7 +26,7 @@ SQLitePeer::SQLitePeer(const string& name_, const string& host_, const STable& p
     transactionResponse(Response::NONE),
     version(),
     lastPingTime(0),
-    knownBad(false),
+    forked(false),
     hash()
 { }
 
@@ -80,7 +80,7 @@ void SQLitePeer::reset() {
     version = "";
     lastPingTime = 0,
     setCommit(0, "");
-    knownBad = false;
+    forked = false;
 }
 
 void SQLitePeer::shutdownSocket() {
diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h
index 296a11ed8..d6d0a8030 100644
--- a/sqlitecluster/SQLitePeer.h
+++ b/sqlitecluster/SQLitePeer.h
@@ -92,7 +92,7 @@ class SQLitePeer {
     atomic<uint64_t> lastPingTime;
 
     // Set to true when this peer is known to be unusable, I.e., when it has a database that is forked from us.
-    atomic<bool> knownBad;
+    atomic<bool> forked;
 
   private:
     // For initializing the permafollower value from the params list.

From 0689a931a2845e03f5c2ebb64a5efc2728d4e579 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 16:42:58 -0800
Subject: [PATCH 12/28] Fix fork test

---
 sqlitecluster/SQLiteNode.cpp             | 48 +++++++++++++++++-------
 sqlitecluster/SQLiteNode.h               |  2 +
 test/clustertest/tests/ForkCheckTest.cpp |  5 +++
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 705e29a29..8478678fa 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1409,6 +1409,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
 
                 // And mark it dead until it reconnects.
                 peer->forked = true;
+                _dieIfForkedFromCluster();
             }
 
             // If the peer is already standing up, go ahead and approve or deny immediately.
@@ -1587,22 +1588,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 _db.getCommits(commitNum, commitNum, result);
                 peer->forked = true;
 
-                size_t forkedCount = 0;
-                for (const auto& p : _peerList) {
-                    if (p->forked) {
-                        forkedCount++;
-                    }
-                }
-
                 SALERT("Hash mismatch. Peer " << peer->name << " and I have forked at commit " << message["hashMismatchNumber"]
-                       << ". I have forked from " << forkedCount << " other nodes. I am " << stateName(_state)
-                       << " and have hash " << result[0][0] << " for that commit. Peer has hash " << message["hashMismatchValue"] << "."
-                       << _getLostQuorumLogMessage());
-
-                if (forkedCount > ((_peerList.size() + 1) / 2)) {
-                    SERROR("Hash mismatch. I have forked from over half the cluster. This is unrecoverable." << _getLostQuorumLogMessage());
-                }
+                       << ". I am " << stateName(_state) << " and have hash " << result[0][0] << " for that commit. Peer has hash "
+                       << message["hashMismatchValue"] << "." << _getLostQuorumLogMessage());
 
+                _dieIfForkedFromCluster();
                 STHROW("Hash mismatch");
             }
             if (!_syncPeer) {
@@ -1799,6 +1789,23 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
         } else if (SIEquals(message.methodLine, "FORKED")) {
             peer->forked = true;
             PINFO("Peer said we're forked, beleiving them.");
+            _dieIfForkedFromCluster();
+
+            // If leader said we're forked from it, we need a new leader.
+            if (peer == _leadPeer) {
+                _leadPeer = nullptr;
+                _changeState(SQLiteNodeState::SEARCHING);
+            }
+
+            // If our sync peer said we're forked from it, and we're currently synchronizing, we need a new sync peer.
+            // However, if we're not currently syncing, then we don't need to change states, this peer could have been chosen
+            // hours or days ago.
+            if (peer == _syncPeer) {
+                _syncPeer = nullptr;
+                if (_state == SQLiteNodeState::SYNCHRONIZING) {
+                    _changeState(SQLiteNodeState::SEARCHING);
+                }
+            }
         } else {
             PINFO("unrecognized message: " + message.methodLine);
         }
@@ -2908,3 +2915,16 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) {
     }
     _sendToPeer(peer, response);
 }
+
+void SQLiteNode::_dieIfForkedFromCluster() {
+    size_t forkedCount = 0;
+    for (const auto& p : _peerList) {
+        if (p->forked) {
+            forkedCount++;
+        }
+    }
+
+    if (forkedCount > ((_peerList.size() + 1) / 2)) {
+        SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage());
+    }
+}
\ No newline at end of file
diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h
index 3826ba0bd..354aef7c9 100644
--- a/sqlitecluster/SQLiteNode.h
+++ b/sqlitecluster/SQLiteNode.h
@@ -280,6 +280,8 @@ class SQLiteNode : public STCPManager {
     // commitCount that we do, this will return null.
     void _updateSyncPeer();
 
+    void _dieIfForkedFromCluster();
+
     const string _commandAddress;
     const string _name;
     const vector<SQLitePeer*> _peerList;
diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp
index 6e0c0d262..330c8735f 100644
--- a/test/clustertest/tests/ForkCheckTest.cpp
+++ b/test/clustertest/tests/ForkCheckTest.cpp
@@ -119,5 +119,10 @@ struct ForkCheckTest : tpunit::TestFixture {
 
         // And that signal should have been ABORT.
         ASSERT_EQUAL(SIGABRT, WTERMSIG(status));
+
+        // We call stopServer on the forked leader because it crashed, but the cluster tester doesn't realize, so shutting down
+        // normally will time out after a minute. Calling `stopServer` explicitly will clear the server PID, and we won't need
+        // to wait for this timeout.
+        tester.getTester(0).stopServer();
     }
 } __ForkCheckTest;

From 002a9373aa6b08cde04fbbfd2f0b0c029a2e50b9 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 16:46:01 -0800
Subject: [PATCH 13/28] remove test that tests condition that's no longer used.

---
 .../tests/ForkedNodeApprovalTest.cpp          | 161 ------------------
 1 file changed, 161 deletions(-)
 delete mode 100644 test/clustertest/tests/ForkedNodeApprovalTest.cpp

diff --git a/test/clustertest/tests/ForkedNodeApprovalTest.cpp b/test/clustertest/tests/ForkedNodeApprovalTest.cpp
deleted file mode 100644
index 197b883cc..000000000
--- a/test/clustertest/tests/ForkedNodeApprovalTest.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-
-#include <sys/wait.h>
-
-#include <libstuff/SData.h>
-#include <libstuff/SQResult.h>
-#include <sqlitecluster/SQLite.h>
-#include <test/clustertest/BedrockClusterTester.h>
-
-struct ForkedNodeApprovalTest : tpunit::TestFixture {
-    ForkedNodeApprovalTest()
-        : tpunit::TestFixture("ForkedNodeApproval", TEST(ForkedNodeApprovalTest::test)) {}
-
-    pair<uint64_t, string> getMaxJournalCommit(BedrockTester& tester, bool online = true) {
-        SQResult journals;
-        tester.readDB("SELECT name FROM sqlite_schema WHERE type ='table' AND name LIKE 'journal%';", journals, online);
-        uint64_t maxJournalCommit = 0;
-        string maxJournalTable;
-        for (auto& row : journals.rows) {
-            string maxID = tester.readDB("SELECT MAX(id) FROM " + row[0] + ";", online);
-            try {
-                uint64_t maxCommitNum = stoull(maxID);
-                if (maxCommitNum > maxJournalCommit) {
-                    maxJournalCommit = maxCommitNum;
-                    maxJournalTable = row[0];
-                }
-            } catch (const invalid_argument& e) {
-                // do nothing, skip this journal with no entries.
-                continue;
-            }
-        }
-        return make_pair(maxJournalCommit, maxJournalTable);
-    }
-
-    void test() {
-        // Create a cluster, wait for it to come up.
-        BedrockClusterTester tester(ClusterSize::THREE_NODE_CLUSTER);
-
-        // We'll tell the threads to stop when they're done.
-        atomic<bool> stop(false);
-
-        // We want to not spam a stopped leader.
-        atomic<bool> leaderIsUp(true);
-
-        // Just use a bunch of copies of the same command.
-        SData spamCommand("idcollision");
-
-        // In a vector.
-        const vector<SData> commands(100, spamCommand);
-
-        // Now create 9 threads spamming 100 commands at a time, each. 9 cause we have three nodes.
-        vector<thread> threads;
-        for (size_t i = 0; i < 9; i++) {
-            threads.emplace_back([&tester, i, &commands, &stop, &leaderIsUp](){
-                while (!stop) {
-                    // Pick a tester, send, don't care about the result.
-                    size_t testerNum = i % 3;
-                    if (testerNum == 0 && !leaderIsUp) {
-                        // If we're looking for leader and it's down, wait a second to avoid pegging the CPU.
-                        sleep(1);
-                    } else {
-                        // If we're not leader or leader is up, spam away!
-                        tester.getTester(testerNum).executeWaitMultipleData(commands);
-                    }
-                }
-            });
-        }
-
-        // Let them spam for a second.
-        sleep(1);
-
-        // We can try and stop the leader.
-        leaderIsUp = false;
-        tester.getTester(0).stopServer();
-
-        // Spam a few more commands and then we can stop.
-        sleep(1);
-        stop = true;
-        for (auto& t : threads) {
-            t.join();
-        }
-
-        // Fetch the latest journal commits on leader and follower
-        auto result = getMaxJournalCommit(tester.getTester(0), false);
-
-        uint64_t leaderMaxCommit = result.first;
-        string leaderMaxCommitJournal = result.second;
-        result = getMaxJournalCommit(tester.getTester(1));
-        uint64_t followerMaxCommit = result.first;
-
-        // Make sure the follower got farther than the leader.
-        ASSERT_GREATER_THAN(followerMaxCommit, leaderMaxCommit);
-
-        // We need to release any DB that the tester is holding.
-        tester.getTester(0).freeDB();
-        tester.getTester(1).freeDB();
-
-        // Break leader.
-        {
-            string filename = tester.getTester(0).getArg("-db");
-            string query = "UPDATE " + leaderMaxCommitJournal + " SET hash = 'abcdef123456' WHERE id = " + to_string(leaderMaxCommit) + ";";
-
-            sqlite3* db = nullptr;
-            sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_NOMUTEX, NULL);
-            char* errMsg = nullptr;
-            sqlite3_exec(db, query.c_str(), 0, 0, &errMsg);
-            if (errMsg) {
-                cout << "Error updating db: " << errMsg << endl;
-                ASSERT_TRUE(false);
-            }
-            sqlite3_close_v2(db);
-        }
-
-        // Stop the second follower.
-        tester.getTester(2).stopServer();
-
-        // Start the broken leader back up.
-        tester.getTester(0).startServer(false);
-
-        // We should not get a leader, the primary leader needs to synchronize, but can't because it's forked.
-        // The secondary leader should go leading, but can't, because it only receives `abstain` responses to standup requests.
-        // It's possible for the secondary leader to go leading once, but it should quickly fall out of leading when the fork is detected and primary leader reconnects.
-        // After that, it should not go leading again, primary leader should abstain from participation.
-        auto start = chrono::steady_clock::now();
-        bool abstainDetected = false;
-        while (true) {
-            if (chrono::steady_clock::now() - start > 30s) {
-                cout << "It's been 30 seconds." << endl;
-                break;
-            }
-            SData command("Status");
-            auto responseJson = tester.getTester(1).executeWaitMultipleData({command}, 1, true)[0].content;
-
-            auto json = SParseJSONObject(responseJson);
-            auto peers = SParseJSONArray(json["peerList"]);
-            for (auto& peer : peers) {
-                auto peerJSON = SParseJSONObject(peer);
-                if (peerJSON["name"] == "cluster_node_0" && peerJSON["standupResponse"] == "ABSTAIN") {
-                    abstainDetected = true;
-                    break;
-                }
-            }
-            if (abstainDetected) {
-                break;
-            }
-
-            // try again.
-            usleep(50'000);
-        }
-
-        ASSERT_TRUE(abstainDetected);
-
-        // Ok, now we can start the second follower back up and secondary leader should be able to lead.
-        tester.getTester(2).startServer(false);
-        ASSERT_TRUE(tester.getTester(1).waitForState("LEADING"));
-
-        // We call stopServer on the forked leader because it crashed, but the cluster tester doesn't realize, so shutting down
-        // normally will time out after a minute. Calling `stopServer` explicitly will clear the server PID, and we won't need
-        // to wait for this timeout.
-        tester.getTester(0).stopServer();
-    }
-} __ForkedNodeApprovalTest;

From f50aec04744ff9fb9cfb44e18b5c378bff3f1e2a Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 16:54:03 -0800
Subject: [PATCH 14/28] Remove ABSTAIN

---
 sqlitecluster/SQLiteNode.cpp | 25 -------------------------
 sqlitecluster/SQLitePeer.cpp |  3 ---
 sqlitecluster/SQLitePeer.h   |  3 +--
 3 files changed, 1 insertion(+), 30 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 8478678fa..7374ac0f3 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -861,7 +861,6 @@ bool SQLiteNode::update() {
         size_t numFullPeers = 0;
         size_t numLoggedInFullPeers = 0;
         size_t approveCount = 0;
-        size_t abstainCount = 0;
         if (_isShuttingDown) {
             SINFO("Shutting down while standing up, setting state to SEARCHING");
             _changeState(SQLiteNodeState::SEARCHING);
@@ -879,9 +878,6 @@ bool SQLiteNode::update() {
                     // Has it responded yet?
                     if (peer->standupResponse == SQLitePeer::Response::NONE) {
                         // This peer hasn't yet responded. We do nothing with it in this case, maybe it will have responded by the next check.
-                    } else if (peer->standupResponse == SQLitePeer::Response::ABSTAIN) {
-                        PHMMM("Peer abstained from participation in quorum");
-                        abstainCount++;
                     } else if (peer->standupResponse == SQLitePeer::Response::DENY) {
                         // It responeded, but didn't approve -- abort
                         PHMMM("Refused our STANDUP, cancel and RE-SEARCH");
@@ -894,16 +890,6 @@ bool SQLiteNode::update() {
             }
         }
 
-        // If the majority of full peers responds with abstain, then re-search.
-        const bool majorityAbstained = abstainCount * 2 > numFullPeers;
-        if (majorityAbstained) {
-            // Majority abstained, meaning we're probably forked,
-            // so we go back to searching so we can go back to synchronizing and see if we're forked.
-            SHMMM("Majority of full peers abstained; re-SEARCHING.");
-            _changeState(SQLiteNodeState::SEARCHING);
-            return true; // Re-update
-        }
-
         // If everyone's responded with approval and we form a majority, then finish standup.
         bool majorityConnected = numLoggedInFullPeers * 2 >= numFullPeers;
         bool quorumApproved = approveCount * 2 >= numFullPeers;
@@ -1531,9 +1517,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 if (SIEquals(message["Response"], "approve")) {
                     PINFO("Received standup approval");
                     peer->standupResponse = SQLitePeer::Response::APPROVE;
-                } else if (SIEquals(message["Response"], "abstain")) {
-                    PINFO("Received standup abstain");
-                    peer->standupResponse = SQLitePeer::Response::ABSTAIN;
                 } else {
                     PHMMM("Received standup denial: reason='" << message["Reason"] << "'");
                     peer->standupResponse = SQLitePeer::Response::DENY;
@@ -2841,14 +2824,6 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) {
         return;
     }
 
-    if (peer->forked) {
-        PHMMM("Forked from peer, can't approve standup.");
-        response["Response"] = "abstain";
-        response["Reason"] = "We are forked";
-        _sendToPeer(peer, response);
-        return;
-    }
-
     // What's our state
     if (SWITHIN(SQLiteNodeState::STANDINGUP, _state, SQLiteNodeState::STANDINGDOWN)) {
         // Oh crap, it's trying to stand up while we're leading. Who is higher priority?
diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp
index 35146de52..c93589520 100644
--- a/sqlitecluster/SQLitePeer.cpp
+++ b/sqlitecluster/SQLitePeer.cpp
@@ -207,9 +207,6 @@ string SQLitePeer::responseName(Response response) {
         case Response::DENY:
             return "DENY";
             break;
-        case Response::ABSTAIN:
-            return "ABSTAIN";
-            break;
         default:
             return "";
     }
diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h
index d6d0a8030..48bc3433c 100644
--- a/sqlitecluster/SQLitePeer.h
+++ b/sqlitecluster/SQLitePeer.h
@@ -8,8 +8,7 @@ class SQLitePeer {
     enum class Response {
         NONE,
         APPROVE,
-        DENY,
-        ABSTAIN
+        DENY
     };
 
     enum class PeerPostPollStatus {

From 0e828eb21f325f189642a47678a6d3ef562f9495 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 17:01:06 -0800
Subject: [PATCH 15/28] Comment cleanup

---
 sqlitecluster/SQLiteNode.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 7374ac0f3..7f779d43d 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -56,6 +56,7 @@
 // dbCountAtStart:   The highest committed transaction in the DB at the start of this transaction on leader, for
 //                   optimizing replication.
 
+// NOTE: This comment as well as NODE_LOGIN should be removed after https://github.com/Expensify/Bedrock/pull/1999 is deployed.
 // On LOGIN vs NODE_LOGIN.
 // _onConnect sends a LOGIN message.
 // _onConnect is called in exctly two places:
@@ -68,7 +69,6 @@
 //    3. LOGIN
 //
 // When we receive a NODE_LOGIN, we immediately respond with a PING followed by a LOGIN (by calling _onConnect).
-// We can cobine all of these into a single login message.
 
 #undef SLOGPREFIX
 #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} "
@@ -2902,4 +2902,4 @@ void SQLiteNode::_dieIfForkedFromCluster() {
     if (forkedCount > ((_peerList.size() + 1) / 2)) {
         SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage());
     }
-}
\ No newline at end of file
+}

From 2d6b73adc2ca09cc8fe42249a826584da282ddb5 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 17:19:39 -0800
Subject: [PATCH 16/28] Standardize use of _sendToPeer

---
 sqlitecluster/SQLiteNode.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 7f779d43d..d6c03bb70 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1277,7 +1277,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             SINFO("Received PING from peer '" << peer->name << "'. Sending PONG.");
             SData pong("PONG");
             pong["Timestamp"] = message["Timestamp"];
-            peer->sendMessage(pong.serialize());
+            peer->sendMessage(pong);
             return;
         } else if (SIEquals(message.methodLine, "PONG")) {
             // Latency must be > 0 because we treat 0 as "not connected".
@@ -1531,7 +1531,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                 SINFO("Asked to help SYNCHRONIZE but shutting down.");
                 SData response("SYNCHRONIZE_RESPONSE");
                 response["ShuttingDown"] = "true";
-                peer->sendMessage(response);
+                _sendToPeer(peer, response);
             } else {
                 _pendingSynchronizeResponses++;
                 static atomic<size_t> synchronizeCount(0);
@@ -1546,13 +1546,13 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
                         // The following two lines are copied from `_sendToPeer`.
                         response["CommitCount"] = to_string(db.getCommitCount());
                         response["Hash"] = db.getCommittedHash();
-                        peer->sendMessage(response);
+                        _sendToPeer(peer, response);
                     } catch (const SException& e) {
                         // This is the same handling as at the bottom of _onMESSAGE.
                         PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting.");
                         SData reconnect("RECONNECT");
                         reconnect["Reason"] = e.what();
-                        peer->sendMessage(reconnect.serialize());
+                        _sendToPeer(peer, reconnect);
                         peer->shutdownSocket();
                     }
 
@@ -1796,7 +1796,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
         PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting.");
         SData reconnect("RECONNECT");
         reconnect["Reason"] = e.what();
-        peer->sendMessage(reconnect.serialize());
+        _sendToPeer(peer, reconnect);
         peer->shutdownSocket();
     }
 }
@@ -1920,11 +1920,11 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) {
     if (peer->forked) {
         PINFO("Skipping message " << message.methodLine << " to forked peer.");
     }
-    peer->sendMessage(_addPeerHeaders(message).serialize());
+    peer->sendMessage(_addPeerHeaders(message));
 }
 
 void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) {
-    const string serializedMessage = _addPeerHeaders(message).serialize();
+    const SData messageWithHeaders = _addPeerHeaders(message);
 
     // Loop across all connected peers and send the message. _peerList is const so this is thread-safe.
     for (auto peer : _peerList) {
@@ -1934,7 +1934,7 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) {
         // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition
         // around checking subscribed and then sending, as subscribed could technically change.
         if (!subscribedOnly || peer->subscribed) {
-            peer->sendMessage(serializedMessage);
+            peer->sendMessage(messageWithHeaders);
         }
     }
 }
@@ -2667,7 +2667,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) {
                 // When NODE_LOGIN is deprecated, we can remove the next 3 lines.
                 SData login("NODE_LOGIN");
                 login["Name"] = _name;
-                peer->sendMessage(login.serialize());
+                _sendToPeer(peer, login);
                 _onConnect(peer);
                 _sendPING(peer);
             }
@@ -2676,7 +2676,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) {
             {
                 SData reconnect("RECONNECT");
                 reconnect["Reason"] = "socket error";
-                peer->sendMessage(reconnect.serialize());
+                _sendToPeer(peer, reconnect);
                 peer->shutdownSocket();
             }
             break;
@@ -2734,7 +2734,7 @@ void SQLiteNode::_sendPING(SQLitePeer* peer) {
     SASSERT(peer);
     SData ping("PING");
     ping["Timestamp"] = SToStr(STimeNow());
-    peer->sendMessage(ping.serialize());
+    peer->sendMessage(ping);
 }
 
 SQLitePeer* SQLiteNode::getPeerByName(const string& name) const {

From 859ba9f500361ef02dbb79505d0abf1f6f1333b1 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 18:37:00 -0800
Subject: [PATCH 17/28] Update sqlitecluster/SQLiteNode.cpp

Co-authored-by: Carlos Alvarez <cead22@gmail.com>
---
 sqlitecluster/SQLiteNode.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index d6c03bb70..38f676c10 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1930,6 +1930,7 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) {
     for (auto peer : _peerList) {
         if (peer->forked) {
             PINFO("Skipping message " << message.methodLine << " to forked peer.");
+            continue;
         }
         // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition
         // around checking subscribed and then sending, as subscribed could technically change.

From 12cdd008b05440e883e8dbde799e9411446242f6 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Wed, 4 Dec 2024 18:37:25 -0800
Subject: [PATCH 18/28] Update sqlitecluster/SQLiteNode.cpp

Co-authored-by: Carlos Alvarez <cead22@gmail.com>
---
 sqlitecluster/SQLiteNode.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 38f676c10..a26c84f71 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1919,6 +1919,7 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) {
     // const and will exist without changing until destruction.
     if (peer->forked) {
         PINFO("Skipping message " << message.methodLine << " to forked peer.");
+        return;
     }
     peer->sendMessage(_addPeerHeaders(message));
 }

From d8c495fe8bdb16dae2226d3baa0dd1229226e339 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Thu, 5 Dec 2024 09:42:34 -0800
Subject: [PATCH 19/28] Refactor test to make multiple tests easier to run

---
 test/clustertest/tests/ForkCheckTest.cpp | 61 ++++++++++++++----------
 1 file changed, 35 insertions(+), 26 deletions(-)

diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp
index 330c8735f..631df5264 100644
--- a/test/clustertest/tests/ForkCheckTest.cpp
+++ b/test/clustertest/tests/ForkCheckTest.cpp
@@ -1,3 +1,4 @@
+#include "test/lib/BedrockTester.h"
 #include <sys/wait.h>
 
 #include <libstuff/SData.h>
@@ -7,7 +8,9 @@
 
 struct ForkCheckTest : tpunit::TestFixture {
     ForkCheckTest()
-        : tpunit::TestFixture("ForkCheck", TEST(ForkCheckTest::test)) {}
+        : tpunit::TestFixture("ForkCheck",
+          TEST(ForkCheckTest::forkAtShutDown),
+          TEST(ForkCheckTest::forkAtCrash)) {}
 
     pair<uint64_t, string> getMaxJournalCommit(BedrockTester& tester, bool online = true) {
         SQResult journals;
@@ -30,40 +33,42 @@ struct ForkCheckTest : tpunit::TestFixture {
         return make_pair(maxJournalCommit, maxJournalTable);
     }
 
-    void test() {
-        // Create a cluster, wait for it to come up.
-        BedrockClusterTester tester(ClusterSize::FIVE_NODE_CLUSTER);
-
-        // We'll tell the threads to stop when they're done.
-        atomic<bool> stop(false);
-
-        // We want to not spam a stopped leader.
-        atomic<bool> leaderIsUp(true);
-
+    vector<thread> createThreads(size_t num, BedrockClusterTester& tester, atomic<bool>& stop, atomic<bool>& leaderIsUp) {
         // Just use a bunch of copies of the same command.
-        SData spamCommand("idcollision");
-
-        // In a vector.
-        const vector<SData> commands(100, spamCommand);
-
-        // Now create 9 threads spamming 100 commands at a time, each. 9 cause we have three nodes.
         vector<thread> threads;
-        for (size_t i = 0; i < 9; i++) {
-            threads.emplace_back([&tester, i, &commands, &stop, &leaderIsUp](){
+        for (size_t num = 0; num < 9; num++) {
+            threads.emplace_back([&tester, num, &stop, &leaderIsUp](){
+                const vector<SData> commands(100, SData("idcollision"));
                 while (!stop) {
                     // Pick a tester, send, don't care about the result.
-                    size_t testerNum = i % 3;
+                    size_t testerNum = num % 5;
                     if (testerNum == 0 && !leaderIsUp) {
-                        // If we're looking for leader and it's down, wait a second to avoid pegging the CPU.
-                        sleep(1);
-                    } else {
-                        // If we're not leader or leader is up, spam away!
-                        tester.getTester(testerNum).executeWaitMultipleData(commands);
+                        // If leader's off, don't use it.
+                        testerNum = 1;
                     }
+                    tester.getTester(testerNum).executeWaitMultipleData(commands);
                 }
             });
         }
 
+        return threads;
+    }
+
+    // This primary test here checks that a node that is forked will not be able to rejoin the cluster when reconnecting.
+    // This is a reasonable test for a fork that happens at shutdown.
+    void forkAtShutDown() {
+        // Create a cluster, wait for it to come up.
+        BedrockClusterTester tester(ClusterSize::FIVE_NODE_CLUSTER);
+
+        // We'll tell the threads to stop when they're done.
+        atomic<bool> stop(false);
+
+        // We want to not spam a stopped leader.
+        atomic<bool> leaderIsUp(true);
+
+        // Now create 15 threads spamming 100 commands at a time, each. 15 cause we have five nodes.
+        vector<thread> threads = createThreads(15, tester, stop, leaderIsUp);
+
         // Let them spam for a second.
         sleep(1);
 
@@ -71,7 +76,7 @@ struct ForkCheckTest : tpunit::TestFixture {
         leaderIsUp = false;
         tester.getTester(0).stopServer();
 
-        // Spam a few more commands and then we can stop.
+        // Spam a few more commands so thar the follower is ahead of the stopped leader, and then we can stop.
         sleep(1);
         stop = true;
         for (auto& t : threads) {
@@ -125,4 +130,8 @@ struct ForkCheckTest : tpunit::TestFixture {
         // to wait for this timeout.
         tester.getTester(0).stopServer();
     }
+
+    void forkAtCrash() {
+
+    }
 } __ForkCheckTest;

From ac2aa66647d2fe6c331393bf2058ae62935bb2e9 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Thu, 5 Dec 2024 10:05:00 -0800
Subject: [PATCH 20/28] Remove second test

---
 test/clustertest/tests/ForkCheckTest.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp
index 631df5264..835cc76cd 100644
--- a/test/clustertest/tests/ForkCheckTest.cpp
+++ b/test/clustertest/tests/ForkCheckTest.cpp
@@ -4,13 +4,13 @@
 #include <libstuff/SData.h>
 #include <libstuff/SQResult.h>
 #include <sqlitecluster/SQLite.h>
+#include <sqlitecluster/SQLiteNode.h>
 #include <test/clustertest/BedrockClusterTester.h>
 
 struct ForkCheckTest : tpunit::TestFixture {
     ForkCheckTest()
         : tpunit::TestFixture("ForkCheck",
-          TEST(ForkCheckTest::forkAtShutDown),
-          TEST(ForkCheckTest::forkAtCrash)) {}
+          TEST(ForkCheckTest::forkAtShutDown)) {}
 
     pair<uint64_t, string> getMaxJournalCommit(BedrockTester& tester, bool online = true) {
         SQResult journals;
@@ -66,7 +66,7 @@ struct ForkCheckTest : tpunit::TestFixture {
         // We want to not spam a stopped leader.
         atomic<bool> leaderIsUp(true);
 
-        // Now create 15 threads spamming 100 commands at a time, each. 15 cause we have five nodes.
+        // Now create 15 threads spamming 100 commands at a time, each. 15 because we have five nodes.
         vector<thread> threads = createThreads(15, tester, stop, leaderIsUp);
 
         // Let them spam for a second.
@@ -130,8 +130,4 @@ struct ForkCheckTest : tpunit::TestFixture {
         // to wait for this timeout.
         tester.getTester(0).stopServer();
     }
-
-    void forkAtCrash() {
-
-    }
 } __ForkCheckTest;

From e6774554997c399f2626d3e5f417de3c8d382c71 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Thu, 5 Dec 2024 13:24:01 -0800
Subject: [PATCH 21/28] Fix last instance of _forkedFrom

---
 sqlitecluster/SQLiteNode.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index a26c84f71..5042e2e3e 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -2027,15 +2027,17 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance
             _db.setCommitEnabled(true);
         }
 
-#if 0
         // If we're going searching and have forked from at least 1 peer, sleep for a second. This is intended to prevent thousands of lines of log spam when this happens in an infinite
         // loop. It's entirely possible that we do this for valid reasons - it may be the peer that has the bad database and not us, and there are plenty of other reasons we could switch to
         // SEARCHING, but in those cases, we just wait an extra second before trying again.
-        if (newState == SQLiteNodeState::SEARCHING && _forkedFrom.size()) {
+        bool forkedPeers = false;
+        for (const auto p : _peerList) {
+            forkedPeers = forkedPeers || p->forked;
+        }
+        if (newState == SQLiteNodeState::SEARCHING && forkedPeers) {
             SWARN("Going searching while forked peers present, sleeping 1 second." << _getLostQuorumLogMessage());
             sleep(1);
         }
-#endif
 
         // Additional logic for some new states
         if (newState == SQLiteNodeState::LEADING) {

From 54cdcbc1ca8e549387338781da27933e9336fab1 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Thu, 5 Dec 2024 19:44:30 -0800
Subject: [PATCH 22/28] Code review feedback

---
 sqlitecluster/SQLiteNode.cpp | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 2bd99cd2f..e7c1a3703 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1720,7 +1720,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             }
         } else if (SIEquals(message.methodLine, "FORKED")) {
             peer->forked = true;
-            PINFO("Peer said we're forked, beleiving them.");
+            PINFO("Peer said we're forked, believing them.");
             _dieIfForkedFromCluster();
 
             // If leader said we're forked from it, we need a new leader.
@@ -2825,14 +2825,23 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) {
 }
 
 void SQLiteNode::_dieIfForkedFromCluster() {
-    size_t forkedCount = 0;
+    size_t quorumNodeCount = 0;
+    size_t forkedFullPeerCount = 0;
     for (const auto& p : _peerList) {
-        if (p->forked) {
-            forkedCount++;
+        if (!p->permaFollower) {
+            forkedFullPeerCount++;
+            if (p->forked) {
+                forkedFullPeerCount++;
+            }
         }
     }
 
-    if (forkedCount > ((_peerList.size() + 1) / 2)) {
-        SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage());
+    // Am *I* a permaFollower?
+    if (_originalPriority != 0) {
+        quorumNodeCount++;
+    }
+
+    if (forkedFullPeerCount >= (quorumNodeCount + 1) / 2) {
+        SERROR("I have forked from over half the cluster (" << forkedFullPeerCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage());
     }
 }

From 7c06bf595122db6b9ae23cdb20914295c7cd926e Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@tylerkaraszewski.com>
Date: Thu, 5 Dec 2024 19:48:43 -0800
Subject: [PATCH 23/28] Revert "Update SQLite with a small optimization + more
 logs (v15)"

---
 libstuff/sqlite3.c | 103 ++++++---------------------------------------
 libstuff/sqlite3.h |   2 +-
 2 files changed, 15 insertions(+), 90 deletions(-)

diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c
index e95e639a2..8084ff4a9 100644
--- a/libstuff/sqlite3.c
+++ b/libstuff/sqlite3.c
@@ -18,7 +18,7 @@
 ** separate file. This file contains only code for the core SQLite library.
 **
 ** The content in this amalgamation comes from Fossil check-in
-** 5f9f6764e9dffef60213bbc9604940ddfc71.
+** 853f9cf453d13cf826443b0d27331e1f4e9e.
 */
 #define SQLITE_CORE 1
 #define SQLITE_AMALGAMATION 1
@@ -465,7 +465,7 @@ extern "C" {
 */
 #define SQLITE_VERSION        "3.47.0"
 #define SQLITE_VERSION_NUMBER 3047000
-#define SQLITE_SOURCE_ID      "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e"
+#define SQLITE_SOURCE_ID      "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers
@@ -18229,19 +18229,10 @@ struct sqlite3 {
 #define SCHEMA_TIME_BEFORE_PREPARE 8
 #define SCHEMA_TIME_BEFORE_FINALIZE 9
 #define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10
+#define SCHEMA_TIME_END_ANALYZE_LOAD 11
+#define SCHEMA_TIME_FINISH 12
 
-#define SCHEMA_TIME_AFTER_CLEAR_STATS 11
-#define SCHEMA_TIME_AFTER_STAT1 12
-#define SCHEMA_TIME_AFTER_DEFAULTS 13
-
-#define SCHEMA_TIME_AFTER_STAT4_Q1 14
-#define SCHEMA_TIME_AFTER_STAT4_Q2 15
-#define SCHEMA_TIME_AFTER_STAT4 16
-
-#define SCHEMA_TIME_END_ANALYZE_LOAD 17
-#define SCHEMA_TIME_FINISH 18
-
-#define SCHEMA_TIME_N 19
+#define SCHEMA_TIME_N 13
 #define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000)
 
 
@@ -67561,25 +67552,6 @@ static void walCleanupHash(Wal *pWal){
 #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
 }
 
-/*
-** Zero the n byte block indicated by pointer p. n Must be a multiple of
-** 8, and p must be aligned to an 8-byte boundary.
-*/
-static void zero64(void *p, int n){
-  size_t c = n / sizeof(u64);
-  void *d = p;
-
-  assert( (n & 0x7)==0 );
-  assert( EIGHT_BYTE_ALIGNMENT(p) );
-
-  __asm__ volatile (
-      "rep stosq"
-        : "+D" (d), "+c" (c)
-        : "a" (0)
-        : "memory"
-  );
-}
-
 /*
 ** Set an entry in the wal-index that will map database page number
 ** pPage into WAL frame iFrame.
@@ -67618,10 +67590,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){
     ** entire hash table and aPgno[] array before proceeding.
     */
     if( pWal->aCommitTime ) t = sqlite3STimeNow();
-    if( idx==1 && sLoc.aPgno[0]!=0 ){
+    if( idx==1 ){
       int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno);
-      assert( nByte>=0 && (nByte & 0x07)==0 );
-      zero64((void*)sLoc.aPgno, nByte);
+      assert( nByte>=0 );
+      memset((void*)sLoc.aPgno, 0, nByte);
     }
     if( pWal->aCommitTime ){
       pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t;
@@ -67791,7 +67763,6 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){
         if( aShare==0 ) break;
         SEH_SET_ON_ERROR(iPg, aShare);
         pWal->apWiData[iPg] = aPrivate;
-        memset(aPrivate, 0, WALINDEX_PGSZ);
 
         if( iWal ){
           assert( version==WAL_VERSION2 );
@@ -68920,39 +68891,6 @@ static int walCheckpoint(
       }
     }
 
-    if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
-      /* In wal2 mode, a non-passive checkpoint waits for all readers of
-      ** the wal file just checkpointed to finish, then zeroes the hash
-      ** tables associated with that wal file. This is because in some
-      ** deployments, zeroing the hash tables as they are overwritten within
-      ** COMMIT commands is a significant performance hit.
-      **
-      ** Currently, both of the "PART" locks are held for the wal file
-      ** being checkpointed. i.e. if iCkpt==0, then we already hold both
-      ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an
-      ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that
-      ** there are no remaining readers of the (iCkpt==0) wal file. Similar
-      ** logic, with different locks, is used for (iCkpt==1).
-      */
-      int lockIdx = WAL_READ_LOCK(
-        iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2
-      );
-      assert( iCkpt==0 || iCkpt==1 );
-      rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1);
-      if( rc==SQLITE_OK ){
-        int iHash;
-        for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){
-          WalHashLoc sLoc;
-          int nByte;
-          memset(&sLoc, 0, sizeof(sLoc));
-          walHashGet(pWal, iHash, &sLoc);
-          nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno);
-          memset((void*)sLoc.aPgno, 0, nByte);
-        }
-        walUnlockExclusive(pWal, lockIdx, 1);
-      }
-    }
-
     if( rc==SQLITE_BUSY ){
       /* Reset the return code so as not to report a checkpoint failure
       ** just because there are active readers.  */
@@ -93469,7 +93407,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){
       }
       zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU);
     }
-    sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr);
+    sqlite3_log(SQLITE_WARNING, "slow commit (v=12): (%s)", zStr);
     sqlite3_free(zStr);
   }
 }
@@ -93497,7 +93435,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep
     }
     if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); }
     sqlite3_log(SQLITE_WARNING,
-        "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql
+        "slow prepare (v=12): (%s) [%.*s]", zStr, nByte, zSql
     );
     sqlite3_free(zStr);
   }
@@ -93513,7 +93451,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){
         (aSchema[ii]==0 ? 0 : (int)(aSchema[ii] - i1))
       );
     }
-    sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr);
+    sqlite3_log(SQLITE_WARNING, "slow schema (v=12): (%s)", zStr);
     sqlite3_free(zStr);
   }
 }
@@ -100339,9 +100277,7 @@ case OP_AutoCommit: {
 
     u64 aCommit[COMMIT_TIME_N];
     memset(aCommit, 0, sizeof(aCommit));
-    if( iRollback==0 ){
-      sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START);
-    }
+    sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START);
 
     if( iRollback ){
       assert( desiredAutoCommit==1 );
@@ -100388,7 +100324,7 @@ case OP_AutoCommit: {
       rc = SQLITE_ERROR;
     }
     sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH);
-    if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit);
+    if( desiredAutoCommit ) sqlite3CommitTimeLog(aCommit);
     goto vdbe_return;
   }else{
     sqlite3VdbeError(p,
@@ -123847,8 +123783,6 @@ static int loadStatTbl(
   sqlite3DbFree(db, zSql);
   if( rc ) return rc;
 
-  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1);
-
   while( sqlite3_step(pStmt)==SQLITE_ROW ){
     char *zIndex;                 /* Index name */
     Index *pIdx;                  /* Pointer to the index object */
@@ -123894,7 +123828,6 @@ static int loadStatTbl(
     pIdx->nSample++;
   }
   rc = sqlite3_finalize(pStmt);
-  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2);
   if( rc==SQLITE_OK ) initAvgEq(pPrevIdx);
   return rc;
 }
@@ -123968,8 +123901,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
 #endif
   }
 
-  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS);
-
   /* Load new statistics out of the sqlite_stat1 table */
   sInfo.db = db;
   sInfo.zDatabase = db->aDb[iDb].zDbSName;
@@ -123986,8 +123917,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
     }
   }
 
-  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1);
-
   /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */
   assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
   for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){
@@ -123995,8 +123924,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
     if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx);
   }
 
-  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS);
-
   /* Load the statistics from the sqlite_stat4 table. */
 #ifdef SQLITE_ENABLE_STAT4
   if( rc==SQLITE_OK ){
@@ -124011,8 +123938,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
   }
 #endif
 
-  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4);
-
   if( rc==SQLITE_NOMEM ){
     sqlite3OomFault(db);
   }
@@ -257904,7 +257829,7 @@ static void fts5SourceIdFunc(
 ){
   assert( nArg==0 );
   UNUSED_PARAM2(nArg, apUnused);
-  sqlite3_result_text(pCtx, "fts5: 2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e", -1, SQLITE_TRANSIENT);
+  sqlite3_result_text(pCtx, "fts5: 2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937", -1, SQLITE_TRANSIENT);
 }
 
 /*
diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h
index 6287d400b..7d8654612 100644
--- a/libstuff/sqlite3.h
+++ b/libstuff/sqlite3.h
@@ -148,7 +148,7 @@ extern "C" {
 */
 #define SQLITE_VERSION        "3.47.0"
 #define SQLITE_VERSION_NUMBER 3047000
-#define SQLITE_SOURCE_ID      "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e"
+#define SQLITE_SOURCE_ID      "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers

From ff664a7857dc062662a8b4f39661d357c40925da Mon Sep 17 00:00:00 2001
From: Florent De Neve <florent@expensify.com>
Date: Fri, 6 Dec 2024 10:48:02 -0400
Subject: [PATCH 24/28] SQLite fix for non x86 arch

---
 libstuff/sqlite3.c | 107 +++++++++++++++++++++++++++++++++++++++------
 libstuff/sqlite3.h |   2 +-
 2 files changed, 94 insertions(+), 15 deletions(-)

diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c
index 8084ff4a9..f10a739a6 100644
--- a/libstuff/sqlite3.c
+++ b/libstuff/sqlite3.c
@@ -18,7 +18,7 @@
 ** separate file. This file contains only code for the core SQLite library.
 **
 ** The content in this amalgamation comes from Fossil check-in
-** 853f9cf453d13cf826443b0d27331e1f4e9e.
+** 5fa1699e31856a6585cc59183641c4cc99e1.
 */
 #define SQLITE_CORE 1
 #define SQLITE_AMALGAMATION 1
@@ -465,7 +465,7 @@ extern "C" {
 */
 #define SQLITE_VERSION        "3.47.0"
 #define SQLITE_VERSION_NUMBER 3047000
-#define SQLITE_SOURCE_ID      "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937"
+#define SQLITE_SOURCE_ID      "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers
@@ -18229,10 +18229,19 @@ struct sqlite3 {
 #define SCHEMA_TIME_BEFORE_PREPARE 8
 #define SCHEMA_TIME_BEFORE_FINALIZE 9
 #define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10
-#define SCHEMA_TIME_END_ANALYZE_LOAD 11
-#define SCHEMA_TIME_FINISH 12
 
-#define SCHEMA_TIME_N 13
+#define SCHEMA_TIME_AFTER_CLEAR_STATS 11
+#define SCHEMA_TIME_AFTER_STAT1 12
+#define SCHEMA_TIME_AFTER_DEFAULTS 13
+
+#define SCHEMA_TIME_AFTER_STAT4_Q1 14
+#define SCHEMA_TIME_AFTER_STAT4_Q2 15
+#define SCHEMA_TIME_AFTER_STAT4 16
+
+#define SCHEMA_TIME_END_ANALYZE_LOAD 17
+#define SCHEMA_TIME_FINISH 18
+
+#define SCHEMA_TIME_N 19
 #define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000)
 
 
@@ -67552,6 +67561,29 @@ static void walCleanupHash(Wal *pWal){
 #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
 }
 
+/*
+** Zero the n byte block indicated by pointer p. n Must be a multiple of
+** 8, and p must be aligned to an 8-byte boundary.
+*/
+static void zero64(void *p, int n){
+#if defined(__x86_64__)
+  size_t c = n / sizeof(u64);
+  void *d = p;
+
+  assert( (n & 0x7)==0 );
+  assert( EIGHT_BYTE_ALIGNMENT(p) );
+
+  __asm__ volatile (
+      "rep stosq"
+        : "+D" (d), "+c" (c)
+        : "a" (0)
+        : "memory"
+  );
+#else
+  memset(p, 0, n);
+#endif
+}
+
 /*
 ** Set an entry in the wal-index that will map database page number
 ** pPage into WAL frame iFrame.
@@ -67590,10 +67622,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){
     ** entire hash table and aPgno[] array before proceeding.
     */
     if( pWal->aCommitTime ) t = sqlite3STimeNow();
-    if( idx==1 ){
+    if( idx==1 && sLoc.aPgno[0]!=0 ){
       int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno);
-      assert( nByte>=0 );
-      memset((void*)sLoc.aPgno, 0, nByte);
+      assert( nByte>=0 && (nByte & 0x07)==0 );
+      zero64((void*)sLoc.aPgno, nByte);
     }
     if( pWal->aCommitTime ){
       pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t;
@@ -67763,6 +67795,7 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){
         if( aShare==0 ) break;
         SEH_SET_ON_ERROR(iPg, aShare);
         pWal->apWiData[iPg] = aPrivate;
+        memset(aPrivate, 0, WALINDEX_PGSZ);
 
         if( iWal ){
           assert( version==WAL_VERSION2 );
@@ -68891,6 +68924,39 @@ static int walCheckpoint(
       }
     }
 
+    if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
+      /* In wal2 mode, a non-passive checkpoint waits for all readers of
+      ** the wal file just checkpointed to finish, then zeroes the hash
+      ** tables associated with that wal file. This is because in some
+      ** deployments, zeroing the hash tables as they are overwritten within
+      ** COMMIT commands is a significant performance hit.
+      **
+      ** Currently, both of the "PART" locks are held for the wal file
+      ** being checkpointed. i.e. if iCkpt==0, then we already hold both
+      ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an
+      ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that
+      ** there are no remaining readers of the (iCkpt==0) wal file. Similar
+      ** logic, with different locks, is used for (iCkpt==1).
+      */
+      int lockIdx = WAL_READ_LOCK(
+        iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2
+      );
+      assert( iCkpt==0 || iCkpt==1 );
+      rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1);
+      if( rc==SQLITE_OK ){
+        int iHash;
+        for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){
+          WalHashLoc sLoc;
+          int nByte;
+          memset(&sLoc, 0, sizeof(sLoc));
+          walHashGet(pWal, iHash, &sLoc);
+          nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno);
+          memset((void*)sLoc.aPgno, 0, nByte);
+        }
+        walUnlockExclusive(pWal, lockIdx, 1);
+      }
+    }
+
     if( rc==SQLITE_BUSY ){
       /* Reset the return code so as not to report a checkpoint failure
       ** just because there are active readers.  */
@@ -93407,7 +93473,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){
       }
       zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU);
     }
-    sqlite3_log(SQLITE_WARNING, "slow commit (v=12): (%s)", zStr);
+    sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr);
     sqlite3_free(zStr);
   }
 }
@@ -93435,7 +93501,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep
     }
     if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); }
     sqlite3_log(SQLITE_WARNING,
-        "slow prepare (v=12): (%s) [%.*s]", zStr, nByte, zSql
+        "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql
     );
     sqlite3_free(zStr);
   }
@@ -93451,7 +93517,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){
         (aSchema[ii]==0 ? 0 : (int)(aSchema[ii] - i1))
       );
     }
-    sqlite3_log(SQLITE_WARNING, "slow schema (v=12): (%s)", zStr);
+    sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr);
     sqlite3_free(zStr);
   }
 }
@@ -100277,7 +100343,9 @@ case OP_AutoCommit: {
 
     u64 aCommit[COMMIT_TIME_N];
     memset(aCommit, 0, sizeof(aCommit));
-    sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START);
+    if( iRollback==0 ){
+      sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START);
+    }
 
     if( iRollback ){
       assert( desiredAutoCommit==1 );
@@ -100324,7 +100392,7 @@ case OP_AutoCommit: {
       rc = SQLITE_ERROR;
     }
     sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH);
-    if( desiredAutoCommit ) sqlite3CommitTimeLog(aCommit);
+    if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit);
     goto vdbe_return;
   }else{
     sqlite3VdbeError(p,
@@ -123783,6 +123851,8 @@ static int loadStatTbl(
   sqlite3DbFree(db, zSql);
   if( rc ) return rc;
 
+  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1);
+
   while( sqlite3_step(pStmt)==SQLITE_ROW ){
     char *zIndex;                 /* Index name */
     Index *pIdx;                  /* Pointer to the index object */
@@ -123828,6 +123898,7 @@ static int loadStatTbl(
     pIdx->nSample++;
   }
   rc = sqlite3_finalize(pStmt);
+  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2);
   if( rc==SQLITE_OK ) initAvgEq(pPrevIdx);
   return rc;
 }
@@ -123901,6 +123972,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
 #endif
   }
 
+  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS);
+
   /* Load new statistics out of the sqlite_stat1 table */
   sInfo.db = db;
   sInfo.zDatabase = db->aDb[iDb].zDbSName;
@@ -123917,6 +123990,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
     }
   }
 
+  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1);
+
   /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */
   assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
   for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){
@@ -123924,6 +123999,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
     if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx);
   }
 
+  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS);
+
   /* Load the statistics from the sqlite_stat4 table. */
 #ifdef SQLITE_ENABLE_STAT4
   if( rc==SQLITE_OK ){
@@ -123938,6 +124015,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
   }
 #endif
 
+  sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4);
+
   if( rc==SQLITE_NOMEM ){
     sqlite3OomFault(db);
   }
@@ -257829,7 +257908,7 @@ static void fts5SourceIdFunc(
 ){
   assert( nArg==0 );
   UNUSED_PARAM2(nArg, apUnused);
-  sqlite3_result_text(pCtx, "fts5: 2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937", -1, SQLITE_TRANSIENT);
+  sqlite3_result_text(pCtx, "fts5: 2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a", -1, SQLITE_TRANSIENT);
 }
 
 /*
diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h
index 7d8654612..c79479408 100644
--- a/libstuff/sqlite3.h
+++ b/libstuff/sqlite3.h
@@ -148,7 +148,7 @@ extern "C" {
 */
 #define SQLITE_VERSION        "3.47.0"
 #define SQLITE_VERSION_NUMBER 3047000
-#define SQLITE_SOURCE_ID      "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937"
+#define SQLITE_SOURCE_ID      "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers

From 5456a9888fb1903ff29a2f97144df705fa6caaa3 Mon Sep 17 00:00:00 2001
From: Florent De Neve <florent@expensify.com>
Date: Fri, 6 Dec 2024 14:05:42 -0400
Subject: [PATCH 25/28] Update SQLite with more logs, v16

---
 libstuff/sqlite3.c | 41 +++++++++++++++++++++++++----------------
 libstuff/sqlite3.h |  2 +-
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c
index f10a739a6..b3c5eebf7 100644
--- a/libstuff/sqlite3.c
+++ b/libstuff/sqlite3.c
@@ -18,7 +18,7 @@
 ** separate file. This file contains only code for the core SQLite library.
 **
 ** The content in this amalgamation comes from Fossil check-in
-** 5fa1699e31856a6585cc59183641c4cc99e1.
+** 65b753735b8e8fb70d2b522d527426f1eb5c.
 */
 #define SQLITE_CORE 1
 #define SQLITE_AMALGAMATION 1
@@ -465,7 +465,7 @@ extern "C" {
 */
 #define SQLITE_VERSION        "3.47.0"
 #define SQLITE_VERSION_NUMBER 3047000
-#define SQLITE_SOURCE_ID      "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a"
+#define SQLITE_SOURCE_ID      "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers
@@ -18234,21 +18234,22 @@ struct sqlite3 {
 #define SCHEMA_TIME_AFTER_STAT1 12
 #define SCHEMA_TIME_AFTER_DEFAULTS 13
 
-#define SCHEMA_TIME_AFTER_STAT4_Q1 14
-#define SCHEMA_TIME_AFTER_STAT4_Q2 15
-#define SCHEMA_TIME_AFTER_STAT4 16
+#define SCHEMA_TIME_STAT4_Q1_BODY 14
+#define SCHEMA_TIME_AFTER_STAT4_Q1 15
+#define SCHEMA_TIME_AFTER_STAT4_Q2 16
+#define SCHEMA_TIME_AFTER_STAT4 17
 
-#define SCHEMA_TIME_END_ANALYZE_LOAD 17
-#define SCHEMA_TIME_FINISH 18
+#define SCHEMA_TIME_END_ANALYZE_LOAD 18
+#define SCHEMA_TIME_FINISH 19
 
-#define SCHEMA_TIME_N 19
-#define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000)
+#define SCHEMA_TIME_N 20
+#define SCHEMA_TIME_TIMEOUT (0 * 1000 * 1000)
 
 
 
 #define sqlite3PrepareTimeSet(x,y) sqlite3CommitTimeSet(x,y)
 SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrepareTime);
-SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime);
+SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime, const char *zFile);
 
 #define PREPARE_TIME_TIMEOUT (2 * 1000 * 1000)    /* 2 second timeout */
 
@@ -93473,7 +93474,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){
       }
       zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU);
     }
-    sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr);
+    sqlite3_log(SQLITE_WARNING, "slow commit (v=16): (%s)", zStr);
     sqlite3_free(zStr);
   }
 }
@@ -93501,12 +93502,12 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep
     }
     if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); }
     sqlite3_log(SQLITE_WARNING,
-        "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql
+        "slow prepare (v=16): (%s) [%.*s]", zStr, nByte, zSql
     );
     sqlite3_free(zStr);
   }
 }
-SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){
+SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){
   u64 i1 = aSchema[SCHEMA_TIME_START];
   assert( SCHEMA_TIME_START==0 && SCHEMA_TIME_FINISH==SCHEMA_TIME_N-1 );
   if( aSchema[SCHEMA_TIME_FINISH]>(i1+SCHEMA_TIME_TIMEOUT) ){
@@ -93517,7 +93518,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){
         (aSchema[ii]==0 ? 0 : (int)(aSchema[ii] - i1))
       );
     }
-    sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr);
+    sqlite3_log(SQLITE_WARNING, "slow schema (%s) (v=16): (%s)", zFile, zStr);
     sqlite3_free(zStr);
   }
 }
@@ -123800,6 +123801,8 @@ static int loadStatTbl(
     tRowcnt *pSpace; /* Available allocated memory space */
     u8 *pPtr;        /* Available memory as a u8 for easier manipulation */
 
+    u64 t = sqlite3STimeNow();
+
     zIndex = (char *)sqlite3_column_text(pStmt, 0);
     if( zIndex==0 ) continue;
     nSample = sqlite3_column_int(pStmt, 1);
@@ -123839,6 +123842,9 @@ static int loadStatTbl(
       pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol;
     }
     assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) );
+    if( db->aSchemaTime ){
+      db->aSchemaTime[SCHEMA_TIME_STAT4_Q1_BODY] += (sqlite3STimeNow() - t);
+    }
   }
   rc = sqlite3_finalize(pStmt);
   if( rc ) return rc;
@@ -145944,7 +145950,10 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl
 error_out:
   db->aSchemaTime = 0;
   sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_FINISH);
-  sqlite3SchemaTimeLog(aSchemaTime);
+  if( rc==SQLITE_OK && iDb==0 ){
+    const char *zFile = sqlite3BtreeGetFilename(pDb->pBt);
+    sqlite3SchemaTimeLog(aSchemaTime, zFile);
+  }
   if( rc ){
     if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){
       sqlite3OomFault(db);
@@ -257908,7 +257917,7 @@ static void fts5SourceIdFunc(
 ){
   assert( nArg==0 );
   UNUSED_PARAM2(nArg, apUnused);
-  sqlite3_result_text(pCtx, "fts5: 2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a", -1, SQLITE_TRANSIENT);
+  sqlite3_result_text(pCtx, "fts5: 2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f", -1, SQLITE_TRANSIENT);
 }
 
 /*
diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h
index c79479408..ff4f7b3f8 100644
--- a/libstuff/sqlite3.h
+++ b/libstuff/sqlite3.h
@@ -148,7 +148,7 @@ extern "C" {
 */
 #define SQLITE_VERSION        "3.47.0"
 #define SQLITE_VERSION_NUMBER 3047000
-#define SQLITE_SOURCE_ID      "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a"
+#define SQLITE_SOURCE_ID      "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers

From f4bb1f1a58c2076d75b2b3becbcd3810bfb33bac Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Fri, 6 Dec 2024 14:22:41 -0800
Subject: [PATCH 26/28] Fix wrong count being incremented

---
 sqlitecluster/SQLiteNode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 29aad0bcc..e0281c1e1 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -2837,7 +2837,7 @@ void SQLiteNode::_dieIfForkedFromCluster() {
     size_t forkedFullPeerCount = 0;
     for (const auto& p : _peerList) {
         if (!p->permaFollower) {
-            forkedFullPeerCount++;
+            quorumNodeCount++;
             if (p->forked) {
                 forkedFullPeerCount++;
             }

From 8d3fad988ae525735cef2fc54b7e0ce4ca87585b Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Fri, 6 Dec 2024 14:25:30 -0800
Subject: [PATCH 27/28] Change comment

---
 sqlitecluster/SQLiteNode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index e0281c1e1..2907e7e84 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -1273,7 +1273,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) {
             return;
         }
 
-        // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles.
+        // We ignore everything except PING and PONG from forked nodes, so we can return here in that case.
         if (peer->forked) {
             PINFO("Received message " << message.methodLine << " from forked peer, ignoring.");
             return;

From c86f79953454fe2b25ac3f8ad0321fdc03f82ae5 Mon Sep 17 00:00:00 2001
From: Tyler Karaszewski <tyler@expensify.com>
Date: Fri, 6 Dec 2024 14:26:08 -0800
Subject: [PATCH 28/28] Update sqlitecluster/SQLiteNode.cpp

Co-authored-by: Carlos Alvarez <cead22@gmail.com>
---
 sqlitecluster/SQLiteNode.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp
index 2907e7e84..5a75611cd 100644
--- a/sqlitecluster/SQLiteNode.cpp
+++ b/sqlitecluster/SQLiteNode.cpp
@@ -2844,7 +2844,7 @@ void SQLiteNode::_dieIfForkedFromCluster() {
         }
     }
 
-    // Am *I* a permaFollower?
+    // Increase quorumNodeCount if *I* am not a permafollower
     if (_originalPriority != 0) {
         quorumNodeCount++;
     }