From a27c258f8933dcb7eaf333f91bbce247051d75ac Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 9 Dec 2024 16:57:55 -0800 Subject: [PATCH 01/10] A bunch of notes --- sqlitecluster/SQLiteNode.cpp | 26 +++++++++++++++++----- sqlitecluster/SQLiteSequentialNotifier.cpp | 8 ++++++- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 5a75611cd..e789bde1f 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -187,6 +187,10 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn } _replicateStartCV.notify_all(); + if (_replicationThreadsShouldExit) { + SINFO("Late replicate start, just exiting."); + } + // Initialize each new thread with a new number. SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); @@ -214,12 +218,18 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn uint64_t waitForCount = SStartsWith(command["ID"], "ASYNC") ? command.calcU64("dbCountAtStart") : currentCount; SINFO("[performance] BEGIN_TRANSACTION replicate thread for commit " << newCount << " waiting on DB count " << waitForCount << " (" << (quorum ? "QUORUM" : "ASYNC") << ")"); while (true) { + // Ok, why doesn't this get counted? + // It's waiting on: commit 26056807239 waiting on DB count 26056807238 + // It seems like this should return immediately. + // Ok, is it possible we got past here and waited somewhere else? We were either stuck here, or... SQLiteSequentialNotifier::RESULT result = _localCommitNotifier.waitFor(waitForCount, false); + // My current inclination is that maybe we reset the commit notifier before the thread really starts. if (result == SQLiteSequentialNotifier::RESULT::UNKNOWN) { // This should be impossible. SERROR("Got UNKNOWN result from waitFor, which shouldn't happen"); } else if (result == SQLiteSequentialNotifier::RESULT::COMPLETED) { // Success case. + // If we didn't get stuck above, we must have hit here, because otherwise we would have logged. break; } else if (result == SQLiteSequentialNotifier::RESULT::CANCELED) { SINFO("_localCommitNotifier.waitFor canceled early, returning."); @@ -237,6 +247,7 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn if (commitAttemptCount > 1) { SINFO("Commit attempt number " << commitAttemptCount << " for concurrent replication."); } + // We never log this line, so we can't have gotten to here. SINFO("[performance] BEGIN for commit " << newCount); bool uniqueContraintsError = false; try { @@ -1640,14 +1651,20 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { throw e; } } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { + // Race condition here. What if _replicationThreadsShouldExit changes after this check? if (_replicationThreadsShouldExit) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { + // Ok, so the race condition could be here, right? + // Right this instance, the thread count is 0, so we can move past the check that + // Waits for it to be 0. + // But then this thread starts. Can that happen? auto threadID = _replicationThreadCount.fetch_add(1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); _replicateThreadStarted = false; + // Either here. 
thread(&SQLiteNode::_replicate, this, peer, message, _dbPool->getIndex(false), threadAttemptStartTimestamp).detach(); { unique_lock lock(_replicateStartMutex); @@ -1917,12 +1934,8 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. - size_t infoCount = 1; while (_replicationThreadCount) { - if (infoCount % 100 == 0) { - SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); - } - infoCount++; + SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } @@ -1930,8 +1943,11 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance _replicationThreadsShouldExit = false; // Guaranteed to be done right now. + // I bet this is wrong when these get reset. If we no threads, these get reset before they check? + // That doesn't make sense to me for when we should increment _replicationThreadCount _localCommitNotifier.reset(); _leaderCommitNotifier.reset(); + // If the above completed, we should immediately see `Switching from... ` logged. // We have no leader anymore. _leadPeer = nullptr; diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index 2af9c731c..69d30f1ac 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -31,6 +31,8 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu SINFO("Canceled after " << _cancelAfter << ", but waiting for " << value << " so not returning yet."); } else { // Canceled and we're not before the cancellation cutoff. + // I don't see how we don't return here. Maybe we never acquire `waitingThreadMutex`? + SINFO("Returning canceled because _cancelAfter=" << _cancelAfter << " and value=" << value); return RESULT::CANCELED; } } else if (_globalResult != RESULT::UNKNOWN) { @@ -52,6 +54,8 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu // We should investigate any instances of thew below logline to see if they're same as for the success cases mentioned above (i.e., the timeout happens simultaneously as the // cancellation) or if the log line is delayed by up to a second (indicating a problem). if (_globalResult == RESULT::CANCELED || state->result == RESULT::CANCELED) { + // I bet removing a 1 second delay means we don't hit this. we get to calling `reset` on this sooner, possibly while there are threads waiting here. + // It's possible that we hit the timeout here after `cancel()` has set the global value, but before we received the notification. // This isn't a problem, and we can jump back to the top of the loop and check again. If there's some problem, we'll see it there. SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or " << " specific cancel " << (state->result == RESULT::CANCELED)); @@ -114,10 +118,12 @@ void SQLiteSequentialNotifier::cancel(uint64_t cancelAfter) { auto& valueThreadMap = *valueThreadMapPtr; // If cancelAfter is specified, start from that value. Otherwise, we start from the beginning. auto start = _cancelAfter ? 
valueThreadMap.upper_bound(_cancelAfter) : valueThreadMap.begin(); - SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); if (start == valueThreadMap.end()) { // There's nothing to remove. + SINFO("[performance] Next value to cancel after " << cancelAfter << " is N/A"); return; + } else { + SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); } // Now iterate across whatever's remaining and mark it canceled. From d9ec8964aa99d695601c335dbc458e71053fb361 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 9 Dec 2024 17:47:49 -0800 Subject: [PATCH 02/10] More notes --- sqlitecluster/SQLiteNode.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index e789bde1f..920f03e10 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -199,6 +199,19 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn // Allow the DB handle to be returned regardless of how this function exits. SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); + // In dev, we sometimes crash on the destructor for the above after `detach` which implies to me that the DB Pool could have been deleted before we exited. + // This is a different manifestation of what could be the same issue. + // IN dev, we get: + // 2024-12-09T21:12:45.030255+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:215) _replicate [replicate2065] [info] {cluster_node_4/SEARCHING} [performance] BEGIN_TRANSACTION replicate thread for commit 15003 waiting on DB count 15001 (ASYNC) + // 2024-12-09T21:12:49.023666+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1913) _changeState [sync] [info] {cluster_node_4/FOLLOWING} Replication threads should exit, canceling commits after current leader commit 0 + // 2024-12-09T21:12:49.030957+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:57) waitFor [replicate2065] [info] Hit 1s timeout while global cancel 1 or specific cancel 0 + // 2024-12-09T21:12:49.030963+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:225) _replicate [replicate2065] [info] {cluster_node_4/FOLLOWING} _localCommitNotifier.waitFor canceled early, returning. + // 2024-12-09T21:12:49.030977+00:00 expensidev2004 bedrock10013: xxxxxx (SSignal.cpp:193) _SSignal_StackTrace [replicate2065] [warn] Signal Segmentation fault(11) caused crash, logging stack trace. + // Why doesn't this get cancelled as well? + // I'm not sure why dev thinks leader has commit 0. + // 2024-12-09T21:12:49.023613+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1902) _changeState [sync] [info] {cluster_node_4/FOLLOWING} [NOTIFY] setting commit count to: 17001 + // 2024-12-09T21:12:49.023674+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:105) cancel [sync] [info] Canceling all pending transactions after 0 + SQLite& db = dbScope.db(); bool goSearchingOnExit = false; @@ -224,6 +237,10 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn // Ok, is it possible we got past here and waited somewhere else? We were either stuck here, or... SQLiteSequentialNotifier::RESULT result = _localCommitNotifier.waitFor(waitForCount, false); // My current inclination is that maybe we reset the commit notifier before the thread really starts. 
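To pin down the reset hypothesis above, here is a minimal compilable sketch of the waitFor/notify/reset shape in play. It is a deliberately simplified stand-in, not the real SQLiteSequentialNotifier (the real class tracks per-value waiter state, a global result, and a cancelAfter cutoff); the 1s re-check mirrors the "Hit 1s timeout" loglines quoted in these notes.

#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

class SequentialNotifierSketch {
  public:
    enum class Result { COMPLETED, CANCELED };

    // Block until notifyThrough() reaches `value`, or cancellation. The 1s
    // timeout forces a re-check even if a notification was missed, matching
    // the timeout-and-loop behavior in the real waitFor.
    Result waitFor(uint64_t value) {
        std::unique_lock<std::mutex> lock(_m);
        while (true) {
            if (_canceled) {
                return Result::CANCELED;
            }
            if (_notifiedThrough >= value) {
                return Result::COMPLETED;
            }
            _cv.wait_for(lock, std::chrono::seconds(1));
        }
    }

    void notifyThrough(uint64_t value) {
        std::lock_guard<std::mutex> lock(_m);
        if (value > _notifiedThrough) {
            _notifiedThrough = value;
        }
        _cv.notify_all();
    }

    void cancel() {
        std::lock_guard<std::mutex> lock(_m);
        _canceled = true;
        _cv.notify_all();
    }

    // The hazard the notes describe: run this while some thread sits in
    // waitFor(N), and that thread now needs the count to climb from 0 back to
    // N -- effectively "waiting for every commit from the beginning of time"
    // until something cancels it.
    void reset() {
        std::lock_guard<std::mutex> lock(_m);
        _notifiedThrough = 0;
        _canceled = false;
    }

  private:
    std::mutex _m;
    std::condition_variable _cv;
    uint64_t _notifiedThrough = 0;
    bool _canceled = false;
};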
+ + // I think we get stuck in `waitFor` because it's been reset to 0 and we're waiting for every commit from the + // Beginning of time. I'm not acutally sure why it ever returns, though. + if (result == SQLiteSequentialNotifier::RESULT::UNKNOWN) { // This should be impossible. SERROR("Got UNKNOWN result from waitFor, which shouldn't happen"); @@ -1653,12 +1670,17 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { // Race condition here. What if _replicationThreadsShouldExit changes after this check? if (_replicationThreadsShouldExit) { + // Interestingly, this doesn't happen. + // I think load is light when this issue occurs. SINFO("Discarding replication message, stopping FOLLOWING"); } else { // Ok, so the race condition could be here, right? // Right this instance, the thread count is 0, so we can move past the check that // Waits for it to be 0. // But then this thread starts. Can that happen? + // So the sync thread does the state switch. + // Who is running this though, it should also be the sync thread? + // I don't see how anyone else would be able to do this. auto threadID = _replicationThreadCount.fetch_add(1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { @@ -1938,6 +1960,9 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } + // How can the above fail???? + // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. + // It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. // Done exiting. Reset so that we can resume FOLLOWING in the future. _replicationThreadsShouldExit = false; From 7784a9700a72a68dd969809bbbae764bd42b290b Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Tue, 10 Dec 2024 10:11:12 -0400 Subject: [PATCH 03/10] Update SQLite with a fix for non passive checkpoints --- libstuff/sqlite3.c | 8 ++++---- libstuff/sqlite3.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index b3c5eebf7..727617327 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 65b753735b8e8fb70d2b522d527426f1eb5c. +** 1a59cae3c31aea25cef3705cce2477e26515. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" +#define SQLITE_SOURCE_ID "2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -71575,7 +71575,7 @@ SQLITE_PRIVATE int sqlite3WalCheckpoint( ** writer lock retried until either the busy-handler returns 0 or the ** lock is successfully obtained. 
*/ - if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + if( eMode!=SQLITE_CHECKPOINT_PASSIVE && isWalMode2(pWal)==0 ){ rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1); if( rc==SQLITE_OK ){ pWal->writeLock = 1; @@ -257917,7 +257917,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index ff4f7b3f8..77aaa8cbd 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" +#define SQLITE_SOURCE_ID "2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767" /* ** CAPI3REF: Run-Time Library Version Numbers From c8524918cdb87c597aca669d5fb1fd0a034930b9 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Tue, 10 Dec 2024 14:40:20 -0400 Subject: [PATCH 04/10] Update SQLite with more logs, v17 --- libstuff/sqlite3.c | 20 +++++++++++--------- libstuff/sqlite3.h | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 727617327..a7111ef6c 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 1a59cae3c31aea25cef3705cce2477e26515. +** df4183ace93b788b798b258274bf6b651906. 
*/ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767" +#define SQLITE_SOURCE_ID "2024-12-10 14:56:20 df4183ace93b788b798b258274bf6b651906c9f1cf2af4983e447cdf52904523" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -93474,7 +93474,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=16): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=17): (%s)", zStr); sqlite3_free(zStr); } } @@ -93502,7 +93502,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=16): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=17): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93514,11 +93514,13 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){ char *zStr = 0; int ii; for(ii=1; ii Date: Tue, 10 Dec 2024 10:44:49 -0800 Subject: [PATCH 05/10] Fix but maybe not the right fix --- BedrockServer.cpp | 3 ++- sqlitecluster/SQLiteNode.cpp | 12 +++++++++--- sqlitecluster/SQLiteNode.h | 3 ++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 55bb30ae6..98e59414f 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -213,8 +213,9 @@ void BedrockServer::sync() // we're leading, then the next update() loop will set us to standing down, and then we won't accept any new // commands, and we'll shortly run through the existing queue. if (_shutdownState.load() == COMMANDS_FINISHED) { - SINFO("All clients responded to, " << BedrockCommand::getCommandCount() << " commands remaining. Shutting down sync node."); + SINFO("All clients responded to, " << BedrockCommand::getCommandCount() << " commands remaining."); if (_syncNode->beginShutdown()) { + SINFO("Beginning shuttdown of sync node."); // This will cause us to skip the next `poll` iteration which avoids a 1 second wait. _notifyDoneSync.push(true); } diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 920f03e10..b641614ca 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -145,6 +145,7 @@ SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr dbPool, cons _stateTimeout(STimeNow() + firstTimeout), _syncPeer(nullptr) { + SINFO("TYLER _replicationThreadCount reset to : " << _replicationThreadCount); KILLABLE_SQLITE_NODE = this; SASSERT(_originalPriority >= 0); onPrepareHandlerEnabled = false; @@ -1669,7 +1670,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { // Race condition here. What if _replicationThreadsShouldExit changes after this check? - if (_replicationThreadsShouldExit) { + if (_replicationThreadsShouldExit || _state == SQLiteNodeState::SEARCHING) { + // So this fix probably works, but maybe we don't even want to call _onMESSAGE when we're detaching? // Interestingly, this doesn't happen. // I think load is light when this issue occurs. 
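The race these notes keep circling, reduced to its two actors (an assumed shape for illustration; the fix this series actually ships is the FOLLOWING-state guard in a later commit, not this): the message handler checks the exit flag and only then increments the counter, so the sync thread can run its entire set-flag / wait-for-zero / reset sequence in between. Publishing the increment before re-checking the flag is one standard way to close a check-then-act window like that:

#include <atomic>

std::atomic<bool> shouldExit{false}; // stands in for _replicationThreadsShouldExit
std::atomic<int> threadCount{0};     // stands in for _replicationThreadCount

// Returns true if it is safe to spawn a replicate thread. Because the count
// is published before the flag is re-read, the sync thread (which sets the
// flag and then polls the count down to zero) can no longer slip its whole
// shutdown sequence between the check and the increment.
bool tryStartReplicateThread() {
    threadCount.fetch_add(1);
    if (shouldExit.load()) {
        threadCount.fetch_sub(1); // back out; a state change is in progress
        return false;
    }
    return true;
}

This narrows the window but does not close it completely, because _changeState resets the flag to false at the end of its sequence -- which is part of why the series ultimately gates on _state != FOLLOWING instead.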
SINFO("Discarding replication message, stopping FOLLOWING"); @@ -1682,6 +1684,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // Who is running this though, it should also be the sync thread? // I don't see how anyone else would be able to do this. auto threadID = _replicationThreadCount.fetch_add(1); + SINFO("TYLER _replicationThreadCount incremented to : " << threadID + 1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); @@ -1703,7 +1706,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads // that depend on the transaction that failed to be threaded. - _replicationThreadCount.fetch_sub(1); + auto was = _replicationThreadCount.fetch_sub(1); + SINFO("TYLER _replicationThreadCount decremented to : " << was - 1); SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. e.what()=" << e.what()); _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); STHROW("Error starting replicate thread so giving up and reconnecting."); @@ -1956,10 +1960,12 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. - while (_replicationThreadCount) { + SINFO("TYLER _replicationThreadCount before state change: " << _replicationThreadCount); + while (_replicationThreadCount.load()) { SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } + SINFO("TYLER _replicationThreadCount after state change: " << _replicationThreadCount); // How can the above fail???? // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. // It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. 
diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 57c5c1056..ab476b653 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -198,7 +198,8 @@ class SQLiteNode : public STCPManager { public: ScopedDecrement(CounterType& counter) : _counter(counter) {} ~ScopedDecrement() { - --_counter; + auto decrementedTo = --_counter; + SINFO("TYLER _counter decremented to : " << decrementedTo); } private: CounterType& _counter; From 39c0d877e27385aa87cc75989dfbec17a4c2960c Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 11:31:48 -0800 Subject: [PATCH 06/10] Add explanatory message --- sqlitecluster/SQLiteNode.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index b641614ca..2c1500240 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -145,7 +145,6 @@ SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr dbPool, cons _stateTimeout(STimeNow() + firstTimeout), _syncPeer(nullptr) { - SINFO("TYLER _replicationThreadCount reset to : " << _replicationThreadCount); KILLABLE_SQLITE_NODE = this; SASSERT(_originalPriority >= 0); onPrepareHandlerEnabled = false; @@ -1669,11 +1668,15 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { throw e; } } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { - // Race condition here. What if _replicationThreadsShouldExit changes after this check? - if (_replicationThreadsShouldExit || _state == SQLiteNodeState::SEARCHING) { - // So this fix probably works, but maybe we don't even want to call _onMESSAGE when we're detaching? - // Interestingly, this doesn't happen. - // I think load is light when this issue occurs. + if (_state != SQLiteNodeState::FOLLOWING) { + // These messages are only valid while following, but we do not throw if we receive them in other states, as + // it's not neccesarily an error. Specifically, as we switch away from FOLLOWING, there may still be a stream + // of transactions being broadcast. We do not attempt to handle these, as we keep careful count of which + // replication threads are currently running, and reset the replication state tracking when we're not following. + // Attempting to handle replication messages in some other state will break that tracking. + return; + } + if (_replicationThreadsShouldExit) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { // Ok, so the race condition could be here, right? @@ -1684,7 +1687,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // Who is running this though, it should also be the sync thread? // I don't see how anyone else would be able to do this. auto threadID = _replicationThreadCount.fetch_add(1); - SINFO("TYLER _replicationThreadCount incremented to : " << threadID + 1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); @@ -1706,8 +1708,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads // that depend on the transaction that failed to be threaded. 
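An aside on the decrement being adjusted just below: the codebase already uses ScopedDecrement (above) so the replicate thread itself cannot leak a count, but the spawn-failure path still does manual bookkeeping in the catch block. A hypothetical spawner-side guard (CountGuard is illustrative, not part of this series) would make every exit path balance by construction:

#include <atomic>

// Increments on construction; decrements on destruction unless dismissed.
// dismiss() is called once the spawned thread has taken ownership of the
// count (its own ScopedDecrement then does the eventual fetch_sub).
class CountGuard {
  public:
    explicit CountGuard(std::atomic<int>& counter) : _counter(counter) {
        _counter.fetch_add(1);
    }
    ~CountGuard() {
        if (!_dismissed) {
            _counter.fetch_sub(1);
        }
    }
    void dismiss() { _dismissed = true; }
  private:
    std::atomic<int>& _counter;
    bool _dismissed = false;
};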
- auto was = _replicationThreadCount.fetch_sub(1); - SINFO("TYLER _replicationThreadCount decremented to : " << was - 1); SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. e.what()=" << e.what()); _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); STHROW("Error starting replicate thread so giving up and reconnecting."); @@ -1960,12 +1960,10 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. - SINFO("TYLER _replicationThreadCount before state change: " << _replicationThreadCount); - while (_replicationThreadCount.load()) { + while (_replicationThreadCount) { SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } - SINFO("TYLER _replicationThreadCount after state change: " << _replicationThreadCount); // How can the above fail???? // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. // It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. From 5d29d00945558a0bff37842bc6cceb217d5f2b77 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 11:38:00 -0800 Subject: [PATCH 07/10] Cleanup --- sqlitecluster/SQLiteNode.cpp | 49 +++------------------- sqlitecluster/SQLiteNode.h | 3 +- sqlitecluster/SQLiteSequentialNotifier.cpp | 8 +--- 3 files changed, 8 insertions(+), 52 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 2c1500240..70bf83dab 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -187,10 +187,6 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn } _replicateStartCV.notify_all(); - if (_replicationThreadsShouldExit) { - SINFO("Late replicate start, just exiting."); - } - // Initialize each new thread with a new number. SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); @@ -199,19 +195,6 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn // Allow the DB handle to be returned regardless of how this function exits. SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); - // In dev, we sometimes crash on the destructor for the above after `detach` which implies to me that the DB Pool could have been deleted before we exited. - // This is a different manifestation of what could be the same issue. 
- // IN dev, we get: - // 2024-12-09T21:12:45.030255+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:215) _replicate [replicate2065] [info] {cluster_node_4/SEARCHING} [performance] BEGIN_TRANSACTION replicate thread for commit 15003 waiting on DB count 15001 (ASYNC) - // 2024-12-09T21:12:49.023666+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1913) _changeState [sync] [info] {cluster_node_4/FOLLOWING} Replication threads should exit, canceling commits after current leader commit 0 - // 2024-12-09T21:12:49.030957+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:57) waitFor [replicate2065] [info] Hit 1s timeout while global cancel 1 or specific cancel 0 - // 2024-12-09T21:12:49.030963+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:225) _replicate [replicate2065] [info] {cluster_node_4/FOLLOWING} _localCommitNotifier.waitFor canceled early, returning. - // 2024-12-09T21:12:49.030977+00:00 expensidev2004 bedrock10013: xxxxxx (SSignal.cpp:193) _SSignal_StackTrace [replicate2065] [warn] Signal Segmentation fault(11) caused crash, logging stack trace. - // Why doesn't this get cancelled as well? - // I'm not sure why dev thinks leader has commit 0. - // 2024-12-09T21:12:49.023613+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1902) _changeState [sync] [info] {cluster_node_4/FOLLOWING} [NOTIFY] setting commit count to: 17001 - // 2024-12-09T21:12:49.023674+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:105) cancel [sync] [info] Canceling all pending transactions after 0 - SQLite& db = dbScope.db(); bool goSearchingOnExit = false; @@ -231,22 +214,12 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn uint64_t waitForCount = SStartsWith(command["ID"], "ASYNC") ? command.calcU64("dbCountAtStart") : currentCount; SINFO("[performance] BEGIN_TRANSACTION replicate thread for commit " << newCount << " waiting on DB count " << waitForCount << " (" << (quorum ? "QUORUM" : "ASYNC") << ")"); while (true) { - // Ok, why doesn't this get counted? - // It's waiting on: commit 26056807239 waiting on DB count 26056807238 - // It seems like this should return immediately. - // Ok, is it possible we got past here and waited somewhere else? We were either stuck here, or... SQLiteSequentialNotifier::RESULT result = _localCommitNotifier.waitFor(waitForCount, false); - // My current inclination is that maybe we reset the commit notifier before the thread really starts. - - // I think we get stuck in `waitFor` because it's been reset to 0 and we're waiting for every commit from the - // Beginning of time. I'm not acutally sure why it ever returns, though. - if (result == SQLiteSequentialNotifier::RESULT::UNKNOWN) { // This should be impossible. SERROR("Got UNKNOWN result from waitFor, which shouldn't happen"); } else if (result == SQLiteSequentialNotifier::RESULT::COMPLETED) { // Success case. - // If we didn't get stuck above, we must have hit here, because otherwise we would have logged. break; } else if (result == SQLiteSequentialNotifier::RESULT::CANCELED) { SINFO("_localCommitNotifier.waitFor canceled early, returning."); @@ -264,7 +237,6 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn if (commitAttemptCount > 1) { SINFO("Commit attempt number " << commitAttemptCount << " for concurrent replication."); } - // We never log this line, so we can't have gotten to here. 
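The notes being deleted above suspected a use-after-free: a detach()ed replicate thread crashing in SQLiteScopedHandle's destructor, which would fit the DB pool being destroyed before the thread finished. One conventional way to make that impossible, sketched with illustrative names (shared ownership captured into the detached thread; not what this series ships):

#include <memory>
#include <thread>

struct DBPool {
    // Stands in for the real pool; destroyed only when the last shared_ptr drops.
};

void spawnReplicate(std::shared_ptr<DBPool> pool) {
    // The lambda copies the shared_ptr, so even if the caller (and everything
    // else) releases the pool while this detached thread is still running,
    // the pool outlives the thread's last use of it.
    std::thread([pool] {
        // ... acquire a handle from *pool and replicate ...
    }).detach();
}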
SINFO("[performance] BEGIN for commit " << newCount); bool uniqueContraintsError = false; try { @@ -1679,19 +1651,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { if (_replicationThreadsShouldExit) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { - // Ok, so the race condition could be here, right? - // Right this instance, the thread count is 0, so we can move past the check that - // Waits for it to be 0. - // But then this thread starts. Can that happen? - // So the sync thread does the state switch. - // Who is running this though, it should also be the sync thread? - // I don't see how anyone else would be able to do this. auto threadID = _replicationThreadCount.fetch_add(1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); _replicateThreadStarted = false; - // Either here. thread(&SQLiteNode::_replicate, this, peer, message, _dbPool->getIndex(false), threadAttemptStartTimestamp).detach(); { unique_lock lock(_replicateStartMutex); @@ -1708,6 +1672,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads // that depend on the transaction that failed to be threaded. + _replicationThreadCount.fetch_sub(1); SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. e.what()=" << e.what()); _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); STHROW("Error starting replicate thread so giving up and reconnecting."); @@ -1960,23 +1925,21 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. + size_t infoCount = 1; while (_replicationThreadCount) { - SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); + if (infoCount % 100 == 0) { + SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); + } usleep(10'000); + infoCount++; } - // How can the above fail???? - // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. - // It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. // Done exiting. Reset so that we can resume FOLLOWING in the future. _replicationThreadsShouldExit = false; // Guaranteed to be done right now. - // I bet this is wrong when these get reset. If we no threads, these get reset before they check? - // That doesn't make sense to me for when we should increment _replicationThreadCount _localCommitNotifier.reset(); _leaderCommitNotifier.reset(); - // If the above completed, we should immediately see `Switching from... ` logged. // We have no leader anymore. 
_leadPeer = nullptr; diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index ab476b653..57c5c1056 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -198,8 +198,7 @@ class SQLiteNode : public STCPManager { public: ScopedDecrement(CounterType& counter) : _counter(counter) {} ~ScopedDecrement() { - auto decrementedTo = --_counter; - SINFO("TYLER _counter decremented to : " << decrementedTo); + --_counter; } private: CounterType& _counter; diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index 69d30f1ac..c7814d344 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -31,8 +31,6 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu SINFO("Canceled after " << _cancelAfter << ", but waiting for " << value << " so not returning yet."); } else { // Canceled and we're not before the cancellation cutoff. - // I don't see how we don't return here. Maybe we never acquire `waitingThreadMutex`? - SINFO("Returning canceled because _cancelAfter=" << _cancelAfter << " and value=" << value); return RESULT::CANCELED; } } else if (_globalResult != RESULT::UNKNOWN) { @@ -54,10 +52,9 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu // We should investigate any instances of thew below logline to see if they're same as for the success cases mentioned above (i.e., the timeout happens simultaneously as the // cancellation) or if the log line is delayed by up to a second (indicating a problem). if (_globalResult == RESULT::CANCELED || state->result == RESULT::CANCELED) { - // I bet removing a 1 second delay means we don't hit this. we get to calling `reset` on this sooner, possibly while there are threads waiting here. - // It's possible that we hit the timeout here after `cancel()` has set the global value, but before we received the notification. // This isn't a problem, and we can jump back to the top of the loop and check again. If there's some problem, we'll see it there. + // Does this still happen?? Might be fixed. SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or " << " specific cancel " << (state->result == RESULT::CANCELED)); continue; } @@ -120,10 +117,7 @@ void SQLiteSequentialNotifier::cancel(uint64_t cancelAfter) { auto start = _cancelAfter ? valueThreadMap.upper_bound(_cancelAfter) : valueThreadMap.begin(); if (start == valueThreadMap.end()) { // There's nothing to remove. - SINFO("[performance] Next value to cancel after " << cancelAfter << " is N/A"); return; - } else { - SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); } // Now iterate across whatever's remaining and mark it canceled. 
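The cancel() hunks above keep reshuffling the same two steps, and the order is the whole point: upper_bound returns the first entry strictly greater than the cutoff, that iterator may be end(), and it is only safe to dereference after the end() check -- the pre-series code logged start->first before checking. The pattern in isolation, with erase standing in for the real mark-canceled-and-notify loop:

#include <cstdint>
#include <cstdio>
#include <map>

int main() {
    // Keys stand in for commit counts with waiting threads, as in valueThreadMap.
    std::map<uint64_t, int> valueThreadMap{{5, 0}, {9, 0}};
    const uint64_t cancelAfter = 4;

    // First waiter strictly after the cutoff; begin() if no cutoff was given.
    auto start = cancelAfter ? valueThreadMap.upper_bound(cancelAfter)
                             : valueThreadMap.begin();
    if (start == valueThreadMap.end()) {
        std::printf("Nothing to cancel after %llu\n", (unsigned long long)cancelAfter);
        return 0;
    }
    std::printf("Next value to cancel after %llu is %llu\n",
                (unsigned long long)cancelAfter, (unsigned long long)start->first);
    valueThreadMap.erase(start, valueThreadMap.end());
    return 0;
}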
From 9e2094597bf29c04233e48ba4484fb0bcd0059dd Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 11:41:09 -0800 Subject: [PATCH 08/10] Cleanup 2 --- sqlitecluster/SQLiteNode.cpp | 2 +- sqlitecluster/SQLiteSequentialNotifier.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 70bf83dab..34881cdf4 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1930,8 +1930,8 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance if (infoCount % 100 == 0) { SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); } - usleep(10'000); infoCount++; + usleep(10'000); } // Done exiting. Reset so that we can resume FOLLOWING in the future. diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index c7814d344..a0f84a156 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -117,8 +117,10 @@ void SQLiteSequentialNotifier::cancel(uint64_t cancelAfter) { auto start = _cancelAfter ? valueThreadMap.upper_bound(_cancelAfter) : valueThreadMap.begin(); if (start == valueThreadMap.end()) { // There's nothing to remove. + SINFO("[performance] No available values to cancel after " << cancelAfter); return; } + SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); // Now iterate across whatever's remaining and mark it canceled. auto current = start; From 6b211c5f16c8a5d2d633ed417d2b01e0bc00bb96 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 12:18:03 -0800 Subject: [PATCH 09/10] Fix logline --- sqlitecluster/SQLiteSequentialNotifier.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index a0f84a156..e12505ecd 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -54,8 +54,7 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu if (_globalResult == RESULT::CANCELED || state->result == RESULT::CANCELED) { // It's possible that we hit the timeout here after `cancel()` has set the global value, but before we received the notification. // This isn't a problem, and we can jump back to the top of the loop and check again. If there's some problem, we'll see it there. - // Does this still happen?? Might be fixed. - SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or " << " specific cancel " << (state->result == RESULT::CANCELED)); + SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or specific cancel " << (state->result == RESULT::CANCELED)); continue; } } From 1723cfb3e28e364840efbba4b3453a9e18a1a79f Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 13:03:15 -0800 Subject: [PATCH 10/10] Add extra log line --- sqlitecluster/SQLiteNode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 34881cdf4..9395cf6a0 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1646,6 +1646,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // of transactions being broadcast. 
We do not attempt to handle these, as we keep careful count of which // replication threads are currently running, and reset the replication state tracking when we're not following. // Attempting to handle replication messages in some other state will break that tracking. + SINFO("Ignoring " << message.methodLine << " in state " << stateName(_state)); return; } if (_replicationThreadsShouldExit) {
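Where the series lands, paraphrased as a self-contained sketch rather than a verbatim excerpt (the enum, atomics, and function name here are illustrative): replication messages are handled only while FOLLOWING, so the thread-count and notifier bookkeeping that _changeState resets can no longer race with a late BEGIN_TRANSACTION, and the new SINFO makes any dropped message visible in the logs.

#include <atomic>

enum class NodeState { SEARCHING, FOLLOWING };

std::atomic<NodeState> state{NodeState::SEARCHING};
std::atomic<bool> replicationThreadsShouldExit{false};
std::atomic<int> replicationThreadCount{0};

void onReplicationMessage() {
    if (state.load() != NodeState::FOLLOWING) {
        // Transactions may still stream in as we leave FOLLOWING; log and
        // drop rather than touch the tracking that gets reset elsewhere.
        return;
    }
    if (replicationThreadsShouldExit.load()) {
        return; // stopping FOLLOWING; discard
    }
    replicationThreadCount.fetch_add(1);
    // ... spawn the detached replicate thread; its ScopedDecrement restores the count.
}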