diff --git a/BedrockServer.cpp b/BedrockServer.cpp index be9297eab..c0cad65f3 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1823,6 +1823,7 @@ atomic __quiesceShouldUnlock(false); thread* __quiesceThread = nullptr; void BedrockServer::_control(unique_ptr& command) { + SINFO("Received control command: " << command->request.methodLine); SData& response = command->response; string reason = "MANUAL"; response.methodLine = "200 OK"; @@ -1913,7 +1914,9 @@ void BedrockServer::_control(unique_ptr& command) { if (dbPoolCopy) { SQLiteScopedHandle dbScope(*_dbPool, _dbPool->getIndex()); SQLite& db = dbScope.db(); + SINFO("[quiesce] Exclusive locking DB"); db.exclusiveLockDB(); + SINFO("[quiesce] Exclusive locked DB"); locked = true; while (true) { if (__quiesceShouldUnlock) { @@ -1936,12 +1939,16 @@ void BedrockServer::_control(unique_ptr& command) { response.methodLine = "200 Blocked"; } } else if (SIEquals(command->request.methodLine, "UnblockWrites")) { + SINFO("[quiesce] Locking __quiesceLock"); lock_guard lock(__quiesceLock); + SINFO("[quiesce] __quiesceLock locked"); if (!__quiesceThread) { response.methodLine = "200 Not Blocked"; } else { __quiesceShouldUnlock = true; + SINFO("[quiesce] Joining __quiesceThread"); __quiesceThread->join(); + SINFO("[quiesce] __quiesceThread joined"); delete __quiesceThread; __quiesceThread = nullptr; response.methodLine = "200 Unblocked"; diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 5fb4543f1..80a4636fc 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -331,13 +331,17 @@ void SQLite::exclusiveLockDB() { // writes in this case. // So when these are both locked by the same thread at the same time, `commitLock` is always locked first, and we do it the same way here to avoid deadlocks. try { + SINFO("Locking commitLock"); _sharedData.commitLock.lock(); + SINFO("commitLock Locked"); } catch (const system_error& e) { SWARN("Caught system_error calling _sharedData.commitLock, code: " << e.code() << ", message: " << e.what()); throw; } try { + SINFO("Locking writeLock"); _sharedData.writeLock.lock(); + SINFO("writeLock Locked"); } catch(const system_error& e) { SWARN("Caught system_error calling _sharedData.writeLock, code: " << e.code() << ", message: " << e.what()); throw; @@ -673,6 +677,7 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { static const size_t deleteLimit = 10; if (minJournalEntry < oldestCommitToKeep) { auto startUS = STimeNow(); + shared_lock lock(_sharedData.writeLock); string query = "DELETE FROM " + _journalName + " WHERE id < " + SQ(oldestCommitToKeep) + " LIMIT " + SQ(deleteLimit); SASSERT(!SQuery(_db, "Deleting oldest journal rows", query)); size_t deletedCount = sqlite3_changes(_db); diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 17db479ba..480b6f3f1 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -493,7 +493,12 @@ void SQLiteNode::_sendOutstandingTransactions(const set& commitOnlyIDs } list SQLiteNode::getPeerInfo() const { - shared_lock sharedLock(_stateMutex); + // This does not lock _stateMutex. It follows the rule in `SQLiteNode.h` that says: + // * Alternatively, a public `const` method that is a simple getter for an atomic property can skip the lock. + // peer->getData is atomic internally, so we can treat `peer->getData()` as a simple getter for an atomic property. + // _peerList is also `const` and so we can iterate this list safely regardless of the lock. + // This makes this function a slightly more complex getter for an atomic property, but it's still safe to skip + // The state lock here. list peerData; for (SQLitePeer* peer : _peerList) { peerData.emplace_back(peer->getData()); diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index fd4ba5f6f..817a4e9b9 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -25,12 +25,12 @@ * Rules for maintaining SQLiteNode methods so that atomicity works as intended. * * No non-const members should be publicly exposed. - * Any public method that is `const` must shared_lock<>(nodeMutex). + * Any public method that is `const` must shared_lock<>(_stateMutex). * Alternatively, a public `const` method that is a simple getter for an atomic property can skip the lock. - * Any public method that is non-const must unique_lock<>(nodeMutex) before changing any internal state, and must hold + * Any public method that is non-const must unique_lock<>(_stateMutex) before changing any internal state, and must hold * this lock until it is done changing state to make this method's changes atomic. * Any private methods must not call public methods. - * Any private methods must not lock nodeMutex (for recursion reasons). + * Any private methods must not lock _stateMutex (for recursion reasons). * Any public methods must not call other public methods. * * `_replicate` is a special exception because it runs in multiple threads internally. It needs to handle locking if it diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index c93589520..a9636ca1d 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -225,6 +225,7 @@ void SQLitePeer::getCommit(uint64_t& count, string& hashString) const { } STable SQLitePeer::getData() const { + lock_guard lock(peerMutex); // Add all of our standard stuff. STable result({ {"name", name},