diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 98e59414f..be9297eab 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -97,7 +97,7 @@ void BedrockServer::sync() // We use fewer FDs on test machines that have other resource restrictions in place. SINFO("Setting dbPool size to: " << _dbPoolSize); - _dbPool = make_shared(_dbPoolSize, args["-db"], args.calc("-cacheSize"), args.calc("-maxJournalSize"), journalTables, mmapSizeGB, args.isSet("-hctree")); + _dbPool = make_shared(_dbPoolSize, args["-db"], args.calc("-cacheSize"), args.calc("-maxJournalSize"), journalTables, mmapSizeGB, args.isSet("-hctree"), args["-checkpointMode"]); SQLite& db = _dbPool->getBase(); // Initialize the command processor. @@ -358,7 +358,7 @@ void BedrockServer::sync() committingCommand = true; _syncNode->startCommit(SQLiteNode::QUORUM); _lastQuorumCommandTime = STimeNow(); - + // This interrupts the next poll loop immediately. This prevents a 1-second wait when running as a single server. _notifyDoneSync.push(true); SDEBUG("Finished sending distributed transaction for db upgrade."); @@ -1695,14 +1695,14 @@ void BedrockServer::_status(unique_ptr& command) { size_t totalCount = 0; for (const auto& s : _crashCommands) { totalCount += s.second.size(); - + vector paramsArray; for (const STable& params : s.second) { if (!params.empty()) { paramsArray.push_back(SComposeJSONObject(params)); } } - + STable commandObject; commandObject[s.first] = SComposeJSONArray(paramsArray); crashCommandListArray.push_back(SComposeJSONObject(commandObject)); diff --git a/main.cpp b/main.cpp index 240a1da88..e318e70ca 100644 --- a/main.cpp +++ b/main.cpp @@ -236,6 +236,7 @@ int main(int argc, char* argv[]) { << endl; cout << "-maxJournalSize <#commits> Number of commits to retain in the historical journal (default 1000000)" << endl; + cout << "-checkpointMode Accepts PASSIVE|FULL|RESTART|TRUNCATE, which is the value passed to https://www.sqlite.org/c3ref/wal_checkpoint_v2.html" << endl; cout << endl; 
cout << "Quick Start Tips:" << endl; cout << "-----------------" << endl; @@ -300,6 +301,9 @@ int main(int argc, char* argv[]) { SETDEFAULT("-queryLog", "queryLog.csv"); SETDEFAULT("-enableMultiWrite", "true"); + // We default to PASSIVE checkpoint everywhere as that has been the value proven to work fine for many years. + SETDEFAULT("-checkpointMode", "PASSIVE"); + args["-plugins"] = SComposeList(loadPlugins(args)); // Reset the database if requested diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index f31888cd6..5fb4543f1 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -198,17 +198,24 @@ void SQLite::commonConstructorInitialization(bool hctree) { // Always set synchronous commits to off for best commit performance in WAL mode. SASSERT(!SQuery(_db, "setting synchronous commits to off", "PRAGMA synchronous = OFF;")); + + // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. + // We set it to 2 minutes as the majority of transactions should take less than that. 
+ if (_checkpointMode != SQLITE_CHECKPOINT_PASSIVE) { + sqlite3_busy_timeout(_db, 120'000); + } } SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, - int minJournalTables, int64_t mmapSizeGB, bool hctree) : + int minJournalTables, int64_t mmapSizeGB, bool hctree, const string& checkpointMode) : _filename(initializeFilename(filename)), _maxJournalSize(maxJournalSize), _db(initializeDB(_filename, mmapSizeGB, hctree)), _journalNames(initializeJournal(_db, minJournalTables)), _sharedData(initializeSharedData(_db, _filename, _journalNames, hctree)), _cacheSize(cacheSize), - _mmapSizeGB(mmapSizeGB) + _mmapSizeGB(mmapSizeGB), + _checkpointMode(getCheckpointModeFromString(checkpointMode)) { commonConstructorInitialization(hctree); } @@ -220,7 +227,8 @@ SQLite::SQLite(const SQLite& from) : _journalNames(from._journalNames), _sharedData(from._sharedData), _cacheSize(from._cacheSize), - _mmapSizeGB(from._mmapSizeGB) + _mmapSizeGB(from._mmapSizeGB), + _checkpointMode(from._checkpointMode) { // This can always pass "true" because the copy constructor does not need to set the DB to WAL2 mode, it would have been set in the object being copied. 
commonConstructorInitialization(true); @@ -801,9 +809,10 @@ int SQLite::commit(const string& description, function* preCheckpointCal if (_sharedData.outstandingFramesToCheckpoint) { auto start = STimeNow(); int framesCheckpointed = 0; - sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_PASSIVE, NULL, &framesCheckpointed); + // Keep the result code: per the SQLite documentation, FULL/RESTART/TRUNCATE checkpoints return SQLITE_BUSY when the busy handler gives up, and the frame count is -1 on error. + const int checkpointResult = sqlite3_wal_checkpoint_v2(_db, 0, _checkpointMode, NULL, &framesCheckpointed); auto end = STimeNow(); - SINFO("Checkpointed " << framesCheckpointed << " (total) frames of " << _sharedData.outstandingFramesToCheckpoint << " in " << (end - start) << "us."); + SINFO("Checkpoint with type=" << _checkpointMode << " complete with result=" << checkpointResult << " and " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); // It might not actually be 0, but we'll just let sqlite tell us what it is next time _walHookCallback runs. _sharedData.outstandingFramesToCheckpoint = 0; @@ -828,6 +837,23 @@ int SQLite::commit(const string& description, function* preCheckpointCal return result; } +// Maps the -checkpointMode string (exactly PASSIVE, FULL, RESTART or TRUNCATE; the comparison is case-sensitive) to the matching SQLITE_CHECKPOINT_* constant. Any other value is reported through SERROR. NOTE(review): there is no return after SERROR, so this relies on SERROR not returning - confirm. +int SQLite::getCheckpointModeFromString(const string& checkpointModeString) { + if (checkpointModeString == "PASSIVE") { + return SQLITE_CHECKPOINT_PASSIVE; + } + if (checkpointModeString == "FULL") { + return SQLITE_CHECKPOINT_FULL; + } + if (checkpointModeString == "RESTART") { + return SQLITE_CHECKPOINT_RESTART; + } + if (checkpointModeString == "TRUNCATE") { + return SQLITE_CHECKPOINT_TRUNCATE; + } + SERROR("Invalid checkpoint type: " << checkpointModeString); +} + map> SQLite::popCommittedTransactions() { return _sharedData.popCommittedTransactions(); } diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 99c649d67..f5ddbc814 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -57,7 +57,7 @@ class SQLite { // // mmapSizeGB: address space to use for memory-mapped IO, in GB.
SQLite(const string& filename, int cacheSize, int maxJournalSize, int minJournalTables, - int64_t mmapSizeGB = 0, bool hctree = false); + int64_t mmapSizeGB = 0, bool hctree = false, const string& checkpointMode = "PASSIVE"); // This constructor is not exactly a copy constructor. It creates an other SQLite object based on the first except // with a *different* journal table. This avoids a lot of locking around creating structures that we know already @@ -355,6 +355,7 @@ class SQLite { static sqlite3* initializeDB(const string& filename, int64_t mmapSizeGB, bool hctree); static vector initializeJournal(sqlite3* db, int minJournalTables); void commonConstructorInitialization(bool hctree = false); + static int getCheckpointModeFromString(const string& checkpointModeString); // The filename of this DB, canonicalized to its full path on disk. const string _filename; @@ -527,4 +528,7 @@ class SQLite { // Set to true inside of a write query. bool _currentlyWriting{false}; + + // One of 0|1|2|3 (a.k.a. PASSIVE|FULL|RESTART|TRUNCATE), which is the value to be passed to sqlite3_wal_checkpoint_v2. + int _checkpointMode; }; diff --git a/sqlitecluster/SQLitePool.cpp b/sqlitecluster/SQLitePool.cpp index ca3d7a4ad..75d1a31f2 100644 --- a/sqlitecluster/SQLitePool.cpp +++ b/sqlitecluster/SQLitePool.cpp @@ -8,9 +8,10 @@ SQLitePool::SQLitePool(size_t maxDBs, int maxJournalSize, int minJournalTables, int64_t mmapSizeGB, - bool hctree) + bool hctree, + const string& checkpointMode) : _maxDBs(max(maxDBs, 1ul)), - _baseDB(filename, cacheSize, maxJournalSize, minJournalTables, mmapSizeGB, hctree), + _baseDB(filename, cacheSize, maxJournalSize, minJournalTables, mmapSizeGB, hctree, checkpointMode), _objects(_maxDBs, nullptr) { } diff --git a/sqlitecluster/SQLitePool.h b/sqlitecluster/SQLitePool.h index 8cbc6c92e..cf12e38e2 100644 --- a/sqlitecluster/SQLitePool.h +++ b/sqlitecluster/SQLitePool.h @@ -7,7 +7,7 @@ class SQLitePool { public: // Create a pool of DB handles. 
SQLitePool(size_t maxDBs, const string& filename, int cacheSize, int maxJournalSize, int minJournalTables, - int64_t mmapSizeGB = 0, bool hctree = false); + int64_t mmapSizeGB = 0, bool hctree = false, const string& checkpointMode = "PASSIVE"); ~SQLitePool(); // Get the base object (the first one created, which uses the `journal` table). Note that if called by multiple