From 2ea57b621e910df4e8b087514a9737970f346a86 Mon Sep 17 00:00:00 2001 From: Alexander Hirsch Date: Fri, 15 Jun 2018 14:21:50 +0200 Subject: [PATCH] Eagerly fill LogFilteredData::filteredItemsCache_ and progressively fill/remove items instead of completely regenerating the whole cache. --- src/data/logfiltereddata.cpp | 149 ++++++++++++++++------- src/data/logfiltereddata.h | 7 +- src/data/logfiltereddataworkerthread.cpp | 25 +++- src/data/logfiltereddataworkerthread.h | 8 +- src/log.h | 2 +- src/marks.cpp | 17 ++- src/marks.h | 10 +- 7 files changed, 162 insertions(+), 56 deletions(-) diff --git a/src/data/logfiltereddata.cpp b/src/data/logfiltereddata.cpp index 2a9335ac4..f293c7d5d 100644 --- a/src/data/logfiltereddata.cpp +++ b/src/data/logfiltereddata.cpp @@ -47,8 +47,6 @@ LogFilteredData::LogFilteredData() : AbstractLogData(), maxLengthMarks_ = 0; searchDone_ = true; visibility_ = MarksAndMatches; - - filteredItemsCacheDirty_ = true; } // Usual constructor: just copy the data, the search is started by runSearch() @@ -72,8 +70,6 @@ LogFilteredData::LogFilteredData( const LogData* logData ) visibility_ = MarksAndMatches; - filteredItemsCacheDirty_ = true; - // Forward the update signal connect( &workerThread_, SIGNAL( searchProgressed( int, int, qint64 ) ), this, SLOT( handleSearchProgressed( int, int, qint64 ) ) ); @@ -123,7 +119,7 @@ void LogFilteredData::clearSearch() matching_lines_.clear(); maxLength_ = 0; nbLinesProcessed_ = 0; - filteredItemsCacheDirty_ = true; + removeAllFromFilteredItemsCache( Match ); } qint64 LogFilteredData::getMatchingLineNumber( int matchNum ) const @@ -172,11 +168,6 @@ LogFilteredData::FilteredLineType else if ( visibility_ == MarksOnly ) return Mark; else { - // If it is MarksAndMatches, we have to look. - // Regenerate the cache if needed - if ( filteredItemsCacheDirty_ ) - regenerateFilteredItemsCache(); - return filteredItemsCache_[ index ].type(); } } @@ -189,7 +180,7 @@ void LogFilteredData::addMark( qint64 line, QChar mark ) marks_.addMark( line, mark ); maxLengthMarks_ = qMax( maxLengthMarks_, sourceLogData_->getLineLength( line ) ); - filteredItemsCacheDirty_ = true; + insertIntoFilteredItemsCache( FilteredItem{ static_cast( line ), Mark } ); } else LOG(logERROR) << "LogFilteredData::addMark\ @@ -236,26 +227,41 @@ qint64 LogFilteredData::getMarkBefore( qint64 line ) const void LogFilteredData::deleteMark( QChar mark ) { - marks_.deleteMark( mark ); - filteredItemsCacheDirty_ = true; + int index = marks_.findMark( mark ); + qint64 line = marks_.getLineMarkedByIndex( index ); + marks_.deleteMarkAt( index ); - // FIXME: maxLengthMarks_ + if ( line < 0 ) { + // LOG(logWARNING)? + return; + } + + updateMaxLengthMarks( line ); + removeFromFilteredItemsCache( FilteredItem{ static_cast( line ), Mark } ); } void LogFilteredData::deleteMark( qint64 line ) { marks_.deleteMark( line ); - filteredItemsCacheDirty_ = true; + updateMaxLengthMarks( line ); + removeFromFilteredItemsCache( FilteredItem{ static_cast( line ), Mark } ); +} + +void LogFilteredData::updateMaxLengthMarks( qint64 removed_line ) +{ + if ( removed_line < 0 ) { + LOG(logWARNING) << "updateMaxLengthMarks called with negative line-number"; + return; + } // Now update the max length if needed - if ( sourceLogData_->getLineLength( line ) >= maxLengthMarks_ ) { + if ( sourceLogData_->getLineLength( removed_line ) >= maxLengthMarks_ ) { LOG(logDEBUG) << "deleteMark recalculating longest mark"; maxLengthMarks_ = 0; - for ( Marks::const_iterator i = marks_.begin(); - i != marks_.end(); ++i ) { - LOG(logDEBUG) << "line " << i->lineNumber(); + for ( auto &mark : marks_ ) { + LOG(logDEBUG) << "line " << mark.lineNumber(); maxLengthMarks_ = qMax( maxLengthMarks_, - sourceLogData_->getLineLength( i->lineNumber() ) ); + sourceLogData_->getLineLength( mark.lineNumber() ) ); } } } @@ -263,13 +269,16 @@ void LogFilteredData::deleteMark( qint64 line ) void LogFilteredData::clearMarks() { marks_.clear(); - filteredItemsCacheDirty_ = true; maxLengthMarks_ = 0; + removeAllFromFilteredItemsCache( Mark ); } void LogFilteredData::setVisibility( Visibility visi ) { visibility_ = visi; + + if ( visibility_ == MarksAndMatches ) + regenerateFilteredItemsCache(); } // @@ -281,8 +290,20 @@ void LogFilteredData::handleSearchProgressed( int nbMatches, int progress, qint6 << nbMatches << " progress=" << progress; // searchDone_ = true; - workerThread_.getSearchResult( &maxLength_, &matching_lines_, &nbLinesProcessed_ ); - filteredItemsCacheDirty_ = true; + assert( nbMatches >= 0 ); + + size_t start_index = matching_lines_.size(); + + workerThread_.updateSearchResult( &maxLength_, &matching_lines_, &nbLinesProcessed_ ); + + assert( matching_lines_.size() >= start_index ); + + filteredItemsCache_.reserve( matching_lines_.size() + marks_.size() ); + // (it's an overestimate but probably not by much so it's fine) + + for ( auto it = next( begin( matching_lines_ ), start_index ); it != end( matching_lines_ ); ++it ) { + insertIntoFilteredItemsCache( FilteredItem{ it->lineNumber(), Match } ); + } emit searchProgressed( nbMatches, progress, initial_position ); } @@ -305,10 +326,6 @@ LineNumber LogFilteredData::findLogDataLine( LineNumber lineNum ) const LOG(logERROR) << "Index too big in LogFilteredData: " << lineNum; } else { - // Regenerate the cache if needed - if ( filteredItemsCacheDirty_ ) - regenerateFilteredItemsCache(); - if ( lineNum < filteredItemsCache_.size() ) line = filteredItemsCache_[ lineNum ].lineNumber(); else @@ -333,11 +350,6 @@ LineNumber LogFilteredData::findFilteredLine( LineNumber lineNum ) const lineNum ); } else { - // Regenerate the cache if needed - if ( filteredItemsCacheDirty_ ) { - regenerateFilteredItemsCache(); - } - lineIndex = lookupLineNumber( filteredItemsCache_.begin(), filteredItemsCache_.end(), lineNum ); @@ -398,10 +410,6 @@ qint64 LogFilteredData::doGetNbLine() const else if ( visibility_ == MarksOnly ) nbLines = marks_.size(); else { - // Regenerate the cache if needed (hopefully most of the time - // it won't be necessarily) - if ( filteredItemsCacheDirty_ ) - regenerateFilteredItemsCache(); nbLines = filteredItemsCache_.size(); } @@ -439,13 +447,16 @@ void LogFilteredData::doSetMultibyteEncodingOffsets( int, int ) { } -// TODO: We might be a bit smarter and not regenerate the whole thing when -// e.g. stuff is added at the end of the search. void LogFilteredData::regenerateFilteredItemsCache() const { LOG(logDEBUG) << "regenerateFilteredItemsCache"; - filteredItemsCache_.clear(); + if ( filteredItemsCache_.size() > 0 ) { + // the cache was not invalidated, so we can keep it + LOG(logDEBUG) << "cache was not invalidated"; + return; + } + filteredItemsCache_.reserve( matching_lines_.size() + marks_.size() ); // (it's an overestimate but probably not by much so it's fine) @@ -474,7 +485,63 @@ void LogFilteredData::regenerateFilteredItemsCache() const filteredItemsCache_.push_back( FilteredItem( line, type ) ); } - filteredItemsCacheDirty_ = false; - LOG(logDEBUG) << "finished regenerateFilteredItemsCache"; } + +void LogFilteredData::insertIntoFilteredItemsCache( FilteredItem item ) +{ + if ( visibility_ != MarksAndMatches ) { + // this is invalidated and will be regenerated when we need it + filteredItemsCache_.clear(); + LOG(logDEBUG) << "cache is invalidated"; + return; + } + + // Search for the corresponding index. + auto found = lower_bound( begin( filteredItemsCache_ ), end( filteredItemsCache_ ), item ); + if ( found == end( filteredItemsCache_ ) || found->lineNumber() > item.lineNumber() ) { + filteredItemsCache_.insert( found, item ); + } else { + assert( found->lineNumber() == item.lineNumber() ); + found->add( item.type() ); + } +} + +void LogFilteredData::removeFromFilteredItemsCache( FilteredItem item ) +{ + if ( visibility_ != MarksAndMatches ) { + // this is invalidated and will be regenerated when we need it + filteredItemsCache_.clear(); + LOG(logDEBUG) << "cache is invalidated"; + return; + } + + // Search for the corresponding index. + auto found = equal_range( begin( filteredItemsCache_ ), end( filteredItemsCache_ ), item ); + if( found.first == end( filteredItemsCache_ ) ) { + LOG(logERROR) << "Attempt to remove line " << item.lineNumber() << " from filteredItemsCache_ failed, since it was not found"; + return; + } + + if ( distance( found.first, found.second ) > 1 ) { + LOG(logERROR) << "Multiple matches found for line " << item.lineNumber() << " in filteredItemsCache_"; + // FIXME: collapse them? + } + + if ( !found.first->remove( item.type() ) ){ + filteredItemsCache_.erase( found.first ); + } +} + +void LogFilteredData::removeAllFromFilteredItemsCache( FilteredLineType type ) +{ + if ( visibility_ != MarksAndMatches ) { + // this is invalidated and will be regenerated when we need it + filteredItemsCache_.clear(); + LOG(logDEBUG) << "cache is invalidated"; + return; + } + + auto erase_begin = remove_if( begin( filteredItemsCache_ ), end( filteredItemsCache_ ), [type]( FilteredItem& item ) { return !item.remove( type ); } ); + filteredItemsCache_.erase( erase_begin, end( filteredItemsCache_ ) ); +} diff --git a/src/data/logfiltereddata.h b/src/data/logfiltereddata.h index aa177062a..c4bf9959e 100644 --- a/src/data/logfiltereddata.h +++ b/src/data/logfiltereddata.h @@ -156,7 +156,6 @@ class LogFilteredData : public AbstractLogData { // when visibility_ == MarksAndMatches // (QVector store actual objects instead of pointers) mutable std::vector filteredItemsCache_; - mutable bool filteredItemsCacheDirty_; LogFilteredDataWorkerThread workerThread_; Marks marks_; @@ -166,6 +165,12 @@ class LogFilteredData : public AbstractLogData { LineNumber findFilteredLine( LineNumber lineNum ) const; void regenerateFilteredItemsCache() const; + void insertIntoFilteredItemsCache( FilteredItem item ); + void removeFromFilteredItemsCache( FilteredItem item ); + void removeAllFromFilteredItemsCache( FilteredLineType type ); + + // update maxLengthMarks_ when a Mark was removed. + void updateMaxLengthMarks( qint64 removed_line ); }; static LogFilteredData::FilteredLineType& operator|=(LogFilteredData::FilteredLineType& a, LogFilteredData::FilteredLineType b) diff --git a/src/data/logfiltereddataworkerthread.cpp b/src/data/logfiltereddataworkerthread.cpp index 76e5e1c7f..621af3578 100644 --- a/src/data/logfiltereddataworkerthread.cpp +++ b/src/data/logfiltereddataworkerthread.cpp @@ -28,15 +28,32 @@ const int SearchOperation::nbLinesInChunk = 5000; void SearchData::getAll( int* length, SearchResultArray* matches, - qint64* lines) const + qint64* lines ) const +{ + matches->clear(); + getAllMissing( length, matches, lines ); +} + +void SearchData::getAllMissing( int* length, SearchResultArray* matches, + qint64* lines ) const { QMutexLocker locker( &dataMutex_ ); + *length = maxLength_; *lines = nbLinesProcessed_; + if ( matches_.size() < matches->size() ) { + LOG(logWARNING) << "Cannot append search-data to smaller match-array"; + return; + } + + matches->reserve( matches_.size() - matches->size() ); + // This is a copy (potentially slow) - *matches = matches_; + size_t offset = matches->size(); + std::insert_iterator inserter{ *matches, next( begin( *matches ), offset ) }; + copy( next( begin( matches_ ), offset ), end( matches_ ), inserter ); } void SearchData::setAll( int length, @@ -167,10 +184,10 @@ void LogFilteredDataWorkerThread::interrupt() } // This will do an atomic copy of the object -void LogFilteredDataWorkerThread::getSearchResult( +void LogFilteredDataWorkerThread::updateSearchResult( int* maxLength, SearchResultArray* searchMatches, qint64* nbLinesProcessed ) { - searchData_.getAll( maxLength, searchMatches, nbLinesProcessed ); + searchData_.getAllMissing( maxLength, searchMatches, nbLinesProcessed ); } // This is the thread's main loop diff --git a/src/data/logfiltereddataworkerthread.h b/src/data/logfiltereddataworkerthread.h index 5b8f54553..6ab85460b 100644 --- a/src/data/logfiltereddataworkerthread.h +++ b/src/data/logfiltereddataworkerthread.h @@ -63,6 +63,10 @@ class SearchData // Atomically get all the search data void getAll( int* length, SearchResultArray* matches, qint64* nbLinesProcessed ) const; + // Atomically get all the search data + // Appends the missing entries. Does not check that the existing entries match. + void getAllMissing( int* length, SearchResultArray* matches, + qint64* lines ) const; // Atomically set all the search data // (overwriting the existing) // (the matches are always moved) @@ -155,8 +159,8 @@ class LogFilteredDataWorkerThread : public QThread // Interrupts the search if one is in progress void interrupt(); - // Returns a copy of the current indexing data - void getSearchResult( int* maxLength, SearchResultArray* searchMatches, + // Updates the array by copying the current indexing data + void updateSearchResult( int* maxLength, SearchResultArray* searchMatches, qint64* nbLinesProcessed ); signals: diff --git a/src/log.h b/src/log.h index 827c8f8a7..893367b34 100644 --- a/src/log.h +++ b/src/log.h @@ -25,7 +25,7 @@ #include // Modify here! -//#define FILELOG_MAX_LEVEL logDEBUG +// #define FILELOG_MAX_LEVEL logDEBUG inline std::string NowTime(); diff --git a/src/marks.cpp b/src/marks.cpp index 0abcf4115..9201331c7 100644 --- a/src/marks.cpp +++ b/src/marks.cpp @@ -66,20 +66,29 @@ bool Marks::isLineMarked( qint64 line ) const return lookupLineNumber< QList >( marks_, line, &index ); } -void Marks::deleteMark( QChar mark ) +int Marks::findMark( QChar mark ) { // 'mark' is not used yet mark = mark; + + return -1; } -void Marks::deleteMark( qint64 line ) +void Marks::deleteMarkAt( int index ) { - int index; + marks_.removeAt( index ); +} + +int Marks::deleteMark( qint64 line ) +{ + int index = -1; if ( lookupLineNumber< QList >( marks_, line, &index ) ) { - marks_.removeAt( index ); + deleteMarkAt( index ); } + + return index; } void Marks::clear() diff --git a/src/marks.h b/src/marks.h index 6871d3d57..e55ac6376 100644 --- a/src/marks.h +++ b/src/marks.h @@ -54,11 +54,15 @@ class Marks { qint64 getMark( QChar mark ) const; // Returns wheither the passed line has a mark on it. bool isLineMarked( qint64 line ) const; - // Delete the mark identified by the passed char. - void deleteMark( QChar mark ); + // Find a mark. + // Returns the index of the mark or -1 if it was not found. + int findMark( QChar mark ); + // Delete the mark at an index. + void deleteMarkAt( int index ); // Delete the mark present on the passed line or do nothing if there is // none. - void deleteMark( qint64 line ); + // Returns the index where the mark used to be. + int deleteMark( qint64 line ); // Get the line marked identified by the index (in this list) passed. qint64 getLineMarkedByIndex( int index ) const { return marks_[index].lineNumber(); }