Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-64604: Fix interpreting scorch config: "fieldTFRCacheThreshold" #2117

Merged
merged 4 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,6 @@ type asynchSegmentResult struct {

var reflectStaticSizeIndexSnapshot int

// DefaultFieldTFRCacheThreshold limits the number of TermFieldReaders(TFR) for
// a field in an index snapshot. Without this limit, when recycling TFRs, it is
// possible that a very large number of TFRs may be added to the recycle
// cache, which could eventually lead to significant memory consumption.
// This threshold can be overwritten by users at the library level by changing the
// exported variable, or at the index level by setting the FieldTFRCacheThreshold
// in the kvConfig.
var DefaultFieldTFRCacheThreshold uint64 = 10

func init() {
var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
Expand Down Expand Up @@ -640,10 +631,27 @@ func (is *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnap
}
}

func (is *IndexSnapshot) getFieldTFRCacheThreshold() uint64 {
// DefaultFieldTFRCacheThreshold limits the number of TermFieldReaders(TFR) for
// a field in an index snapshot. Without this limit, when recycling TFRs, it is
// possible that a very large number of TFRs may be added to the recycle
// cache, which could eventually lead to significant memory consumption.
// This threshold can be overwritten by users at the library level by changing the
// exported variable, or at the index level by setting the "fieldTFRCacheThreshold"
// in the kvConfig.
var DefaultFieldTFRCacheThreshold int = 0 // disabled because it causes MB-64604

func (is *IndexSnapshot) getFieldTFRCacheThreshold() int {
if is.parent.config != nil {
if _, ok := is.parent.config["FieldTFRCacheThreshold"]; ok {
return is.parent.config["FieldTFRCacheThreshold"].(uint64)
if _, exists := is.parent.config["fieldTFRCacheThreshold"]; exists {
val := is.parent.config["fieldTFRCacheThreshold"]
if x, ok := val.(float64); ok {
// JSON unmarshal-ed into a map[string]interface{} will default
// to float64 for numbers, so we need to check for float64 first.
return int(x)
} else if x, ok := val.(int); ok {
// If library users provided an int in the config, we'll honor it.
return x
}
}
}
return DefaultFieldTFRCacheThreshold
Expand All @@ -670,7 +678,7 @@ func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReade
if is.fieldTFRs == nil {
is.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{}
}
if uint64(len(is.fieldTFRs[tfr.field])) < is.getFieldTFRCacheThreshold() {
if len(is.fieldTFRs[tfr.field]) < is.getFieldTFRCacheThreshold() {
tfr.bytesRead = 0
is.fieldTFRs[tfr.field] = append(is.fieldTFRs[tfr.field], tfr)
}
Expand Down
32 changes: 16 additions & 16 deletions index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,8 @@ func TestBytesRead(t *testing.T) {
}
stats, _ = idx.StatsMap()["index"].(map[string]interface{})
bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead-prevBytesRead != 23 && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for query string 23, got %v",
if bytesRead-prevBytesRead != 66 && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for query string 66, got %v",
bytesRead-prevBytesRead)
}
prevBytesRead = bytesRead
Expand Down Expand Up @@ -454,8 +454,8 @@ func TestBytesRead(t *testing.T) {

stats, _ = idx.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
if !approxSame(bytesRead-prevBytesRead, 150) && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for faceted query is around 150, got %v",
if !approxSame(bytesRead-prevBytesRead, 196) && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for faceted query is around 196, got %v",
bytesRead-prevBytesRead)
}
prevBytesRead = bytesRead
Expand Down Expand Up @@ -487,8 +487,8 @@ func TestBytesRead(t *testing.T) {

stats, _ = idx.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead-prevBytesRead != 60 && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for query with highlighter is 60, got %v",
if bytesRead-prevBytesRead != 105 && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for query with highlighter is 105, got %v",
bytesRead-prevBytesRead)
}
prevBytesRead = bytesRead
Expand All @@ -504,8 +504,8 @@ func TestBytesRead(t *testing.T) {
// since it's created afresh and not reused
stats, _ = idx.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead-prevBytesRead != 83 && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for disjunction query is 83, got %v",
if bytesRead-prevBytesRead != 120 && res.Cost == bytesRead-prevBytesRead {
t.Fatalf("expected bytes read for disjunction query is 120, got %v",
bytesRead-prevBytesRead)
}
}
Expand Down Expand Up @@ -577,8 +577,8 @@ func TestBytesReadStored(t *testing.T) {
}
stats, _ = idx.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead-prevBytesRead != 15 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 15, got %v", bytesRead-prevBytesRead)
if bytesRead-prevBytesRead != 48 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 48, got %v", bytesRead-prevBytesRead)
}
prevBytesRead = bytesRead

Expand All @@ -592,8 +592,8 @@ func TestBytesReadStored(t *testing.T) {
stats, _ = idx.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)

if bytesRead-prevBytesRead != 26478 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 26478, got %v",
if bytesRead-prevBytesRead != 26511 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 26511, got %v",
bytesRead-prevBytesRead)
}
idx.Close()
Expand Down Expand Up @@ -653,8 +653,8 @@ func TestBytesReadStored(t *testing.T) {
}
stats, _ = idx1.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead-prevBytesRead != 12 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 12, got %v", bytesRead-prevBytesRead)
if bytesRead-prevBytesRead != 47 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 47, got %v", bytesRead-prevBytesRead)
}
prevBytesRead = bytesRead

Expand All @@ -666,8 +666,8 @@ func TestBytesReadStored(t *testing.T) {

stats, _ = idx1.StatsMap()["index"].(map[string]interface{})
bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
if bytesRead-prevBytesRead != 42 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 42, got %v", bytesRead-prevBytesRead)
if bytesRead-prevBytesRead != 77 && bytesRead-prevBytesRead == res.Cost {
t.Fatalf("expected the bytes read stat to be around 77, got %v", bytesRead-prevBytesRead)
}
}

Expand Down
Loading