From 7d627b9f2d8cda7af90bb1153fd0669ff0cace5e Mon Sep 17 00:00:00 2001 From: Aditi Ahuja <48997495+metonymic-smokey@users.noreply.github.com> Date: Thu, 28 Nov 2024 18:46:45 +0530 Subject: [PATCH 1/3] MB-64360 - Upgrade zapx v16 (#2107) Update zapx version. --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 19e826daa..8f567dd22 100644 --- a/go.mod +++ b/go.mod @@ -24,7 +24,7 @@ require ( github.com/blevesearch/zapx/v13 v13.3.10 github.com/blevesearch/zapx/v14 v14.3.10 github.com/blevesearch/zapx/v15 v15.3.16 - github.com/blevesearch/zapx/v16 v16.1.8 + github.com/blevesearch/zapx/v16 v16.1.9-0.20241128100831-c85f4c5695fd github.com/couchbase/moss v0.2.0 github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v1.7.0 diff --git a/go.sum b/go.sum index bf94edfae..4fced8775 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7 github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns= github.com/blevesearch/zapx/v15 v15.3.16 h1:Ct3rv7FUJPfPk99TI/OofdC+Kpb4IdyfdMH48sb+FmE= github.com/blevesearch/zapx/v15 v15.3.16/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg= -github.com/blevesearch/zapx/v16 v16.1.8 h1:Bxzpw6YQpFs7UjoCV1+RvDw6fmAT2GZxldwX8b3wVBM= -github.com/blevesearch/zapx/v16 v16.1.8/go.mod h1:JqQlOqlRVaYDkpLIl3JnKql8u4zKTNlVEa3nLsi0Gn8= +github.com/blevesearch/zapx/v16 v16.1.9-0.20241128100831-c85f4c5695fd h1:Es5x1U5ii+lM/zStrb4YCY4R1sKVEi0d6XAOZQ5Q0N4= +github.com/blevesearch/zapx/v16 v16.1.9-0.20241128100831-c85f4c5695fd/go.mod h1:JqQlOqlRVaYDkpLIl3JnKql8u4zKTNlVEa3nLsi0Gn8= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= From 78cf78999e64fe8b257f215a4087b223cd8d210a Mon Sep 17 00:00:00 2001 From: Abhi Dangeti Date: Tue, 17 Dec 2024 10:44:30 -0700 Subject: [PATCH 2/3] MB-64604: Fix interpreting scorch config: "fieldTFRCacheThreshold" (#2117) + Setting the default to 0 on account of the panics caught in the MB. + Firstly, refactor `FieldTFRCacheThreshold` to `fieldTFRCacheThreshold` for _some_ naming consistency here. + This threshold can be used to toggle recycling of TermFieldReaders on/off. + Couchbase users will have the ability to provide this setting within a JSON payload, which when interpreted into a `map[string]interface{}` will need to be interpreted as a `float64`. + Should library users set it as an `int` within the index config - we'll honor that setting as well. --- index/scorch/snapshot_index.go | 34 +++++++++++++++++++++------------- index_test.go | 32 ++++++++++++++++---------------- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 79840a41f..8ad073f0f 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -52,15 +52,6 @@ type asynchSegmentResult struct { var reflectStaticSizeIndexSnapshot int -// DefaultFieldTFRCacheThreshold limits the number of TermFieldReaders(TFR) for -// a field in an index snapshot. Without this limit, when recycling TFRs, it is -// possible that a very large number of TFRs may be added to the recycle -// cache, which could eventually lead to significant memory consumption. -// This threshold can be overwritten by users at the library level by changing the -// exported variable, or at the index level by setting the FieldTFRCacheThreshold -// in the kvConfig. -var DefaultFieldTFRCacheThreshold uint64 = 10 - func init() { var is interface{} = IndexSnapshot{} reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) @@ -640,10 +631,27 @@ func (is *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnap } } -func (is *IndexSnapshot) getFieldTFRCacheThreshold() uint64 { +// DefaultFieldTFRCacheThreshold limits the number of TermFieldReaders(TFR) for +// a field in an index snapshot. Without this limit, when recycling TFRs, it is +// possible that a very large number of TFRs may be added to the recycle +// cache, which could eventually lead to significant memory consumption. +// This threshold can be overwritten by users at the library level by changing the +// exported variable, or at the index level by setting the "fieldTFRCacheThreshold" +// in the kvConfig. +var DefaultFieldTFRCacheThreshold int = 0 // disabled because it causes MB-64604 + +func (is *IndexSnapshot) getFieldTFRCacheThreshold() int { if is.parent.config != nil { - if _, ok := is.parent.config["FieldTFRCacheThreshold"]; ok { - return is.parent.config["FieldTFRCacheThreshold"].(uint64) + if _, exists := is.parent.config["fieldTFRCacheThreshold"]; exists { + val := is.parent.config["fieldTFRCacheThreshold"] + if x, ok := val.(float64); ok { + // JSON unmarshal-ed into a map[string]interface{} will default + // to float64 for numbers, so we need to check for float64 first. + return int(x) + } else if x, ok := val.(int); ok { + // If library users provided an int in the config, we'll honor it. + return x + } } } return DefaultFieldTFRCacheThreshold @@ -670,7 +678,7 @@ func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReade if is.fieldTFRs == nil { is.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{} } - if uint64(len(is.fieldTFRs[tfr.field])) < is.getFieldTFRCacheThreshold() { + if len(is.fieldTFRs[tfr.field]) < is.getFieldTFRCacheThreshold() { tfr.bytesRead = 0 is.fieldTFRs[tfr.field] = append(is.fieldTFRs[tfr.field], tfr) } diff --git a/index_test.go b/index_test.go index e5094440d..a857d5ecb 100644 --- a/index_test.go +++ b/index_test.go @@ -421,8 +421,8 @@ func TestBytesRead(t *testing.T) { } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 23 && res.Cost == bytesRead-prevBytesRead { - t.Fatalf("expected bytes read for query string 23, got %v", + if bytesRead-prevBytesRead != 66 && res.Cost == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for query string 66, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead @@ -454,8 +454,8 @@ func TestBytesRead(t *testing.T) { stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if !approxSame(bytesRead-prevBytesRead, 150) && res.Cost == bytesRead-prevBytesRead { - t.Fatalf("expected bytes read for faceted query is around 150, got %v", + if !approxSame(bytesRead-prevBytesRead, 196) && res.Cost == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for faceted query is around 196, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead @@ -487,8 +487,8 @@ func TestBytesRead(t *testing.T) { stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 60 && res.Cost == bytesRead-prevBytesRead { - t.Fatalf("expected bytes read for query with highlighter is 60, got %v", + if bytesRead-prevBytesRead != 105 && res.Cost == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for query with highlighter is 105, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead @@ -504,8 +504,8 @@ func TestBytesRead(t *testing.T) { // since it's created afresh and not reused stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 83 && res.Cost == bytesRead-prevBytesRead { - t.Fatalf("expected bytes read for disjunction query is 83, got %v", + if bytesRead-prevBytesRead != 120 && res.Cost == bytesRead-prevBytesRead { + t.Fatalf("expected bytes read for disjunction query is 120, got %v", bytesRead-prevBytesRead) } } @@ -577,8 +577,8 @@ func TestBytesReadStored(t *testing.T) { } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 15 && bytesRead-prevBytesRead == res.Cost { - t.Fatalf("expected the bytes read stat to be around 15, got %v", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 48 && bytesRead-prevBytesRead == res.Cost { + t.Fatalf("expected the bytes read stat to be around 48, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead @@ -592,8 +592,8 @@ func TestBytesReadStored(t *testing.T) { stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 26478 && bytesRead-prevBytesRead == res.Cost { - t.Fatalf("expected the bytes read stat to be around 26478, got %v", + if bytesRead-prevBytesRead != 26511 && bytesRead-prevBytesRead == res.Cost { + t.Fatalf("expected the bytes read stat to be around 26511, got %v", bytesRead-prevBytesRead) } idx.Close() @@ -653,8 +653,8 @@ func TestBytesReadStored(t *testing.T) { } stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 12 && bytesRead-prevBytesRead == res.Cost { - t.Fatalf("expected the bytes read stat to be around 12, got %v", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 47 && bytesRead-prevBytesRead == res.Cost { + t.Fatalf("expected the bytes read stat to be around 47, got %v", bytesRead-prevBytesRead) } prevBytesRead = bytesRead @@ -666,8 +666,8 @@ func TestBytesReadStored(t *testing.T) { stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) - if bytesRead-prevBytesRead != 42 && bytesRead-prevBytesRead == res.Cost { - t.Fatalf("expected the bytes read stat to be around 42, got %v", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 77 && bytesRead-prevBytesRead == res.Cost { + t.Fatalf("expected the bytes read stat to be around 77, got %v", bytesRead-prevBytesRead) } } From 5c53634221cd2b2c8a129f489f74ca315c3ae3a1 Mon Sep 17 00:00:00 2001 From: Abhi Dangeti Date: Tue, 17 Dec 2024 11:01:09 -0700 Subject: [PATCH 3/3] MB-64604: Remove unnecessary second map lookup (#2121) --- index/scorch/snapshot_index.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 8ad073f0f..51ffc859b 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -642,8 +642,7 @@ var DefaultFieldTFRCacheThreshold int = 0 // disabled because it causes MB-64604 func (is *IndexSnapshot) getFieldTFRCacheThreshold() int { if is.parent.config != nil { - if _, exists := is.parent.config["fieldTFRCacheThreshold"]; exists { - val := is.parent.config["fieldTFRCacheThreshold"] + if val, exists := is.parent.config["fieldTFRCacheThreshold"]; exists { if x, ok := val.(float64); ok { // JSON unmarshal-ed into a map[string]interface{} will default // to float64 for numbers, so we need to check for float64 first.