From c0a1cfd0d96a3bd010aa5ad8e5f30a0bed300fa4 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Thu, 3 Oct 2024 15:08:16 -0400 Subject: [PATCH 01/11] query-tee: Add an option to skip samples for comparison before a given timestamp Signed-off-by: Ganesh Vernekar --- cmd/query-tee/main.go | 1 + tools/querytee/proxy.go | 2 + tools/querytee/response_comparator.go | 82 +++++-- tools/querytee/response_comparator_test.go | 248 +++++++++++++++++++++ 4 files changed, 319 insertions(+), 14 deletions(-) diff --git a/cmd/query-tee/main.go b/cmd/query-tee/main.go index 7ebb2089fb7..4e627f682bf 100644 --- a/cmd/query-tee/main.go +++ b/cmd/query-tee/main.go @@ -106,6 +106,7 @@ func mimirReadRoutes(cfg Config) []querytee.Route { Tolerance: cfg.ProxyConfig.ValueComparisonTolerance, UseRelativeError: cfg.ProxyConfig.UseRelativeError, SkipRecentSamples: cfg.ProxyConfig.SkipRecentSamples, + SkipSamplesBefore: cfg.ProxyConfig.SkipSamplesBefore * 1000, RequireExactErrorMatch: cfg.ProxyConfig.RequireExactErrorMatch, }) diff --git a/tools/querytee/proxy.go b/tools/querytee/proxy.go index 02d1e9f43a8..53359ff5546 100644 --- a/tools/querytee/proxy.go +++ b/tools/querytee/proxy.go @@ -39,6 +39,7 @@ type ProxyConfig struct { UseRelativeError bool PassThroughNonRegisteredRoutes bool SkipRecentSamples time.Duration + SkipSamplesBefore int64 RequireExactErrorMatch bool BackendSkipTLSVerify bool AddMissingTimeParamToInstantQueries bool @@ -65,6 +66,7 @@ func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) { f.Float64Var(&cfg.ValueComparisonTolerance, "proxy.value-comparison-tolerance", 0.000001, "The tolerance to apply when comparing floating point values in the responses. 0 to disable tolerance and require exact match (not recommended).") f.BoolVar(&cfg.UseRelativeError, "proxy.compare-use-relative-error", false, "Use relative error tolerance when comparing floating point values.") f.DurationVar(&cfg.SkipRecentSamples, "proxy.compare-skip-recent-samples", 2*time.Minute, "The window from now to skip comparing samples. 0 to disable.") + f.Int64Var(&cfg.SkipSamplesBefore, "proxy.compare-skip-samples-before", 0, "Skip the samples before the given unix timestamp in seconds for comparison.") f.BoolVar(&cfg.RequireExactErrorMatch, "proxy.compare-exact-error-matching", false, "If true, errors will be considered the same only if they are exactly the same. If false, errors will be considered the same if they are considered equivalent.") f.BoolVar(&cfg.PassThroughNonRegisteredRoutes, "proxy.passthrough-non-registered-routes", false, "Passthrough requests for non-registered routes to preferred backend.") f.BoolVar(&cfg.AddMissingTimeParamToInstantQueries, "proxy.add-missing-time-parameter-to-instant-queries", true, "Add a 'time' parameter to proxied instant query requests if they do not have one.") diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index f992a279fe8..e746945f3c5 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -40,6 +40,7 @@ type SampleComparisonOptions struct { Tolerance float64 UseRelativeError bool SkipRecentSamples time.Duration + SkipSamplesBefore int64 // unix time in milliseconds RequireExactErrorMatch bool } @@ -220,7 +221,7 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t return err } - if allMatrixSamplesWithinRecentSampleWindow(expected, queryEvaluationTime, opts) && allMatrixSamplesWithinRecentSampleWindow(actual, queryEvaluationTime, opts) { + if allMatrixSamplesOutsideComparableWindow(expected, queryEvaluationTime, opts) && allMatrixSamplesOutsideComparableWindow(actual, queryEvaluationTime, opts) { return nil } @@ -250,6 +251,19 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t } func compareMatrixSamples(expected, actual *model.SampleStream, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { + expected.Values = trimBeginning(expected.Values, func(p model.SamplePair) bool { + return int64(p.Timestamp) < opts.SkipSamplesBefore + }) + actual.Values = trimBeginning(actual.Values, func(p model.SamplePair) bool { + return int64(p.Timestamp) < opts.SkipSamplesBefore + }) + expected.Histograms = trimBeginning(expected.Histograms, func(p model.SampleHistogramPair) bool { + return int64(p.Timestamp) < opts.SkipSamplesBefore + }) + actual.Histograms = trimBeginning(actual.Histograms, func(p model.SampleHistogramPair) bool { + return int64(p.Timestamp) < opts.SkipSamplesBefore + }) + expectedSamplesTail, actualSamplesTail, err := comparePairs(expected.Values, actual.Values, func(p1 model.SamplePair, p2 model.SamplePair) error { err := compareSamplePair(p1, p2, queryEvaluationTime, opts) if err != nil { @@ -281,15 +295,15 @@ func compareMatrixSamples(expected, actual *model.SampleStream, queryEvaluationT return nil } - skipAllRecentFloatSamples := canSkipAllSamples(func(p model.SamplePair) bool { - return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 + skipAllFloatSamples := canSkipAllSamples(func(p model.SamplePair) bool { + return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 || int64(p.Timestamp) < opts.SkipSamplesBefore }, expectedSamplesTail, actualSamplesTail) - skipAllRecentHistogramSamples := canSkipAllSamples(func(p model.SampleHistogramPair) bool { - return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 + skipAllHistogramSamples := canSkipAllSamples(func(p model.SampleHistogramPair) bool { + return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 || int64(p.Timestamp) < opts.SkipSamplesBefore }, expectedHistogramSamplesTail, actualHistogramSamplesTail) - if skipAllRecentFloatSamples && skipAllRecentHistogramSamples { + if skipAllFloatSamples && skipAllHistogramSamples { return nil } @@ -345,6 +359,29 @@ func comparePairs[S ~[]M, M any](s1, s2 S, compare func(M, M) error) (S, S, erro return s1[i:], s2[i:], nil } +// trimBeginning returns s without the prefix that satisfies skip(). +func trimBeginning[S ~[]M, M any](s S, skip func(M) bool) S { + var i int + for ; i < len(s); i++ { + if !skip(s[i]) { + break + } + } + return s[i:] +} + +// filterSlice returns a new slice with elements from s removed that satisfy skip(). +func filterSlice[S ~[]M, M any](s S, skip func(M) bool) S { + var i int + var res S + for ; i < len(s); i++ { + if !skip(s[i]) { + res = append(res, s[i]) + } + } + return res +} + func canSkipAllSamples[S ~[]M, M any](skip func(M) bool, ss ...S) bool { for _, s := range ss { for _, p := range s { @@ -356,20 +393,22 @@ func canSkipAllSamples[S ~[]M, M any](skip func(M) bool, ss ...S) bool { return true } -func allMatrixSamplesWithinRecentSampleWindow(m model.Matrix, queryEvaluationTime time.Time, opts SampleComparisonOptions) bool { - if opts.SkipRecentSamples == 0 { +func allMatrixSamplesOutsideComparableWindow(m model.Matrix, queryEvaluationTime time.Time, opts SampleComparisonOptions) bool { + if opts.SkipRecentSamples == 0 && opts.SkipSamplesBefore == 0 { return false } for _, series := range m { for _, sample := range series.Values { - if queryEvaluationTime.Sub(sample.Timestamp.Time()) > opts.SkipRecentSamples { + st := sample.Timestamp + if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && int64(st) >= opts.SkipSamplesBefore { return false } } for _, sample := range series.Histograms { - if queryEvaluationTime.Sub(sample.Timestamp.Time()) > opts.SkipRecentSamples { + st := sample.Timestamp + if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && int64(st) >= opts.SkipSamplesBefore { return false } } @@ -391,10 +430,17 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t return err } - if allVectorSamplesWithinRecentSampleWindow(expected, queryEvaluationTime, opts) && allVectorSamplesWithinRecentSampleWindow(actual, queryEvaluationTime, opts) { + if allVectorSamplesOutsideComparableWindow(expected, queryEvaluationTime, opts) && allVectorSamplesOutsideComparableWindow(actual, queryEvaluationTime, opts) { return nil } + expected = filterSlice(expected, func(p *model.Sample) bool { + return int64(p.Timestamp) < opts.SkipSamplesBefore + }) + actual = filterSlice(actual, func(p *model.Sample) bool { + return int64(p.Timestamp) < opts.SkipSamplesBefore + }) + if len(expected) != len(actual) { return fmt.Errorf("expected %d metrics but got %d", len(expected), len(actual)) } @@ -454,13 +500,14 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t return nil } -func allVectorSamplesWithinRecentSampleWindow(v model.Vector, queryEvaluationTime time.Time, opts SampleComparisonOptions) bool { - if opts.SkipRecentSamples == 0 { +func allVectorSamplesOutsideComparableWindow(v model.Vector, queryEvaluationTime time.Time, opts SampleComparisonOptions) bool { + if opts.SkipRecentSamples == 0 && opts.SkipSamplesBefore == 0 { return false } for _, sample := range v { - if queryEvaluationTime.Sub(sample.Timestamp.Time()) > opts.SkipRecentSamples { + st := sample.Timestamp + if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && int64(st) >= opts.SkipSamplesBefore { return false } } @@ -495,6 +542,9 @@ func compareScalar(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t } func compareSamplePair(expected, actual model.SamplePair, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { + if int64(expected.Timestamp) < opts.SkipSamplesBefore && int64(actual.Timestamp) < opts.SkipSamplesBefore { + return nil + } if expected.Timestamp != actual.Timestamp { return fmt.Errorf("expected timestamp %v but got %v", expected.Timestamp, actual.Timestamp) } @@ -523,6 +573,10 @@ func compareSampleValue(first, second float64, opts SampleComparisonOptions) boo } func compareSampleHistogramPair(expected, actual model.SampleHistogramPair, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { + if int64(expected.Timestamp) < opts.SkipSamplesBefore { + return nil + } + if expected.Timestamp != actual.Timestamp { return fmt.Errorf("expected timestamp %v but got %v", expected.Timestamp, actual.Timestamp) } diff --git a/tools/querytee/response_comparator_test.go b/tools/querytee/response_comparator_test.go index 4603c087a3b..d7b0349410b 100644 --- a/tools/querytee/response_comparator_test.go +++ b/tools/querytee/response_comparator_test.go @@ -1139,6 +1139,7 @@ func TestCompareSamplesResponse(t *testing.T) { err error useRelativeError bool skipRecentSamples time.Duration + skipSamplesBefore int64 // In unix milliseconds }{ { name: "difference in response status", @@ -2166,12 +2167,259 @@ func TestCompareSamplesResponse(t *testing.T) { }`), err: errors.New(`expected info annotations ["\"info\" #1"] but got ["\"info\" #2"]`), }, + { + name: "should not fail when we skip samples from the beginning of a matrix for expected and actual - float", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"values":[[90,"9"], [100,"10"]]}, {"metric":{"foo":"bar2"},"values":[[100,"10"]]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"values":[[100,"10"]]}, {"metric":{"foo":"bar2"},"values":[[80,"9"], [100,"10"]]}]} + }`), + skipSamplesBefore: 95 * 1000, + }, + { + name: "should not fail when we skip all samples starting from the beginning - float", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"values":[[90,"9"], [100,"10"]]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"values":[[100,"10"]]}]} + }`), + skipSamplesBefore: 105 * 1000, + }, + { + name: "should fail when we skip partial samples in the beginning but compare some other - float", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"values":[[90,"9"], [97,"7"], [100,"10"]]}, {"metric":{"foo":"bar2"},"values":[[100,"10"]]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"values":[[100,"10"]]}, {"metric":{"foo":"bar2"},"values":[[90,"10"], [100,"10"]]}]} + }`), + skipSamplesBefore: 95 * 1000, + // 9 @[90] is not compared. foo=bar2 does not fail. + err: errors.New(`float sample pair does not match for metric {foo="bar"}: expected timestamp 97 but got 100 +Expected result for series: +{foo="bar"} => +7 @[97] +10 @[100] + +Actual result for series: +{foo="bar"} => +10 @[100]`), + }, + { + name: "should not fail when we skip samples from the beginning of a matrix for expected and actual - histogram", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[ + {"metric":{"foo":"bar"},"histograms":[[90,{"count": "2","sum": "4","buckets": [[1,"0","2","2"]]}], [100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}, + {"metric":{"foo":"bar2"},"histograms":[[100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[ + {"metric":{"foo":"bar"},"histograms":[[100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}, + {"metric":{"foo":"bar2"},"histograms":[[80,{"count": "2","sum": "4","buckets": [[1,"0","2","2"]]}], [100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}]} + }`), + skipSamplesBefore: 95 * 1000, + }, + { + name: "should not fail when we skip all samples starting from the beginning - histogram", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"histograms":[[90,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}], [100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[{"metric":{"foo":"bar"},"histograms":[[100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}]} + }`), + skipSamplesBefore: 105 * 1000, + }, + { + name: "should fail when we skip partial samples in the beginning but compare some other - histogram", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[ + {"metric":{"foo":"bar"},"histograms":[[90,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}], [97,{"count": "2","sum": "33","buckets": [[1,"0","2","2"]]}], [100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}, + {"metric":{"foo":"bar2"},"histograms":[[100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"matrix","result":[ + {"metric":{"foo":"bar"},"histograms":[[100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}, + {"metric":{"foo":"bar2"},"histograms":[[90,{"count": "2","sum": "44","buckets": [[1,"0","2","2"]]}], [100,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]]}]} + }`), + skipSamplesBefore: 95 * 1000, + // @[90] is not compared. foo=bar2 does not fail. + err: errors.New(`histogram sample pair does not match for metric {foo="bar"}: expected timestamp 97 but got 100 +Expected result for series: +{foo="bar"} => +Count: 2.000000, Sum: 33.000000, Buckets: [[0,2):2] @[97] +Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100] + +Actual result for series: +{foo="bar"} => +Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), + }, + { + name: "should not fail when skipped samples properly for vectors", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[90,"1"]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}]} + }`), + skipSamplesBefore: 100 * 1000, + }, + { + name: "should not fail when skipped samples properly for vectors - histogram", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"histogram":[90,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"histogram":[95,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} + }`), + skipSamplesBefore: 100 * 1000, + }, + { + name: "should fail when skipped samples only for expected vector", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[90,"1"]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[105,"1"]}]} + }`), + skipSamplesBefore: 100 * 1000, + err: errors.New(`expected 0 metrics but got 1`), + }, + { + name: "should fail when skipped samples only for actual vector", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[105,"1"]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}]} + }`), + skipSamplesBefore: 100 * 1000, + err: errors.New(`expected 1 metrics but got 0`), + }, + { + name: "should skip properly when there are multiple series in a vector", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[90,"1"]}, {"metric":{"foo":"bar2"},"value":[105,"1"]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}, {"metric":{"foo":"bar2"},"value":[105,"1"]}]} + }`), + skipSamplesBefore: 100 * 1000, + }, + { + name: "should skip properly when there are multiple series in a vector - histogram", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[ + {"metric":{"foo":"bar"},"histogram":[90,{"count":"2","sum":"333","buckets":[[1,"0","2","2"]]}]}, + {"metric":{"foo":"bar2"},"histogram":[105,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[ + {"metric":{"foo":"bar"},"histogram":[95,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}, + {"metric":{"foo":"bar2"},"histogram":[105,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} + }`), + skipSamplesBefore: 100 * 1000, + }, + { + name: "different series skipped in expected and actual, causing an error", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[105,"1"]}, {"metric":{"foo":"bar2"},"value":[90,"1"]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}, {"metric":{"foo":"bar2"},"value":[105,"1"]}]} + }`), + skipSamplesBefore: 100 * 1000, + err: errors.New(`expected metric {foo="bar"} missing from actual response`), + }, + { + name: "different series skipped in expected and actual, causing an error - histogram", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[ + {"metric":{"foo":"bar"},"histogram":[105,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}, + {"metric":{"foo":"bar2"},"histogram":[90,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"vector","result":[ + {"metric":{"foo":"bar"},"histogram":[95,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}, + {"metric":{"foo":"bar2"},"histogram":[105,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} + }`), + skipSamplesBefore: 100 * 1000, + err: errors.New(`expected metric {foo="bar"} missing from actual response`), + }, + { + name: "expected is skippable but not the actual, causing an error", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"scalar","result":[90,"1"]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"scalar","result":[100,"1"]} + }`), + skipSamplesBefore: 95 * 1000, + err: errors.New(`expected timestamp 90 but got 100`), + }, + { + name: "actual is skippable but not the expected, causing an error", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"scalar","result":[100,"1"]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"scalar","result":[90,"1"]} + }`), + skipSamplesBefore: 95 * 1000, + err: errors.New(`expected timestamp 100 but got 90`), + }, + { + name: "both expected and actual are skippable", + expected: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"scalar","result":[95,"1"]} + }`), + actual: json.RawMessage(`{ + "status": "success", + "data": {"resultType":"scalar","result":[90,"1"]} + }`), + skipSamplesBefore: 100 * 1000, + }, } { t.Run(tc.name, func(t *testing.T) { samplesComparator := NewSamplesComparator(SampleComparisonOptions{ Tolerance: tc.tolerance, UseRelativeError: tc.useRelativeError, SkipRecentSamples: tc.skipRecentSamples, + SkipSamplesBefore: tc.skipSamplesBefore, }) result, err := samplesComparator.Compare(tc.expected, tc.actual, nowT.Time()) if tc.err == nil { From 2f5d8f0d5f8e31558acf90b8064a84488094c7d0 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Thu, 3 Oct 2024 15:22:13 -0400 Subject: [PATCH 02/11] CHANGELOG Signed-off-by: Ganesh Vernekar --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 836993c0e65..d49db2a972d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,8 @@ ### Query-tee +* [FEATURE] Added `-proxy.compare-skip-samples-before` to skip samples before a given unix timestamp in seconds when comparing responses. #9515 + ### Documentation ### Tools From 003c2cf84abdad9ad3fa4847b81b7681a1baeca8 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Fri, 4 Oct 2024 18:41:50 -0400 Subject: [PATCH 03/11] Change flag type Signed-off-by: Ganesh Vernekar --- CHANGELOG.md | 2 +- cmd/query-tee/main.go | 3 ++- tools/querytee/proxy.go | 13 ++++++++-- tools/querytee/response_comparator.go | 28 +++++++++++----------- tools/querytee/response_comparator_test.go | 2 +- 5 files changed, 29 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d49db2a972d..bffc117c7fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,7 @@ ### Query-tee -* [FEATURE] Added `-proxy.compare-skip-samples-before` to skip samples before a given unix timestamp in seconds when comparing responses. #9515 +* [FEATURE] Added `-proxy.compare-skip-samples-before` to skip samples before the given time when comparing responses. The time must be in RFC3339 format. #9515 ### Documentation diff --git a/cmd/query-tee/main.go b/cmd/query-tee/main.go index 4e627f682bf..6bc8a263ca6 100644 --- a/cmd/query-tee/main.go +++ b/cmd/query-tee/main.go @@ -17,6 +17,7 @@ import ( "github.com/grafana/dskit/tracing" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/common/model" jaegercfg "github.com/uber/jaeger-client-go/config" "github.com/grafana/mimir/pkg/util/instrumentation" @@ -106,7 +107,7 @@ func mimirReadRoutes(cfg Config) []querytee.Route { Tolerance: cfg.ProxyConfig.ValueComparisonTolerance, UseRelativeError: cfg.ProxyConfig.UseRelativeError, SkipRecentSamples: cfg.ProxyConfig.SkipRecentSamples, - SkipSamplesBefore: cfg.ProxyConfig.SkipSamplesBefore * 1000, + SkipSamplesBefore: model.Time(cfg.ProxyConfig.SkipSamplesBefore.UnixMilli()), RequireExactErrorMatch: cfg.ProxyConfig.RequireExactErrorMatch, }) diff --git a/tools/querytee/proxy.go b/tools/querytee/proxy.go index 53359ff5546..d8ea4da1d92 100644 --- a/tools/querytee/proxy.go +++ b/tools/querytee/proxy.go @@ -39,7 +39,7 @@ type ProxyConfig struct { UseRelativeError bool PassThroughNonRegisteredRoutes bool SkipRecentSamples time.Duration - SkipSamplesBefore int64 + SkipSamplesBefore time.Time RequireExactErrorMatch bool BackendSkipTLSVerify bool AddMissingTimeParamToInstantQueries bool @@ -66,11 +66,20 @@ func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) { f.Float64Var(&cfg.ValueComparisonTolerance, "proxy.value-comparison-tolerance", 0.000001, "The tolerance to apply when comparing floating point values in the responses. 0 to disable tolerance and require exact match (not recommended).") f.BoolVar(&cfg.UseRelativeError, "proxy.compare-use-relative-error", false, "Use relative error tolerance when comparing floating point values.") f.DurationVar(&cfg.SkipRecentSamples, "proxy.compare-skip-recent-samples", 2*time.Minute, "The window from now to skip comparing samples. 0 to disable.") - f.Int64Var(&cfg.SkipSamplesBefore, "proxy.compare-skip-samples-before", 0, "Skip the samples before the given unix timestamp in seconds for comparison.") f.BoolVar(&cfg.RequireExactErrorMatch, "proxy.compare-exact-error-matching", false, "If true, errors will be considered the same only if they are exactly the same. If false, errors will be considered the same if they are considered equivalent.") f.BoolVar(&cfg.PassThroughNonRegisteredRoutes, "proxy.passthrough-non-registered-routes", false, "Passthrough requests for non-registered routes to preferred backend.") f.BoolVar(&cfg.AddMissingTimeParamToInstantQueries, "proxy.add-missing-time-parameter-to-instant-queries", true, "Add a 'time' parameter to proxied instant query requests if they do not have one.") f.Float64Var(&cfg.SecondaryBackendsRequestProportion, "proxy.secondary-backends-request-proportion", 1.0, "Proportion of requests to send to secondary backends. Must be between 0 and 1 (inclusive), and if not 1, then -backend.preferred must be set.") + + var skipSamplesBefore string + f.StringVar(&skipSamplesBefore, "proxy.compare-skip-samples-before", "", "Skip the samples before the given time for comparison. The time must be in RFC3339 format.") + if skipSamplesBefore != "" { + var err error + cfg.SkipSamplesBefore, err = time.Parse(time.RFC3339, skipSamplesBefore) + if err != nil { + cfg.SkipSamplesBefore = time.Time{} + } + } } type Route struct { diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index e746945f3c5..77f6b1cd834 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -40,7 +40,7 @@ type SampleComparisonOptions struct { Tolerance float64 UseRelativeError bool SkipRecentSamples time.Duration - SkipSamplesBefore int64 // unix time in milliseconds + SkipSamplesBefore model.Time RequireExactErrorMatch bool } @@ -252,16 +252,16 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t func compareMatrixSamples(expected, actual *model.SampleStream, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { expected.Values = trimBeginning(expected.Values, func(p model.SamplePair) bool { - return int64(p.Timestamp) < opts.SkipSamplesBefore + return p.Timestamp < opts.SkipSamplesBefore }) actual.Values = trimBeginning(actual.Values, func(p model.SamplePair) bool { - return int64(p.Timestamp) < opts.SkipSamplesBefore + return p.Timestamp < opts.SkipSamplesBefore }) expected.Histograms = trimBeginning(expected.Histograms, func(p model.SampleHistogramPair) bool { - return int64(p.Timestamp) < opts.SkipSamplesBefore + return p.Timestamp < opts.SkipSamplesBefore }) actual.Histograms = trimBeginning(actual.Histograms, func(p model.SampleHistogramPair) bool { - return int64(p.Timestamp) < opts.SkipSamplesBefore + return p.Timestamp < opts.SkipSamplesBefore }) expectedSamplesTail, actualSamplesTail, err := comparePairs(expected.Values, actual.Values, func(p1 model.SamplePair, p2 model.SamplePair) error { @@ -296,11 +296,11 @@ func compareMatrixSamples(expected, actual *model.SampleStream, queryEvaluationT } skipAllFloatSamples := canSkipAllSamples(func(p model.SamplePair) bool { - return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 || int64(p.Timestamp) < opts.SkipSamplesBefore + return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 || p.Timestamp < opts.SkipSamplesBefore }, expectedSamplesTail, actualSamplesTail) skipAllHistogramSamples := canSkipAllSamples(func(p model.SampleHistogramPair) bool { - return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 || int64(p.Timestamp) < opts.SkipSamplesBefore + return queryEvaluationTime.Sub(p.Timestamp.Time())-opts.SkipRecentSamples < 0 || p.Timestamp < opts.SkipSamplesBefore }, expectedHistogramSamplesTail, actualHistogramSamplesTail) if skipAllFloatSamples && skipAllHistogramSamples { @@ -401,14 +401,14 @@ func allMatrixSamplesOutsideComparableWindow(m model.Matrix, queryEvaluationTime for _, series := range m { for _, sample := range series.Values { st := sample.Timestamp - if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && int64(st) >= opts.SkipSamplesBefore { + if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && st >= opts.SkipSamplesBefore { return false } } for _, sample := range series.Histograms { st := sample.Timestamp - if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && int64(st) >= opts.SkipSamplesBefore { + if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && st >= opts.SkipSamplesBefore { return false } } @@ -435,10 +435,10 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t } expected = filterSlice(expected, func(p *model.Sample) bool { - return int64(p.Timestamp) < opts.SkipSamplesBefore + return p.Timestamp < opts.SkipSamplesBefore }) actual = filterSlice(actual, func(p *model.Sample) bool { - return int64(p.Timestamp) < opts.SkipSamplesBefore + return p.Timestamp < opts.SkipSamplesBefore }) if len(expected) != len(actual) { @@ -507,7 +507,7 @@ func allVectorSamplesOutsideComparableWindow(v model.Vector, queryEvaluationTime for _, sample := range v { st := sample.Timestamp - if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && int64(st) >= opts.SkipSamplesBefore { + if queryEvaluationTime.Sub(st.Time()) > opts.SkipRecentSamples && st >= opts.SkipSamplesBefore { return false } } @@ -542,7 +542,7 @@ func compareScalar(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t } func compareSamplePair(expected, actual model.SamplePair, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { - if int64(expected.Timestamp) < opts.SkipSamplesBefore && int64(actual.Timestamp) < opts.SkipSamplesBefore { + if expected.Timestamp < opts.SkipSamplesBefore && actual.Timestamp < opts.SkipSamplesBefore { return nil } if expected.Timestamp != actual.Timestamp { @@ -573,7 +573,7 @@ func compareSampleValue(first, second float64, opts SampleComparisonOptions) boo } func compareSampleHistogramPair(expected, actual model.SampleHistogramPair, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { - if int64(expected.Timestamp) < opts.SkipSamplesBefore { + if expected.Timestamp < opts.SkipSamplesBefore { return nil } diff --git a/tools/querytee/response_comparator_test.go b/tools/querytee/response_comparator_test.go index d7b0349410b..87682eaded3 100644 --- a/tools/querytee/response_comparator_test.go +++ b/tools/querytee/response_comparator_test.go @@ -2419,7 +2419,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), Tolerance: tc.tolerance, UseRelativeError: tc.useRelativeError, SkipRecentSamples: tc.skipRecentSamples, - SkipSamplesBefore: tc.skipSamplesBefore, + SkipSamplesBefore: model.Time(tc.skipSamplesBefore), }) result, err := samplesComparator.Compare(tc.expected, tc.actual, nowT.Time()) if tc.err == nil { From bb10a8d000b6cf8551c4fe89d22ebd7bdd46a8b9 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Tue, 8 Oct 2024 22:35:02 -0400 Subject: [PATCH 04/11] Fix review comments Signed-off-by: Ganesh Vernekar --- tools/querytee/proxy.go | 11 ++++------- tools/querytee/response_comparator.go | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/tools/querytee/proxy.go b/tools/querytee/proxy.go index d8ea4da1d92..0c4a100d104 100644 --- a/tools/querytee/proxy.go +++ b/tools/querytee/proxy.go @@ -8,6 +8,7 @@ package querytee import ( "flag" "fmt" + "github.com/grafana/dskit/flagext" "net/http" "net/http/httputil" "net/url" @@ -73,13 +74,9 @@ func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) { var skipSamplesBefore string f.StringVar(&skipSamplesBefore, "proxy.compare-skip-samples-before", "", "Skip the samples before the given time for comparison. The time must be in RFC3339 format.") - if skipSamplesBefore != "" { - var err error - cfg.SkipSamplesBefore, err = time.Parse(time.RFC3339, skipSamplesBefore) - if err != nil { - cfg.SkipSamplesBefore = time.Time{} - } - } + var t flagext.Time + _ = t.Set(skipSamplesBefore) // If there is any error, t will be time.Time{}. + cfg.SkipSamplesBefore = time.Time(t) } type Route struct { diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index 77f6b1cd834..16020fabb02 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -417,7 +417,7 @@ func allMatrixSamplesOutsideComparableWindow(m model.Matrix, queryEvaluationTime return true } -func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { +func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime time.Time, opts SampleComparisonOptions) (retErr error) { var expected, actual model.Vector err := json.Unmarshal(expectedRaw, &expected) @@ -434,12 +434,32 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t return nil } + // Warning: filtering samples can give out confusing error messages. For example if the actual response had + // matching series, but all sample timestamps were before SkipSamplesBefore, while expected response had samples + // after SkipSamplesBefore: instead of saying mismatch, it will instead say that some metrics is missing, because + // we filter them here. + eChanged, aChanged := false, false expected = filterSlice(expected, func(p *model.Sample) bool { + eChanged = true return p.Timestamp < opts.SkipSamplesBefore }) actual = filterSlice(actual, func(p *model.Sample) bool { + aChanged = true return p.Timestamp < opts.SkipSamplesBefore }) + defer func() { + if retErr != nil { + warning := "" + if eChanged && aChanged { + warning = "(also, some samples were filtered out from the expected and actual response)" + } else if aChanged { + warning = "(also, some samples were filtered out from the actual response)" + } else if eChanged { + warning = "(also, some samples were filtered out from the expected response)" + } + retErr = fmt.Errorf("%w %s", retErr, warning) + } + }() if len(expected) != len(actual) { return fmt.Errorf("expected %d metrics but got %d", len(expected), len(actual)) From 9226157f40c01cffb90f9c05af8e1fe2f505e0fb Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Tue, 8 Oct 2024 22:47:18 -0400 Subject: [PATCH 05/11] update tests with the new error message Signed-off-by: Ganesh Vernekar --- tools/querytee/response_comparator.go | 13 ++++++------- tools/querytee/response_comparator_test.go | 8 ++++---- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index 16020fabb02..4ec0781b065 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -438,26 +438,25 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t // matching series, but all sample timestamps were before SkipSamplesBefore, while expected response had samples // after SkipSamplesBefore: instead of saying mismatch, it will instead say that some metrics is missing, because // we filter them here. - eChanged, aChanged := false, false + eOrgLen, aOrgLen := len(expected), len(actual) expected = filterSlice(expected, func(p *model.Sample) bool { - eChanged = true return p.Timestamp < opts.SkipSamplesBefore }) actual = filterSlice(actual, func(p *model.Sample) bool { - aChanged = true return p.Timestamp < opts.SkipSamplesBefore }) + eChanged, aChanged := len(expected) != eOrgLen, len(actual) != aOrgLen defer func() { if retErr != nil { warning := "" if eChanged && aChanged { - warning = "(also, some samples were filtered out from the expected and actual response)" + warning = " (also, some samples were filtered out from the expected and actual response)" } else if aChanged { - warning = "(also, some samples were filtered out from the actual response)" + warning = " (also, some samples were filtered out from the actual response)" } else if eChanged { - warning = "(also, some samples were filtered out from the expected response)" + warning = " (also, some samples were filtered out from the expected response)" } - retErr = fmt.Errorf("%w %s", retErr, warning) + retErr = fmt.Errorf("%w%s", retErr, warning) } }() diff --git a/tools/querytee/response_comparator_test.go b/tools/querytee/response_comparator_test.go index 87682eaded3..51383ca383e 100644 --- a/tools/querytee/response_comparator_test.go +++ b/tools/querytee/response_comparator_test.go @@ -2302,7 +2302,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[105,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected 0 metrics but got 1`), + err: errors.New(`expected 0 metrics but got 1 (also, some samples were filtered out from the expected response)`), }, { name: "should fail when skipped samples only for actual vector", @@ -2315,7 +2315,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected 1 metrics but got 0`), + err: errors.New(`expected 1 metrics but got 0 (also, some samples were filtered out from the actual response)`), }, { name: "should skip properly when there are multiple series in a vector", @@ -2356,7 +2356,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}, {"metric":{"foo":"bar2"},"value":[105,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected metric {foo="bar"} missing from actual response`), + err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response)`), }, { name: "different series skipped in expected and actual, causing an error - histogram", @@ -2373,7 +2373,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), {"metric":{"foo":"bar2"},"histogram":[105,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected metric {foo="bar"} missing from actual response`), + err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response)`), }, { name: "expected is skippable but not the actual, causing an error", From 91920c3991a5c28c307275cd7e7a6abacb095c66 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Tue, 8 Oct 2024 22:52:19 -0400 Subject: [PATCH 06/11] make linter happy Signed-off-by: Ganesh Vernekar --- tools/querytee/proxy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/querytee/proxy.go b/tools/querytee/proxy.go index 0c4a100d104..b5e6d6a24d1 100644 --- a/tools/querytee/proxy.go +++ b/tools/querytee/proxy.go @@ -8,7 +8,6 @@ package querytee import ( "flag" "fmt" - "github.com/grafana/dskit/flagext" "net/http" "net/http/httputil" "net/url" @@ -19,6 +18,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/server" "github.com/grafana/dskit/spanlogger" "github.com/pkg/errors" From dd4aa260106aa4146f7723e06a8c4b8fb3f6b7be Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Wed, 9 Oct 2024 11:25:29 -0400 Subject: [PATCH 07/11] Fix review comments Signed-off-by: Ganesh Vernekar --- cmd/query-tee/main.go | 3 ++- tools/querytee/proxy.go | 9 ++------- tools/querytee/response_comparator.go | 6 +++--- tools/querytee/response_comparator_test.go | 8 ++++---- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/cmd/query-tee/main.go b/cmd/query-tee/main.go index 6bc8a263ca6..d3ab70fcca7 100644 --- a/cmd/query-tee/main.go +++ b/cmd/query-tee/main.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "os" + "time" "github.com/go-kit/log/level" "github.com/grafana/dskit/flagext" @@ -107,7 +108,7 @@ func mimirReadRoutes(cfg Config) []querytee.Route { Tolerance: cfg.ProxyConfig.ValueComparisonTolerance, UseRelativeError: cfg.ProxyConfig.UseRelativeError, SkipRecentSamples: cfg.ProxyConfig.SkipRecentSamples, - SkipSamplesBefore: model.Time(cfg.ProxyConfig.SkipSamplesBefore.UnixMilli()), + SkipSamplesBefore: model.Time(time.Time(cfg.ProxyConfig.SkipSamplesBefore).UnixMilli()), RequireExactErrorMatch: cfg.ProxyConfig.RequireExactErrorMatch, }) diff --git a/tools/querytee/proxy.go b/tools/querytee/proxy.go index b5e6d6a24d1..a976ece524b 100644 --- a/tools/querytee/proxy.go +++ b/tools/querytee/proxy.go @@ -40,7 +40,7 @@ type ProxyConfig struct { UseRelativeError bool PassThroughNonRegisteredRoutes bool SkipRecentSamples time.Duration - SkipSamplesBefore time.Time + SkipSamplesBefore flagext.Time RequireExactErrorMatch bool BackendSkipTLSVerify bool AddMissingTimeParamToInstantQueries bool @@ -67,16 +67,11 @@ func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) { f.Float64Var(&cfg.ValueComparisonTolerance, "proxy.value-comparison-tolerance", 0.000001, "The tolerance to apply when comparing floating point values in the responses. 0 to disable tolerance and require exact match (not recommended).") f.BoolVar(&cfg.UseRelativeError, "proxy.compare-use-relative-error", false, "Use relative error tolerance when comparing floating point values.") f.DurationVar(&cfg.SkipRecentSamples, "proxy.compare-skip-recent-samples", 2*time.Minute, "The window from now to skip comparing samples. 0 to disable.") + f.Var(&cfg.SkipSamplesBefore, "proxy.compare-skip-samples-before", "Skip the samples before the given time for comparison. The time must be in RFC3339 format.") f.BoolVar(&cfg.RequireExactErrorMatch, "proxy.compare-exact-error-matching", false, "If true, errors will be considered the same only if they are exactly the same. If false, errors will be considered the same if they are considered equivalent.") f.BoolVar(&cfg.PassThroughNonRegisteredRoutes, "proxy.passthrough-non-registered-routes", false, "Passthrough requests for non-registered routes to preferred backend.") f.BoolVar(&cfg.AddMissingTimeParamToInstantQueries, "proxy.add-missing-time-parameter-to-instant-queries", true, "Add a 'time' parameter to proxied instant query requests if they do not have one.") f.Float64Var(&cfg.SecondaryBackendsRequestProportion, "proxy.secondary-backends-request-proportion", 1.0, "Proportion of requests to send to secondary backends. Must be between 0 and 1 (inclusive), and if not 1, then -backend.preferred must be set.") - - var skipSamplesBefore string - f.StringVar(&skipSamplesBefore, "proxy.compare-skip-samples-before", "", "Skip the samples before the given time for comparison. The time must be in RFC3339 format.") - var t flagext.Time - _ = t.Set(skipSamplesBefore) // If there is any error, t will be time.Time{}. - cfg.SkipSamplesBefore = time.Time(t) } type Route struct { diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index 4ec0781b065..69db3c05fad 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -450,11 +450,11 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t if retErr != nil { warning := "" if eChanged && aChanged { - warning = " (also, some samples were filtered out from the expected and actual response)" + warning = " (also, some samples were filtered out from the expected and actual response due to the 'skip samples before')" } else if aChanged { - warning = " (also, some samples were filtered out from the actual response)" + warning = " (also, some samples were filtered out from the actual response due to the 'skip samples before')" } else if eChanged { - warning = " (also, some samples were filtered out from the expected response)" + warning = " (also, some samples were filtered out from the expected response due to the 'skip samples before')" } retErr = fmt.Errorf("%w%s", retErr, warning) } diff --git a/tools/querytee/response_comparator_test.go b/tools/querytee/response_comparator_test.go index 51383ca383e..ce2dbe5a9c5 100644 --- a/tools/querytee/response_comparator_test.go +++ b/tools/querytee/response_comparator_test.go @@ -2302,7 +2302,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[105,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected 0 metrics but got 1 (also, some samples were filtered out from the expected response)`), + err: errors.New(`expected 0 metrics but got 1 (also, some samples were filtered out from the expected response due to the 'skip samples before')`), }, { name: "should fail when skipped samples only for actual vector", @@ -2315,7 +2315,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected 1 metrics but got 0 (also, some samples were filtered out from the actual response)`), + err: errors.New(`expected 1 metrics but got 0 (also, some samples were filtered out from the actual response due to the 'skip samples before')`), }, { name: "should skip properly when there are multiple series in a vector", @@ -2356,7 +2356,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}, {"metric":{"foo":"bar2"},"value":[105,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response)`), + err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response due to the 'skip samples before')`), }, { name: "different series skipped in expected and actual, causing an error - histogram", @@ -2373,7 +2373,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), {"metric":{"foo":"bar2"},"histogram":[105,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response)`), + err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response due to the 'skip samples before')`), }, { name: "expected is skippable but not the actual, causing an error", From 7cbf94a6a0a71ec6cd57512764890b581ae2ed13 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Wed, 9 Oct 2024 11:31:15 -0400 Subject: [PATCH 08/11] Update flag description Signed-off-by: Ganesh Vernekar --- CHANGELOG.md | 2 +- tools/querytee/proxy.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bffc117c7fb..fffa8f33e22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,7 @@ ### Query-tee -* [FEATURE] Added `-proxy.compare-skip-samples-before` to skip samples before the given time when comparing responses. The time must be in RFC3339 format. #9515 +* [FEATURE] Added `-proxy.compare-skip-samples-before` to skip samples before the given time when comparing responses. The time can be in RFC3339 format (or) RFC3339 without the timezone and seconds (or) date only. #9515 ### Documentation diff --git a/tools/querytee/proxy.go b/tools/querytee/proxy.go index a976ece524b..3a4f52e21bb 100644 --- a/tools/querytee/proxy.go +++ b/tools/querytee/proxy.go @@ -67,7 +67,7 @@ func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) { f.Float64Var(&cfg.ValueComparisonTolerance, "proxy.value-comparison-tolerance", 0.000001, "The tolerance to apply when comparing floating point values in the responses. 0 to disable tolerance and require exact match (not recommended).") f.BoolVar(&cfg.UseRelativeError, "proxy.compare-use-relative-error", false, "Use relative error tolerance when comparing floating point values.") f.DurationVar(&cfg.SkipRecentSamples, "proxy.compare-skip-recent-samples", 2*time.Minute, "The window from now to skip comparing samples. 0 to disable.") - f.Var(&cfg.SkipSamplesBefore, "proxy.compare-skip-samples-before", "Skip the samples before the given time for comparison. The time must be in RFC3339 format.") + f.Var(&cfg.SkipSamplesBefore, "proxy.compare-skip-samples-before", "Skip the samples before the given time for comparison. The time can be in RFC3339 format (or) RFC3339 without the timezone and seconds (or) date only.") f.BoolVar(&cfg.RequireExactErrorMatch, "proxy.compare-exact-error-matching", false, "If true, errors will be considered the same only if they are exactly the same. If false, errors will be considered the same if they are considered equivalent.") f.BoolVar(&cfg.PassThroughNonRegisteredRoutes, "proxy.passthrough-non-registered-routes", false, "Passthrough requests for non-registered routes to preferred backend.") f.BoolVar(&cfg.AddMissingTimeParamToInstantQueries, "proxy.add-missing-time-parameter-to-instant-queries", true, "Add a 'time' parameter to proxied instant query requests if they do not have one.") From 2722a9fa69e63757a026021e67c540f2b60d429b Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Wed, 9 Oct 2024 13:26:43 -0400 Subject: [PATCH 09/11] Update tools/querytee/response_comparator.go Co-authored-by: Marco Pracucci --- tools/querytee/response_comparator.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index 69db3c05fad..5e896b21ee5 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -561,9 +561,12 @@ func compareScalar(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t } func compareSamplePair(expected, actual model.SamplePair, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { + // If the timestamp is before the configured SkipSamplesBefore then we don't even check if the timestamp is correct. + // The reason is that the SkipSamplesBefore feature may be used to compare queries hitting a different storage and one of two storages has no historical data. if expected.Timestamp < opts.SkipSamplesBefore && actual.Timestamp < opts.SkipSamplesBefore { return nil } + if expected.Timestamp != actual.Timestamp { return fmt.Errorf("expected timestamp %v but got %v", expected.Timestamp, actual.Timestamp) } From affff28413f10b482e16c8c101a0a38691711d86 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Wed, 9 Oct 2024 13:32:43 -0400 Subject: [PATCH 10/11] Update tools/querytee/response_comparator.go Co-authored-by: Marco Pracucci --- tools/querytee/response_comparator.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index 5e896b21ee5..6426c3c58f4 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -456,7 +456,9 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t } else if eChanged { warning = " (also, some samples were filtered out from the expected response due to the 'skip samples before')" } - retErr = fmt.Errorf("%w%s", retErr, warning) + if warning != "" { + retErr = fmt.Errorf("%w%s", retErr, warning) + } } }() From fa25f8f5b4ebd8668c23615c77daa53034bc812d Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Wed, 9 Oct 2024 13:36:52 -0400 Subject: [PATCH 11/11] Fix review comments Signed-off-by: Ganesh Vernekar --- tools/querytee/response_comparator.go | 53 ++++++++++------------ tools/querytee/response_comparator_test.go | 8 ++-- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index 69db3c05fad..bf55c92d05c 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -372,9 +372,8 @@ func trimBeginning[S ~[]M, M any](s S, skip func(M) bool) S { // filterSlice returns a new slice with elements from s removed that satisfy skip(). func filterSlice[S ~[]M, M any](s S, skip func(M) bool) S { - var i int - var res S - for ; i < len(s); i++ { + res := make(S, 0, len(s)) + for i := 0; i < len(s); i++ { if !skip(s[i]) { res = append(res, s[i]) } @@ -434,31 +433,29 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, queryEvaluationTime t return nil } - // Warning: filtering samples can give out confusing error messages. For example if the actual response had - // matching series, but all sample timestamps were before SkipSamplesBefore, while expected response had samples - // after SkipSamplesBefore: instead of saying mismatch, it will instead say that some metrics is missing, because - // we filter them here. - eOrgLen, aOrgLen := len(expected), len(actual) - expected = filterSlice(expected, func(p *model.Sample) bool { - return p.Timestamp < opts.SkipSamplesBefore - }) - actual = filterSlice(actual, func(p *model.Sample) bool { - return p.Timestamp < opts.SkipSamplesBefore - }) - eChanged, aChanged := len(expected) != eOrgLen, len(actual) != aOrgLen - defer func() { - if retErr != nil { - warning := "" - if eChanged && aChanged { - warning = " (also, some samples were filtered out from the expected and actual response due to the 'skip samples before')" - } else if aChanged { - warning = " (also, some samples were filtered out from the actual response due to the 'skip samples before')" - } else if eChanged { - warning = " (also, some samples were filtered out from the expected response due to the 'skip samples before')" + if opts.SkipSamplesBefore > 0 { + // Warning: filtering samples can give out confusing error messages. For example if the actual response had + // matching series, but all sample timestamps were before SkipSamplesBefore, while expected response had samples + // after SkipSamplesBefore: instead of saying mismatch, it will instead say that some metrics is missing, because + // we filter them here. + eOrgLen, aOrgLen := len(expected), len(actual) + expected = filterSlice(expected, func(p *model.Sample) bool { return p.Timestamp < opts.SkipSamplesBefore }) + actual = filterSlice(actual, func(p *model.Sample) bool { return p.Timestamp < opts.SkipSamplesBefore }) + eChanged, aChanged := len(expected) != eOrgLen, len(actual) != aOrgLen + defer func() { + if retErr != nil { + warning := "" + if eChanged && aChanged { + warning = " (also, some samples were filtered out from the expected and actual response due to the 'skip samples before'; if all samples have been filtered out, this could cause the check on the expected number of metrics to fail)" + } else if aChanged { + warning = " (also, some samples were filtered out from the actual response due to the 'skip samples before'; if all samples have been filtered out, this could cause the check on the expected number of metrics to fail)" + } else if eChanged { + warning = " (also, some samples were filtered out from the expected response due to the 'skip samples before'; if all samples have been filtered out, this could cause the check on the expected number of metrics to fail)" + } + retErr = fmt.Errorf("%w%s", retErr, warning) } - retErr = fmt.Errorf("%w%s", retErr, warning) - } - }() + }() + } if len(expected) != len(actual) { return fmt.Errorf("expected %d metrics but got %d", len(expected), len(actual)) @@ -592,7 +589,7 @@ func compareSampleValue(first, second float64, opts SampleComparisonOptions) boo } func compareSampleHistogramPair(expected, actual model.SampleHistogramPair, queryEvaluationTime time.Time, opts SampleComparisonOptions) error { - if expected.Timestamp < opts.SkipSamplesBefore { + if expected.Timestamp < opts.SkipSamplesBefore && actual.Timestamp < opts.SkipSamplesBefore { return nil } diff --git a/tools/querytee/response_comparator_test.go b/tools/querytee/response_comparator_test.go index ce2dbe5a9c5..a9ec0a1565d 100644 --- a/tools/querytee/response_comparator_test.go +++ b/tools/querytee/response_comparator_test.go @@ -2302,7 +2302,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[105,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected 0 metrics but got 1 (also, some samples were filtered out from the expected response due to the 'skip samples before')`), + err: errors.New(`expected 0 metrics but got 1 (also, some samples were filtered out from the expected response due to the 'skip samples before'; if all samples have been filtered out, this could cause the check on the expected number of metrics to fail)`), }, { name: "should fail when skipped samples only for actual vector", @@ -2315,7 +2315,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected 1 metrics but got 0 (also, some samples were filtered out from the actual response due to the 'skip samples before')`), + err: errors.New(`expected 1 metrics but got 0 (also, some samples were filtered out from the actual response due to the 'skip samples before'; if all samples have been filtered out, this could cause the check on the expected number of metrics to fail)`), }, { name: "should skip properly when there are multiple series in a vector", @@ -2356,7 +2356,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), "data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[95,"1"]}, {"metric":{"foo":"bar2"},"value":[105,"1"]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response due to the 'skip samples before')`), + err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response due to the 'skip samples before'; if all samples have been filtered out, this could cause the check on the expected number of metrics to fail)`), }, { name: "different series skipped in expected and actual, causing an error - histogram", @@ -2373,7 +2373,7 @@ Count: 2.000000, Sum: 3.000000, Buckets: [[0,2):2] @[100]`), {"metric":{"foo":"bar2"},"histogram":[105,{"count":"2","sum":"3","buckets":[[1,"0","2","2"]]}]}]} }`), skipSamplesBefore: 100 * 1000, - err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response due to the 'skip samples before')`), + err: errors.New(`expected metric {foo="bar"} missing from actual response (also, some samples were filtered out from the expected and actual response due to the 'skip samples before'; if all samples have been filtered out, this could cause the check on the expected number of metrics to fail)`), }, { name: "expected is skippable but not the actual, causing an error",