From 5d73d9541c3016e4ef72bdecc081c503ecb0233e Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Tue, 24 Dec 2024 10:22:26 +0100 Subject: [PATCH] ingester: always log CPU samples for CPU limiter (#10284) * ingester: always log CPU samples for CPU limiter I don't see a reason why we might disable this. The memory footprint is miniscule and the CPU usage is minimal too. The samples give better granularity on what event in the past 60s might have triggered this: "did CPU increase just now or has it been increasing since 30s ago" Since I think it's harmless, we also don't need a flag for it. The flag is experimental so we can just drop it without notice. Signed-off-by: Dimitar Dimitrov * Add CHANGELOG.md entry Signed-off-by: Dimitar Dimitrov * Update CHANGELOG.md * Always log CPU samples Signed-off-by: Dimitar Dimitrov --------- Signed-off-by: Dimitar Dimitrov --- cmd/mimir/config-descriptor.json | 11 ----------- cmd/mimir/help-all.txt.tmpl | 2 -- .../mimir/configure/configuration-parameters/index.md | 4 ---- pkg/ingester/ingester.go | 8 +++----- pkg/util/limiter/utilization.go | 10 +++++----- 5 files changed, 8 insertions(+), 27 deletions(-) diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 3ac7f5c294b..dcc0ebfb925 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -3403,17 +3403,6 @@ "fieldType": "int", "fieldCategory": "experimental" }, - { - "kind": "field", - "name": "log_utilization_based_limiter_cpu_samples", - "required": false, - "desc": "Enable logging of utilization based limiter CPU samples.", - "fieldValue": null, - "fieldDefaultValue": false, - "fieldFlag": "ingester.log-utilization-based-limiter-cpu-samples", - "fieldType": "boolean", - "fieldCategory": "experimental" - }, { "kind": "field", "name": "error_sample_rate", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 08bc71314d3..bd0256e3f6c 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1625,8 +1625,6 @@ Usage of ./cmd/mimir/mimir: Max series that this ingester can hold (across all tenants). Requests to create additional series will be rejected. 0 = unlimited. -ingester.instance-limits.max-tenants int Max tenants that this ingester can hold. Requests from additional tenants will be rejected. 0 = unlimited. - -ingester.log-utilization-based-limiter-cpu-samples - [experimental] Enable logging of utilization based limiter CPU samples. -ingester.max-global-exemplars-per-user int [experimental] The maximum number of exemplars in memory, across the cluster. 0 to disable exemplars ingestion. -ingester.max-global-metadata-per-metric int diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index f9a147e7bca..dfd84fbca61 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -1238,10 +1238,6 @@ instance_limits: # CLI flag: -ingester.read-path-memory-utilization-limit [read_path_memory_utilization_limit: | default = 0] -# (experimental) Enable logging of utilization based limiter CPU samples. -# CLI flag: -ingester.log-utilization-based-limiter-cpu-samples -[log_utilization_based_limiter_cpu_samples: | default = false] - # (advanced) Each error will be logged once in this many times. Use 0 to log all # of them. # CLI flag: -ingester.error-sample-rate diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index d5a934e781a..e69cda6448a 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -198,9 +198,8 @@ type Config struct { IgnoreSeriesLimitForMetricNames string `yaml:"ignore_series_limit_for_metric_names" category:"advanced"` - ReadPathCPUUtilizationLimit float64 `yaml:"read_path_cpu_utilization_limit" category:"experimental"` - ReadPathMemoryUtilizationLimit uint64 `yaml:"read_path_memory_utilization_limit" category:"experimental"` - LogUtilizationBasedLimiterCPUSamples bool `yaml:"log_utilization_based_limiter_cpu_samples" category:"experimental"` + ReadPathCPUUtilizationLimit float64 `yaml:"read_path_cpu_utilization_limit" category:"experimental"` + ReadPathMemoryUtilizationLimit uint64 `yaml:"read_path_memory_utilization_limit" category:"experimental"` ErrorSampleRate int64 `yaml:"error_sample_rate" json:"error_sample_rate" category:"advanced"` @@ -238,7 +237,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.StringVar(&cfg.IgnoreSeriesLimitForMetricNames, "ingester.ignore-series-limit-for-metric-names", "", "Comma-separated list of metric names, for which the -ingester.max-global-series-per-metric limit will be ignored. Does not affect the -ingester.max-global-series-per-user limit.") f.Float64Var(&cfg.ReadPathCPUUtilizationLimit, "ingester.read-path-cpu-utilization-limit", 0, "CPU utilization limit, as CPU cores, for CPU/memory utilization based read request limiting. Use 0 to disable it.") f.Uint64Var(&cfg.ReadPathMemoryUtilizationLimit, "ingester.read-path-memory-utilization-limit", 0, "Memory limit, in bytes, for CPU/memory utilization based read request limiting. Use 0 to disable it.") - f.BoolVar(&cfg.LogUtilizationBasedLimiterCPUSamples, "ingester.log-utilization-based-limiter-cpu-samples", false, "Enable logging of utilization based limiter CPU samples.") f.Int64Var(&cfg.ErrorSampleRate, "ingester.error-sample-rate", 10, "Each error will be logged once in this many times. Use 0 to log all of them.") f.BoolVar(&cfg.UseIngesterOwnedSeriesForLimits, "ingester.use-ingester-owned-series-for-limits", false, "When enabled, only series currently owned by ingester according to the ring are used when checking user per-tenant series limit.") f.BoolVar(&cfg.UpdateIngesterOwnedSeries, "ingester.track-ingester-owned-series", false, "This option enables tracking of ingester-owned series based on ring state, even if -ingester.use-ingester-owned-series-for-limits is disabled.") @@ -420,7 +418,7 @@ func New(cfg Config, limits *validation.Overrides, ingestersRing ring.ReadRing, if cfg.ReadPathCPUUtilizationLimit > 0 || cfg.ReadPathMemoryUtilizationLimit > 0 { i.utilizationBasedLimiter = limiter.NewUtilizationBasedLimiter(cfg.ReadPathCPUUtilizationLimit, - cfg.ReadPathMemoryUtilizationLimit, cfg.LogUtilizationBasedLimiterCPUSamples, + cfg.ReadPathMemoryUtilizationLimit, true, log.WithPrefix(logger, "context", "read path"), prometheus.WrapRegistererWithPrefix("cortex_ingester_", registerer)) } diff --git a/pkg/util/limiter/utilization.go b/pkg/util/limiter/utilization.go index 98de134a306..9a9acc64ad2 100644 --- a/pkg/util/limiter/utilization.go +++ b/pkg/util/limiter/utilization.go @@ -209,12 +209,12 @@ func (l *UtilizationBasedLimiter) compute(nowFn func() time.Time) (currCPUUtil f return } + logger := l.logger + if l.cpuSamples != nil { + // Log also the CPU samples the CPU load EWMA is based on + logger = log.WithSuffix(logger, "source_samples", l.cpuSamples.String()) + } if enable { - logger := l.logger - if l.cpuSamples != nil { - // Log also the CPU samples the CPU load EWMA is based on - logger = log.WithSuffix(logger, "source_samples", l.cpuSamples.String()) - } level.Info(logger).Log("msg", "enabling resource utilization based limiting", "reason", reason, "memory_limit", formatMemoryLimit(l.memoryLimit), "memory_utilization", formatMemory(currHeapSize), "cpu_limit", formatCPULimit(l.cpuLimit), "cpu_utilization", formatCPU(currCPUUtil))