Skip to content

Commit

Permalink
Add changelog + metric
Browse files Browse the repository at this point in the history
  • Loading branch information
julienduchesne committed Dec 17, 2024
1 parent 8e909c5 commit 30d64c6
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
* [CHANGE] Querier: pass query matchers to queryable `IsApplicable` hook. #10256
* [ENHANCEMENT] Distributor: OTLP receiver now converts also metric metadata. See also https://github.com/prometheus/prometheus/pull/15416. #10168
* [ENHANCEMENT] Distributor: discard float and histogram samples with duplicated timestamps from each timeseries in a request before the request is forwarded to ingesters. Discarded samples are tracked by the `cortex_discarded_samples_total` metrics with reason `sample_duplicate_timestamp`. #10145
* [CHANGE] Ruler: Add `cortex_prometheus_rule_group_last_rule_duration_sum_seconds` metric to track the total evalation duration of a rule group regardless of concurrency #10189
* [BUGFIX] Distributor: Use a boolean to track changes while merging the ReplicaDesc components, rather than comparing the objects directly. #10185
* [BUGFIX] Querier: fix timeout responding to query-frontend when response size is very close to `-querier.frontend-client.grpc-max-send-msg-size`. #10154
* [BUGFIX] Ruler: fix indeterminate rules being always run concurrently (instead of never) when `-ruler.max-independent-rule-evaluation-concurrency` is set. https://github.com/prometheus/prometheus/pull/15560 #10258
* [BUGFIX] PromQL: Fix various UTF-8 bugs related to quoting. https://github.com/prometheus/prometheus/pull/15531 #10258
* [BUGFIX] Ruler: Prevent flapping (enabled/disabled) of the concurrency feature #10189

### Mixin

Expand Down
9 changes: 9 additions & 0 deletions pkg/ruler/manager_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type ManagerMetrics struct {
GroupInterval *prometheus.Desc
GroupLastEvalTime *prometheus.Desc
GroupLastDuration *prometheus.Desc
GroupLastRuleDurationSum *prometheus.Desc
GroupLastRestoreDuration *prometheus.Desc
GroupRules *prometheus.Desc
GroupLastEvalSamples *prometheus.Desc
Expand Down Expand Up @@ -89,6 +90,12 @@ func NewManagerMetrics(logger log.Logger) *ManagerMetrics {
[]string{"user", "rule_group"},
nil,
),
GroupLastRuleDurationSum: prometheus.NewDesc(
"cortex_prometheus_rule_group_last_rule_duration_sum_seconds",
"The sum of time in seconds it took to evaluate each rule in the group regardless of concurrency. This should be higher than the group duration if rules are evaluated concurrently.",
[]string{"user", "rule_group"},
nil,
),
GroupLastRestoreDuration: prometheus.NewDesc(
"cortex_prometheus_rule_group_last_restore_duration_seconds",
"The duration of the last alert rules alerts restoration using the `ALERTS_FOR_STATE` series across all rule groups.",
Expand Down Expand Up @@ -131,6 +138,7 @@ func (m *ManagerMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.GroupInterval
out <- m.GroupLastEvalTime
out <- m.GroupLastDuration
out <- m.GroupLastRuleDurationSum
out <- m.GroupLastRestoreDuration
out <- m.GroupRules
out <- m.GroupLastEvalSamples
Expand All @@ -156,6 +164,7 @@ func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfGaugesPerTenant(out, m.GroupInterval, "prometheus_rule_group_interval_seconds", dskit_metrics.WithLabels("rule_group"))
data.SendSumOfGaugesPerTenant(out, m.GroupLastEvalTime, "prometheus_rule_group_last_evaluation_timestamp_seconds", dskit_metrics.WithLabels("rule_group"))
data.SendSumOfGaugesPerTenant(out, m.GroupLastDuration, "prometheus_rule_group_last_duration_seconds", dskit_metrics.WithLabels("rule_group"))
data.SendSumOfGaugesPerTenant(out, m.GroupLastRuleDurationSum, "cortex_prometheus_rule_group_last_rule_duration_sum_seconds", dskit_metrics.WithLabels("rule_group"))
data.SendSumOfGaugesPerTenant(out, m.GroupRules, "prometheus_rule_group_rules", dskit_metrics.WithLabels("rule_group"))
data.SendSumOfGaugesPerTenant(out, m.GroupLastEvalSamples, "prometheus_rule_group_last_evaluation_samples", dskit_metrics.WithLabels("rule_group"))
}

0 comments on commit 30d64c6

Please sign in to comment.