Skip to content

Commit

Permalink
cgroup-rstat-flushing: Default disable per cgroup recording
Browse files Browse the repository at this point in the history
Recording two counters per cgroup generates too much data
for Prometheus. This is a practical feature for troubleshooting,
but it should not be enabled on all servers by default.

Signed-off-by: Jesper Dangaard Brouer <[email protected]>
  • Loading branch information
netoptimizer committed Aug 19, 2024
1 parent ebe07e7 commit 3397837
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 24 deletions.
9 changes: 9 additions & 0 deletions examples/cgroup-rstat-flushing.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ struct flush_key_t {
u32 level;
} __attribute__((packed));

/* This provides an easy way to enable per-cgroup flush recording.
* It is disabled by default, as it generates too much data.
* For troubleshooting purposes it can be enabled on selected servers
* by changing the #undef below into a #define.
*/
#undef CONFIG_TRACK_PER_CGROUP_FLUSH
#ifdef CONFIG_TRACK_PER_CGROUP_FLUSH
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
__uint(max_entries, 10000);
Expand All @@ -99,6 +105,7 @@ struct {
__type(key, struct flush_key_t);
__type(value, u64);
} cgroup_rstat_flush_nanoseconds_count SEC(".maps");
#endif /* CONFIG_TRACK_PER_CGROUP_FLUSH */

/* Complex key for encoding lock properties */
struct lock_key_t {
Expand Down Expand Up @@ -386,12 +393,14 @@ int BPF_PROG(cgroup_rstat_flush_locked_exit, struct cgroup *cgrp)
* rate(_seconds_sum[1m]) / rate(_seconds_count[1m])
*/

#ifdef CONFIG_TRACK_PER_CGROUP_FLUSH
/* Per-cgroup recording: average latency is derived from nanoseconds_sum and nanoseconds_count */
struct flush_key_t flush_key;
flush_key.cgrp_id = cgroup_id(cgrp);
flush_key.level = cgrp->level;
increment_map_nosync(&cgroup_rstat_flush_nanoseconds_sum, &flush_key, delta_ns);
increment_map_nosync(&cgroup_rstat_flush_nanoseconds_count, &flush_key, 1);
#endif

return 0;
}
Expand Down
50 changes: 26 additions & 24 deletions examples/cgroup-rstat-flushing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,32 @@ metrics:
size: 2
decoders:
- name: uint
- name: cgroup_rstat_flush_nanoseconds_sum
help: Sum amount of time (in nanoseconds) spent for flushes per cgroup
labels:
- name: cgroup
size: 8
decoders:
- name: uint
- name: cgroup
- name: level
size: 4
decoders:
- name: uint
- name: cgroup_rstat_flush_nanoseconds_count
help: Number of flush calls related to cgroup_rstat_flush_nanoseconds_sum
labels:
- name: cgroup
size: 8
decoders:
- name: uint
- name: cgroup
- name: level
size: 4
decoders:
- name: uint
# --- See: CONFIG_TRACK_PER_CGROUP_FLUSH
# - name: cgroup_rstat_flush_nanoseconds_sum
# help: Sum amount of time (in nanoseconds) spent for flushes per cgroup
# labels:
# - name: cgroup
# size: 8
# decoders:
# - name: uint
# - name: cgroup
# - name: level
# size: 4
# decoders:
# - name: uint
# - name: cgroup_rstat_flush_nanoseconds_count
# help: Number of flush calls related to cgroup_rstat_flush_nanoseconds_sum
# labels:
# - name: cgroup
# size: 8
# decoders:
# - name: uint
# - name: cgroup
# - name: level
# size: 4
# decoders:
# - name: uint
# ---
- name: cgroup_rstat_map_errors_total
help: Map related errors
labels:
Expand Down

0 comments on commit 3397837

Please sign in to comment.