From 845aec954e4afc354957493750c26b3e994e4465 Mon Sep 17 00:00:00 2001 From: inwardvessel <5782523+inwardvessel@users.noreply.github.com> Date: Thu, 15 Jun 2023 15:31:44 -0700 Subject: [PATCH] use resizing of datasec maps in examples --- tools/sched_ext/scx_central.bpf.c | 17 ++++++----- tools/sched_ext/scx_central.c | 4 +++ tools/sched_ext/scx_common.bpf.h | 48 +++++++++++++++++++++++++++++++ tools/sched_ext/scx_pair.bpf.c | 19 ++++++------ tools/sched_ext/scx_pair.c | 30 ++++++++++++------- tools/sched_ext/scx_pair.h | 1 - tools/sched_ext/scx_user_common.h | 23 +++++++++++++++ 7 files changed, 112 insertions(+), 30 deletions(-) diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c index f44b9365a1778..67e6412bd5d83 100644 --- a/tools/sched_ext/scx_central.bpf.c +++ b/tools/sched_ext/scx_central.bpf.c @@ -51,14 +51,13 @@ char _license[] SEC("license") = "GPL"; enum { FALLBACK_DSQ_ID = 0, - MAX_CPUS = 4096, MS_TO_NS = 1000LLU * 1000, TIMER_INTERVAL_NS = 1 * MS_TO_NS, }; const volatile bool switch_partial; const volatile s32 central_cpu; -const volatile u32 nr_cpu_ids = 64; /* !0 for veristat, set during init */ +const volatile u32 nr_cpu_ids = 1; /* !0 for veristat, set during init */ u64 nr_total, nr_locals, nr_queued, nr_lost_pids; u64 nr_timers, nr_dispatches, nr_mismatches, nr_retries; @@ -73,8 +72,8 @@ struct { } central_q SEC(".maps"); /* can't use percpu map due to bad lookups */ -static bool cpu_gimme_task[MAX_CPUS]; -static u64 cpu_started_at[MAX_CPUS]; +bool RESIZABLE_ARRAY(data, cpu_gimme_task); +u64 RESIZABLE_ARRAY(data, cpu_started_at); struct central_timer { struct bpf_timer timer; @@ -189,7 +188,7 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev) break; /* central's gimme is never set */ - gimme = MEMBER_VPTR(cpu_gimme_task, [cpu]); + gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids); if (gimme && !*gimme) continue; @@ -220,7 +219,7 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct 
task_struct *prev) if (scx_bpf_consume(FALLBACK_DSQ_ID)) return; - gimme = MEMBER_VPTR(cpu_gimme_task, [cpu]); + gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids); if (gimme) *gimme = true; @@ -235,7 +234,7 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev) void BPF_STRUCT_OPS(central_running, struct task_struct *p) { s32 cpu = scx_bpf_task_cpu(p); - u64 *started_at = MEMBER_VPTR(cpu_started_at, [cpu]); + u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids); if (started_at) *started_at = bpf_ktime_get_ns() ?: 1; /* 0 indicates idle */ } @@ -243,7 +242,7 @@ void BPF_STRUCT_OPS(central_running, struct task_struct *p) void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable) { s32 cpu = scx_bpf_task_cpu(p); - u64 *started_at = MEMBER_VPTR(cpu_started_at, [cpu]); + u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids); if (started_at) *started_at = 0; } @@ -262,7 +261,7 @@ static int central_timerfn(void *map, int *key, struct bpf_timer *timer) continue; /* kick iff the current one exhausted its slice */ - started_at = MEMBER_VPTR(cpu_started_at, [cpu]); + started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids); if (started_at && *started_at && vtime_before(now, *started_at + SCX_SLICE_DFL)) continue; diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index a303401ffe1a1..580d4b50172fa 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -63,6 +63,10 @@ int main(int argc, char **argv) } } + /* Resize arrays so their element count is equal to cpu count. 
*/ + RESIZE_ARRAY(data, cpu_gimme_task, skel->rodata->nr_cpu_ids); + RESIZE_ARRAY(data, cpu_started_at, skel->rodata->nr_cpu_ids); + SCX_BUG_ON(scx_central__load(skel), "Failed to load skel"); link = bpf_map__attach_struct_ops(skel->maps.central_ops); diff --git a/tools/sched_ext/scx_common.bpf.h b/tools/sched_ext/scx_common.bpf.h index 06cd1892af3b3..81bfe3d041c9a 100644 --- a/tools/sched_ext/scx_common.bpf.h +++ b/tools/sched_ext/scx_common.bpf.h @@ -81,6 +81,26 @@ BPF_PROG(name, ##args) SEC("struct_ops.s/"#name) \ BPF_PROG(name, ##args) +/** + * RESIZABLE_ARRAY - Generates annotations for an array that may be resized + * @elfsec: the data section of the BPF program in which to place the array + * @arr: the name of the array + * + * libbpf has an API for setting map value sizes. Since data sections (i.e. + * bss, data, rodata) themselves are maps, a data section can be resized. If + * a data section has an array as its last element, the BTF info for that + * array will be adjusted so that the length of the array is extended to meet the + * new length of the data section. This macro annotates an array to have an + * element count of one with the assumption that this array can be resized + * within the userspace program. It also annotates the section specifier so + * this array exists in a custom sub data section which can be resized + * independently. + * + * See RESIZE_ARRAY() for the userspace convenience macro for resizing an + * array declared with RESIZABLE_ARRAY(). 
+ */ +#define RESIZABLE_ARRAY(elfsec, arr) arr[1] SEC("."#elfsec"."#arr) + /** * MEMBER_VPTR - Obtain the verified pointer to a struct or array member * @base: struct or array to index @@ -117,6 +137,34 @@ BPF_PROG(name, ##args) __addr; \ }) +/** + * ARRAY_ELEM_PTR - Obtain the verified pointer to an array element + * @arr: array to index into + * @i: array index + * @n: number of elements in array + * + * Similar to MEMBER_VPTR() but is intended for use with arrays where the + * element count needs to be explicit. + * It can be used in cases where a global array is defined with an initial + * size but is intended to be resized before loading the BPF program. + * Without this version of the macro, MEMBER_VPTR() will use the compile time + * size of the array to compute the max, which will result in rejection by + * the verifier. + */ +#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)({ \ + u64 __base = (u64)arr; \ + u64 __addr = (u64)&(arr[i]) - __base; \ + asm volatile ( \ + "if %0 <= %[max] goto +2\n" \ + "%0 = 0\n" \ + "goto +1\n" \ + "%0 += %1\n" \ + : "+r"(__addr) \ + : "r"(__base), \ + [max]"r"(sizeof(arr[0]) * ((n) - 1))); \ + __addr; \ +}) + /* * BPF core and other generic helpers */ diff --git a/tools/sched_ext/scx_pair.bpf.c b/tools/sched_ext/scx_pair.bpf.c index cda126980ed51..9c9cf97f4feeb 100644 --- a/tools/sched_ext/scx_pair.bpf.c +++ b/tools/sched_ext/scx_pair.bpf.c @@ -123,19 +123,19 @@ char _license[] SEC("license") = "GPL"; const volatile bool switch_partial; /* !0 for veristat, set during init */ -const volatile u32 nr_cpu_ids = 64; +const volatile u32 nr_cpu_ids = 1; /* a pair of CPUs stay on a cgroup for this duration */ const volatile u32 pair_batch_dur_ns = SCX_SLICE_DFL; /* cpu ID -> pair cpu ID */ -const volatile s32 pair_cpu[MAX_CPUS] = { [0 ... 
MAX_CPUS - 1] = -1 }; +const volatile s32 RESIZABLE_ARRAY(rodata, pair_cpu); /* cpu ID -> pair_id */ -const volatile u32 pair_id[MAX_CPUS]; +const volatile u32 RESIZABLE_ARRAY(rodata, pair_id); /* CPU ID -> CPU # in the pair (0 or 1) */ -const volatile u32 in_pair_idx[MAX_CPUS]; +const volatile u32 RESIZABLE_ARRAY(rodata, in_pair_idx); struct pair_ctx { struct bpf_spin_lock lock; @@ -161,7 +161,6 @@ struct pair_ctx { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, MAX_CPUS / 2); __type(key, u32); __type(value, struct pair_ctx); } pair_ctx SEC(".maps"); @@ -299,7 +298,7 @@ static int lookup_pairc_and_mask(s32 cpu, struct pair_ctx **pairc, u32 *mask) { u32 *vptr; - vptr = (u32 *)MEMBER_VPTR(pair_id, [cpu]); + vptr = (u32 *)ARRAY_ELEM_PTR(pair_id, cpu, nr_cpu_ids); if (!vptr) return -EINVAL; @@ -307,7 +306,7 @@ static int lookup_pairc_and_mask(s32 cpu, struct pair_ctx **pairc, u32 *mask) if (!(*pairc)) return -EINVAL; - vptr = (u32 *)MEMBER_VPTR(in_pair_idx, [cpu]); + vptr = (u32 *)ARRAY_ELEM_PTR(in_pair_idx, cpu, nr_cpu_ids); if (!vptr) return -EINVAL; @@ -490,7 +489,7 @@ static int try_dispatch(s32 cpu) out_maybe_kick: if (kick_pair) { - s32 *pair = (s32 *)MEMBER_VPTR(pair_cpu, [cpu]); + s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids); if (pair) { __sync_fetch_and_add(&nr_kicks, 1); scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT); @@ -525,7 +524,7 @@ void BPF_STRUCT_OPS(pair_cpu_acquire, s32 cpu, struct scx_cpu_acquire_args *args bpf_spin_unlock(&pairc->lock); if (kick_pair) { - s32 *pair = (s32 *)MEMBER_VPTR(pair_cpu, [cpu]); + s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids); if (pair) { __sync_fetch_and_add(&nr_kicks, 1); @@ -554,7 +553,7 @@ void BPF_STRUCT_OPS(pair_cpu_release, s32 cpu, struct scx_cpu_release_args *args bpf_spin_unlock(&pairc->lock); if (kick_pair) { - s32 *pair = (s32 *)MEMBER_VPTR(pair_cpu, [cpu]); + s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids); if (pair) { __sync_fetch_and_add(&nr_kicks, 1); 
diff --git a/tools/sched_ext/scx_pair.c b/tools/sched_ext/scx_pair.c index c2de48430c5b3..9e6f3109653c2 100644 --- a/tools/sched_ext/scx_pair.c +++ b/tools/sched_ext/scx_pair.c @@ -67,27 +67,37 @@ int main(int argc, char **argv) } } + bpf_map__set_max_entries(skel->maps.pair_ctx, skel->rodata->nr_cpu_ids / 2); + + /* Resize arrays so their element count is equal to cpu count. */ + RESIZE_ARRAY(rodata, pair_cpu, skel->rodata->nr_cpu_ids); + RESIZE_ARRAY(rodata, pair_id, skel->rodata->nr_cpu_ids); + RESIZE_ARRAY(rodata, in_pair_idx, skel->rodata->nr_cpu_ids); + + for (i = 0; i < skel->rodata->nr_cpu_ids; i++) + skel->rodata_pair_cpu->pair_cpu[i] = -1; + printf("Pairs: "); for (i = 0; i < skel->rodata->nr_cpu_ids; i++) { int j = (i + stride) % skel->rodata->nr_cpu_ids; - if (skel->rodata->pair_cpu[i] >= 0) + if (skel->rodata_pair_cpu->pair_cpu[i] >= 0) continue; SCX_BUG_ON(i == j, "Invalid stride %d - CPU%d wants to be its own pair", stride, i); - SCX_BUG_ON(skel->rodata->pair_cpu[j] >= 0, + SCX_BUG_ON(skel->rodata_pair_cpu->pair_cpu[j] >= 0, "Invalid stride %d - three CPUs (%d, %d, %d) want to be a pair", - stride, i, j, skel->rodata->pair_cpu[j]); - - skel->rodata->pair_cpu[i] = j; - skel->rodata->pair_cpu[j] = i; - skel->rodata->pair_id[i] = i; - skel->rodata->pair_id[j] = i; - skel->rodata->in_pair_idx[i] = 0; - skel->rodata->in_pair_idx[j] = 1; + stride, i, j, skel->rodata_pair_cpu->pair_cpu[j]); + + skel->rodata_pair_cpu->pair_cpu[i] = j; + skel->rodata_pair_cpu->pair_cpu[j] = i; + skel->rodata_pair_id->pair_id[i] = i; + skel->rodata_pair_id->pair_id[j] = i; + skel->rodata_in_pair_idx->in_pair_idx[i] = 0; + skel->rodata_in_pair_idx->in_pair_idx[j] = 1; printf("[%d, %d] ", i, j); } diff --git a/tools/sched_ext/scx_pair.h b/tools/sched_ext/scx_pair.h index f60b824272f75..d9666a447d3fd 100644 --- a/tools/sched_ext/scx_pair.h +++ b/tools/sched_ext/scx_pair.h @@ -2,7 +2,6 @@ #define __SCX_EXAMPLE_PAIR_H enum { - MAX_CPUS = 4096, MAX_QUEUED = 4096, MAX_CGRPS = 4096, 
diff --git a/tools/sched_ext/scx_user_common.h b/tools/sched_ext/scx_user_common.h index 76a0d12eba28c..d5b7ce48cd6d7 100644 --- a/tools/sched_ext/scx_user_common.h +++ b/tools/sched_ext/scx_user_common.h @@ -31,4 +31,27 @@ SCX_BUG((__fmt) __VA_OPT__(,) __VA_ARGS__); \ } while (0) +/** + * RESIZE_ARRAY - Convenience macro for resizing a BPF array + * @elfsec: the data section of the BPF program in which the array exists + * @arr: the name of the array + * @n: the desired array element count + * + * For BPF arrays declared with RESIZABLE_ARRAY(), this macro performs two + * operations. It resizes the map which corresponds to the custom data + * section that contains the target array. As a side effect, the BTF info for + * the array is adjusted so that the array length is sized to cover the new + * data section size. The second operation is reassigning the skeleton pointer + * for that custom data section so that it points to the newly memory mapped + * region. + */ +#define RESIZE_ARRAY(elfsec, arr, n) \ + do { \ + size_t __sz; \ + bpf_map__set_value_size(skel->maps.elfsec##_##arr, \ + sizeof(skel->elfsec##_##arr->arr[0]) * (n)); \ + skel->elfsec##_##arr = \ + bpf_map__initial_value(skel->maps.elfsec##_##arr, &__sz); \ + } while (0) + #endif /* __SCHED_EXT_USER_COMMON_H */