Skip to content

Commit

Permalink
Merge pull request #20 from inwardvessel/resize_percpu_arrays_in_examples
Browse files Browse the repository at this point in the history

use resizing of datasec maps in examples
  • Loading branch information
Byte-Lab authored Aug 14, 2023
2 parents 35aef07 + 845aec9 commit 8ade500
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 30 deletions.
17 changes: 8 additions & 9 deletions tools/sched_ext/scx_central.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,13 @@ char _license[] SEC("license") = "GPL";

enum {
FALLBACK_DSQ_ID = 0,
MAX_CPUS = 4096,
MS_TO_NS = 1000LLU * 1000,
TIMER_INTERVAL_NS = 1 * MS_TO_NS,
};

const volatile bool switch_partial;
const volatile s32 central_cpu;
const volatile u32 nr_cpu_ids = 64; /* !0 for veristat, set during init */
const volatile u32 nr_cpu_ids = 1; /* !0 for veristat, set during init */

u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
u64 nr_timers, nr_dispatches, nr_mismatches, nr_retries;
Expand All @@ -73,8 +72,8 @@ struct {
} central_q SEC(".maps");

/* can't use percpu map due to bad lookups */
static bool cpu_gimme_task[MAX_CPUS];
static u64 cpu_started_at[MAX_CPUS];
bool RESIZABLE_ARRAY(data, cpu_gimme_task);
u64 RESIZABLE_ARRAY(data, cpu_started_at);

struct central_timer {
struct bpf_timer timer;
Expand Down Expand Up @@ -189,7 +188,7 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev)
break;

/* central's gimme is never set */
gimme = MEMBER_VPTR(cpu_gimme_task, [cpu]);
gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids);
if (gimme && !*gimme)
continue;

Expand Down Expand Up @@ -220,7 +219,7 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev)
if (scx_bpf_consume(FALLBACK_DSQ_ID))
return;

gimme = MEMBER_VPTR(cpu_gimme_task, [cpu]);
gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids);
if (gimme)
*gimme = true;

Expand All @@ -235,15 +234,15 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev)
void BPF_STRUCT_OPS(central_running, struct task_struct *p)
{
s32 cpu = scx_bpf_task_cpu(p);
u64 *started_at = MEMBER_VPTR(cpu_started_at, [cpu]);
u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
if (started_at)
*started_at = bpf_ktime_get_ns() ?: 1; /* 0 indicates idle */
}

void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
{
s32 cpu = scx_bpf_task_cpu(p);
u64 *started_at = MEMBER_VPTR(cpu_started_at, [cpu]);
u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
if (started_at)
*started_at = 0;
}
Expand All @@ -262,7 +261,7 @@ static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
continue;

/* kick iff the current one exhausted its slice */
started_at = MEMBER_VPTR(cpu_started_at, [cpu]);
started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
if (started_at && *started_at &&
vtime_before(now, *started_at + SCX_SLICE_DFL))
continue;
Expand Down
4 changes: 4 additions & 0 deletions tools/sched_ext/scx_central.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ int main(int argc, char **argv)
}
}

/* Resize arrays so their element count is equal to cpu count. */
RESIZE_ARRAY(data, cpu_gimme_task, skel->rodata->nr_cpu_ids);
RESIZE_ARRAY(data, cpu_started_at, skel->rodata->nr_cpu_ids);

SCX_BUG_ON(scx_central__load(skel), "Failed to load skel");

link = bpf_map__attach_struct_ops(skel->maps.central_ops);
Expand Down
48 changes: 48 additions & 0 deletions tools/sched_ext/scx_common.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,26 @@ BPF_PROG(name, ##args)
SEC("struct_ops.s/"#name) \
BPF_PROG(name, ##args)

/**
 * RESIZABLE_ARRAY - Generates annotations for an array that may be resized
 * @elfsec: the data section of the BPF program in which to place the array
 * @arr: the name of the array
 *
 * libbpf has an API for setting map value sizes. Since data sections (i.e.
 * bss, data, rodata) themselves are maps, a data section can be resized. If
 * a data section has an array as its last element, the BTF info for that
 * array will be adjusted so that the length of the array is extended to
 * meet the new length of the data section. This macro annotates an array to
 * have an element count of one with the assumption that this array can be
 * resized within the userspace program. It also annotates the section
 * specifier so this array exists in a custom sub data section which can be
 * resized independently.
 *
 * See RESIZE_ARRAY() for the userspace convenience macro for resizing an
 * array declared with RESIZABLE_ARRAY().
 */
#define RESIZABLE_ARRAY(elfsec, arr) arr[1] SEC("."#elfsec"."#arr)

/**
* MEMBER_VPTR - Obtain the verified pointer to a struct or array member
* @base: struct or array to index
Expand Down Expand Up @@ -117,6 +137,34 @@ BPF_PROG(name, ##args)
__addr; \
})

/**
* ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
* @arr: array to index into
* @i: array index
* @n: number of elements in array
*
* Similar to MEMBER_VPTR() but is intended for use with arrays where the
* element count needs to be explicit.
* It can be used in cases where a global array is defined with an initial
* size but is intended to be be resized before loading the BPF program.
* Without this version of the macro, MEMBER_VPTR() will use the compile time
* size of the array to compute the max, which will result in rejection by
* the verifier.
*/
#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)({ \
u64 __base = (u64)arr; \
u64 __addr = (u64)&(arr[i]) - __base; \
asm volatile ( \
"if %0 <= %[max] goto +2\n" \
"%0 = 0\n" \
"goto +1\n" \
"%0 += %1\n" \
: "+r"(__addr) \
: "r"(__base), \
[max]"r"(sizeof(arr[0]) * ((n) - 1))); \
__addr; \
})

/*
* BPF core and other generic helpers
*/
Expand Down
19 changes: 9 additions & 10 deletions tools/sched_ext/scx_pair.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,19 @@ char _license[] SEC("license") = "GPL";
const volatile bool switch_partial;

/* !0 for veristat, set during init */
const volatile u32 nr_cpu_ids = 64;
const volatile u32 nr_cpu_ids = 1;

/* a pair of CPUs stay on a cgroup for this duration */
const volatile u32 pair_batch_dur_ns = SCX_SLICE_DFL;

/* cpu ID -> pair cpu ID */
const volatile s32 pair_cpu[MAX_CPUS] = { [0 ... MAX_CPUS - 1] = -1 };
const volatile s32 RESIZABLE_ARRAY(rodata, pair_cpu);

/* cpu ID -> pair_id */
const volatile u32 pair_id[MAX_CPUS];
const volatile u32 RESIZABLE_ARRAY(rodata, pair_id);

/* CPU ID -> CPU # in the pair (0 or 1) */
const volatile u32 in_pair_idx[MAX_CPUS];
const volatile u32 RESIZABLE_ARRAY(rodata, in_pair_idx);

struct pair_ctx {
struct bpf_spin_lock lock;
Expand All @@ -161,7 +161,6 @@ struct pair_ctx {

struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, MAX_CPUS / 2);
__type(key, u32);
__type(value, struct pair_ctx);
} pair_ctx SEC(".maps");
Expand Down Expand Up @@ -299,15 +298,15 @@ static int lookup_pairc_and_mask(s32 cpu, struct pair_ctx **pairc, u32 *mask)
{
u32 *vptr;

vptr = (u32 *)MEMBER_VPTR(pair_id, [cpu]);
vptr = (u32 *)ARRAY_ELEM_PTR(pair_id, cpu, nr_cpu_ids);
if (!vptr)
return -EINVAL;

*pairc = bpf_map_lookup_elem(&pair_ctx, vptr);
if (!(*pairc))
return -EINVAL;

vptr = (u32 *)MEMBER_VPTR(in_pair_idx, [cpu]);
vptr = (u32 *)ARRAY_ELEM_PTR(in_pair_idx, cpu, nr_cpu_ids);
if (!vptr)
return -EINVAL;

Expand Down Expand Up @@ -490,7 +489,7 @@ static int try_dispatch(s32 cpu)

out_maybe_kick:
if (kick_pair) {
s32 *pair = (s32 *)MEMBER_VPTR(pair_cpu, [cpu]);
s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);
if (pair) {
__sync_fetch_and_add(&nr_kicks, 1);
scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT);
Expand Down Expand Up @@ -525,7 +524,7 @@ void BPF_STRUCT_OPS(pair_cpu_acquire, s32 cpu, struct scx_cpu_acquire_args *args
bpf_spin_unlock(&pairc->lock);

if (kick_pair) {
s32 *pair = (s32 *)MEMBER_VPTR(pair_cpu, [cpu]);
s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);

if (pair) {
__sync_fetch_and_add(&nr_kicks, 1);
Expand Down Expand Up @@ -554,7 +553,7 @@ void BPF_STRUCT_OPS(pair_cpu_release, s32 cpu, struct scx_cpu_release_args *args
bpf_spin_unlock(&pairc->lock);

if (kick_pair) {
s32 *pair = (s32 *)MEMBER_VPTR(pair_cpu, [cpu]);
s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);

if (pair) {
__sync_fetch_and_add(&nr_kicks, 1);
Expand Down
30 changes: 20 additions & 10 deletions tools/sched_ext/scx_pair.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,27 +67,37 @@ int main(int argc, char **argv)
}
}

bpf_map__set_max_entries(skel->maps.pair_ctx, skel->rodata->nr_cpu_ids / 2);

/* Resize arrays so their element count is equal to cpu count. */
RESIZE_ARRAY(rodata, pair_cpu, skel->rodata->nr_cpu_ids);
RESIZE_ARRAY(rodata, pair_id, skel->rodata->nr_cpu_ids);
RESIZE_ARRAY(rodata, in_pair_idx, skel->rodata->nr_cpu_ids);

for (i = 0; i < skel->rodata->nr_cpu_ids; i++)
skel->rodata_pair_cpu->pair_cpu[i] = -1;

printf("Pairs: ");
for (i = 0; i < skel->rodata->nr_cpu_ids; i++) {
int j = (i + stride) % skel->rodata->nr_cpu_ids;

if (skel->rodata->pair_cpu[i] >= 0)
if (skel->rodata_pair_cpu->pair_cpu[i] >= 0)
continue;

SCX_BUG_ON(i == j,
"Invalid stride %d - CPU%d wants to be its own pair",
stride, i);

SCX_BUG_ON(skel->rodata->pair_cpu[j] >= 0,
SCX_BUG_ON(skel->rodata_pair_cpu->pair_cpu[j] >= 0,
"Invalid stride %d - three CPUs (%d, %d, %d) want to be a pair",
stride, i, j, skel->rodata->pair_cpu[j]);

skel->rodata->pair_cpu[i] = j;
skel->rodata->pair_cpu[j] = i;
skel->rodata->pair_id[i] = i;
skel->rodata->pair_id[j] = i;
skel->rodata->in_pair_idx[i] = 0;
skel->rodata->in_pair_idx[j] = 1;
stride, i, j, skel->rodata_pair_cpu->pair_cpu[j]);

skel->rodata_pair_cpu->pair_cpu[i] = j;
skel->rodata_pair_cpu->pair_cpu[j] = i;
skel->rodata_pair_id->pair_id[i] = i;
skel->rodata_pair_id->pair_id[j] = i;
skel->rodata_in_pair_idx->in_pair_idx[i] = 0;
skel->rodata_in_pair_idx->in_pair_idx[j] = 1;

printf("[%d, %d] ", i, j);
}
Expand Down
1 change: 0 additions & 1 deletion tools/sched_ext/scx_pair.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#define __SCX_EXAMPLE_PAIR_H

enum {
MAX_CPUS = 4096,
MAX_QUEUED = 4096,
MAX_CGRPS = 4096,
};
Expand Down
23 changes: 23 additions & 0 deletions tools/sched_ext/scx_user_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,27 @@
SCX_BUG((__fmt) __VA_OPT__(,) __VA_ARGS__); \
} while (0)

/**
* RESIZE_ARRAY - Convenience macro for resizing a BPF array
* @elfsec: the data section of the BPF program in which to the array exists
* @arr: the name of the array
* @n: the desired array element count
*
* For BPF arrays declared with RESIZABLE_ARRAY(), this macro performs two
* operations. It resizes the map which corresponds to the custom data
* section that contains the target array. As a side effect, the BTF info for
* the array is adjusted so that the array length is sized to cover the new
* data section size. The second operation is reassigning the skeleton pointer
* for that custom data section so that it points to the newly memory mapped
* region.
*/
#define RESIZE_ARRAY(elfsec, arr, n) \
do { \
size_t __sz; \
bpf_map__set_value_size(skel->maps.elfsec##_##arr, \
sizeof(skel->elfsec##_##arr->arr[0]) * (n)); \
skel->elfsec##_##arr = \
bpf_map__initial_value(skel->maps.elfsec##_##arr, &__sz); \
} while (0)

#endif /* __SCHED_EXT_USER_COMMON_H */

0 comments on commit 8ade500

Please sign in to comment.