forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
runqlen.py
executable file
·185 lines (164 loc) · 5.54 KB
/
runqlen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# runqlen Summarize scheduler run queue length as a histogram.
# For Linux, uses BCC, eBPF.
#
# This counts the length of the run queue, excluding the currently running
# thread, and shows it as a histogram.
#
# Also answers run queue occupancy.
#
# USAGE: runqlen [-h] [-T] [-O] [-C] [interval] [count]
#
# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
# a version of this tool that may work on Linux 4.6 - 4.8.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 12-Dec-2016 Brendan Gregg Created this.
from __future__ import print_function
from bcc import BPF, PerfType, PerfSWConfig
from time import sleep, strftime
import argparse
# arguments
# Command-line interface: three boolean switches plus two optional
# positionals. The positionals are left as strings by argparse (or the
# integer default) and are converted with int() where they are consumed.
examples = """examples:
./runqlen # summarize run queue length as a histogram
./runqlen 1 10 # print 1 second summaries, 10 times
./runqlen -T 1 # 1s summaries and timestamps
./runqlen -O # report run queue occupancy
./runqlen -C # show each CPU separately
"""
parser = argparse.ArgumentParser(
    description="Summarize scheduler run queue length as a histogram",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

# boolean switches: (short flag, long flag, help text)
for _short, _long, _help in (
        ("-T", "--timestamp", "include timestamp on output"),
        ("-O", "--runqocc", "report run queue occupancy"),
        ("-C", "--cpus", "print output for each CPU separately")):
    parser.add_argument(_short, _long, action="store_true", help=_help)

# optional positionals; the large default means "effectively forever"
for _name, _help in (
        ("interval", "output interval, in seconds"),
        ("count", "number of outputs")):
    parser.add_argument(_name, nargs="?", default=99999999, help=_help)

args = parser.parse_args()

# number of summaries still to print; decremented by the output loop
countdown = int(args.count)
# set to a true value to print the generated C program before loading it
debug = 0
# value passed as sample_freq when the perf event is attached
frequency = 99
# define BPF program
# bpf_text is a C source template that is handed to BPF(text=...) after two
# placeholders have been substituted by the Python code that follows:
#   STORAGE - replaced with the BPF_HISTOGRAM declaration of the "dist" map
#   STORE   - replaced with the statement(s) that increment "dist"
# do_perf_event() reads nr_running from the current task's cfs_rq, subtracts
# one (when non-zero) for the task that is currently running, and records the
# result through the STORE statement.
# NOTE(review): cfs_rq_partial must match the leading fields of the running
# kernel's struct cfs_rq for the nr_running read to be meaningful - confirm
# against the target kernel's kernel/sched/sched.h.
# NOTE(review): the C local "pid" is declared but never used in the body.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
// Declare enough of cfs_rq to find nr_running, since we can't #import the
// header. This will need maintenance. It is from kernel/sched/sched.h:
struct cfs_rq_partial {
struct load_weight load;
unsigned int nr_running, h_nr_running;
};
typedef struct cpu_key {
int cpu;
unsigned int slot;
} cpu_key_t;
STORAGE
int do_perf_event()
{
unsigned int len = 0;
pid_t pid = 0;
struct task_struct *task = NULL;
struct cfs_rq_partial *my_q = NULL;
// Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
// unstable interface and may need maintenance. Perhaps a future version
// of BPF will support task_rq(p) or something similar as a more reliable
// interface.
task = (struct task_struct *)bpf_get_current_task();
bpf_probe_read(&my_q, sizeof(my_q), &task->se.cfs_rq);
bpf_probe_read(&len, sizeof(len), &my_q->nr_running);
// Calculate run queue length by subtracting the currently running task,
// if present. len 0 == idle, len 1 == one running task.
if (len > 0)
len--;
STORE
return 0;
}
"""
# code substitutions
# Pick the map declaration (STORAGE) and the increment statement (STORE) for
# the C template: with -C the histogram is keyed by (cpu, slot), otherwise by
# the run queue length alone.
if args.cpus:
    storage_decl = 'BPF_HISTOGRAM(dist, cpu_key_t);'
    store_stmt = ('cpu_key_t key = {.slot = len}; '
                  'key.cpu = bpf_get_smp_processor_id(); '
                  'dist.increment(key);')
else:
    storage_decl = 'BPF_HISTOGRAM(dist, unsigned int);'
    store_stmt = 'dist.increment(len);'

# STORAGE is substituted before STORE, as in the original sequence.
bpf_text = bpf_text.replace('STORAGE', storage_decl)
bpf_text = bpf_text.replace('STORE', store_stmt)

# optionally dump the final C program
if debug:
    print(bpf_text)
# load BPF program and initialize perf_events
# The program is compiled and loaded exactly once; constructing BPF() a second
# time would only repeat the compilation and discard the first instance.
b = BPF(text=bpf_text)
# Run do_perf_event() from a software CPU-clock perf event. sample_period=0
# together with sample_freq=frequency selects sampling by frequency rather
# than by period.
b.attach_perf_event(ev_type=PerfType.SOFTWARE,
    ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event",
    sample_period=0, sample_freq=frequency)

print("Sampling run queue length... Hit Ctrl-C to end.")
# output
# Each iteration sleeps for the interval, prints one summary of the "dist"
# map, clears the map, and stops after `count` summaries or after Ctrl-C
# (which still prints the summary collected so far).
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        # finish this iteration so the partial interval is still reported
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")

    if args.runqocc:
        # Read the map once per interval. The probe keeps sampling while we
        # print, so walking the map twice could see a CPU key in the second
        # walk that was absent from the first and fail with KeyError.
        entries = list(dist.items())

        if args.cpus:
            # run queue occupancy, per-CPU summary
            # keys are cpu_key_t: k.cpu is the CPU id, k.slot the queue length
            cpumax = max([0] + [k.cpu for k, v in entries])
            idle = dict.fromkeys(range(cpumax + 1), 0)
            queued = dict.fromkeys(range(cpumax + 1), 0)
            for k, v in entries:
                if k.slot == 0:
                    idle[k.cpu] += v.value
                else:
                    queued[k.cpu] += v.value
            # every CPU from 0 to cpumax is printed, sampled or not
            for c in range(cpumax + 1):
                samples = idle[c] + queued[c]
                if samples:
                    runqocc = float(queued[c]) / samples
                else:
                    runqocc = 0
                print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))

        else:
            # run queue occupancy, system-wide summary
            # keys are plain unsigned ints holding the queue length
            idle = 0
            queued = 0
            for k, v in entries:
                if k.value == 0:
                    idle += v.value
                else:
                    queued += v.value
            samples = idle + queued
            if samples:
                runqocc = float(queued) / samples
            else:
                runqocc = 0
            print("runqocc: %0.2f%%" % (100 * runqocc))

    else:
        # run queue length histograms
        dist.print_linear_hist("runqlen", "cpu")

    # start the next interval from an empty map
    dist.clear()

    countdown -= 1
    if exiting or countdown == 0:
        exit()