diff --git a/README.md b/README.md index 94a55a1d..b801d289 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![GitHub](https://img.shields.io/github/license/thediveo/lxkns)](https://img.shields.io/github/license/thediveo/lxkns) ![build and test](https://github.com/thediveo/lxkns/workflows/build%20and%20test/badge.svg?branch=master) -![Coverage](https://img.shields.io/badge/Coverage-82.0%25-brightgreen) +![Coverage](https://img.shields.io/badge/Coverage-81.1%25-brightgreen) ![goroutines](https://img.shields.io/badge/go%20routines-not%20leaking-success) ![file descriptors](https://img.shields.io/badge/file%20descriptors-not%20leaking-success) [![Go Report Card](https://goreportcard.com/badge/github.com/thediveo/lxkns)](https://goreportcard.com/report/github.com/thediveo/lxkns) diff --git a/api/types/discovery_test.go b/api/types/discovery_test.go index 8272a50f..5ec4e6f7 100644 --- a/api/types/discovery_test.go +++ b/api/types/discovery_test.go @@ -40,6 +40,7 @@ var _ = Describe("discovery result JSON", func() { "with-freezer": true, "with-mounts": true, "with-socket-processes": false, + "with-affinity-scheduling": false, "labels": {}, "scanned-namespace-types": [ "time", diff --git a/cmd/lxkns/endpoints.go b/cmd/lxkns/endpoints.go index 114b685e..0a0c3ad3 100644 --- a/cmd/lxkns/endpoints.go +++ b/cmd/lxkns/endpoints.go @@ -34,6 +34,7 @@ func GetNamespacesHandler(cizer containerizer.Containerizer) http.HandlerFunc { discover.WithFullDiscovery(), discover.WithContainerizer(cizer), discover.WithPIDMapper(), // recommended when using WithContainerizer. + discover.WithAffinityAndScheduling(), ) // Note bene: set header before writing the header with the status code; // actually makes sense, innit? @@ -50,7 +51,11 @@ func GetNamespacesHandler(cizer containerizer.Containerizer) http.HandlerFunc { // GetProcessesHandler returns the process table (including tasks) with // namespace references, as JSON. 
func GetProcessesHandler(w http.ResponseWriter, req *http.Request) { - disco := discover.Namespaces(discover.FromProcs(), discover.FromTasks()) + disco := discover.Namespaces( + discover.FromProcs(), + discover.FromTasks(), + discover.WithAffinityAndScheduling(), + ) w.Header().Set("Content-Type", "application/json") diff --git a/cmd/pidtree/treevisitor.go b/cmd/pidtree/treevisitor.go index 6ec3eecb..1306bf74 100644 --- a/cmd/pidtree/treevisitor.go +++ b/cmd/pidtree/treevisitor.go @@ -18,7 +18,7 @@ package main import ( "reflect" - "sort" + "slices" "github.com/thediveo/lxkns/cmd/internal/tool" "github.com/thediveo/lxkns/model" @@ -74,8 +74,8 @@ func (v *TreeVisitor) Get(node reflect.Value) ( clist := []interface{}{} if proc, ok := node.Interface().(*model.Process); ok { pidns := proc.Namespaces[model.PIDNS] - childprocesses := model.ProcessListByPID(proc.Children) - sort.Sort(childprocesses) + childprocesses := slices.Clone(proc.Children) + slices.SortFunc(childprocesses, model.SortProcessByPID) childpidns := map[species.NamespaceID]bool{} for _, childproc := range childprocesses { if childproc.Namespaces[model.PIDNS] == pidns { @@ -104,8 +104,8 @@ func (v *TreeVisitor) Get(node reflect.Value) ( } else { // The child nodes of a PID namespace tree node will be the "leader" // (or "topmost") processes inside the PID namespace. - leaders := model.ProcessListByPID(node.Interface().(model.Namespace).Leaders()) - sort.Sort(leaders) + leaders := slices.Clone(node.Interface().(model.Namespace).Leaders()) + slices.SortFunc(leaders, model.SortProcessByPID) for _, proc := range leaders { clist = append(clist, proc) } diff --git a/discover/discover.go b/discover/discover.go index 7ad9bce4..80dbbb38 100644 --- a/discover/discover.go +++ b/discover/discover.go @@ -226,9 +226,13 @@ func Namespaces(options ...DiscoveryOption) *Result { result.PIDMap = NewPIDMap(result) } - // Optionally discover alive containers and relate the. 
+ // Optionally discover alive containers and relate the containers to + // processes and vice versa. discoverContainers(result) + // Pick up leader process CPU affinity and scheduling setup. + discoverAffinityScheduling(result) + // As a C oldie it gives me the shivers to return a pointer to what might // look like an "auto" local struct ;) return result diff --git a/discover/discovery_affinity_sched.go b/discover/discovery_affinity_sched.go new file mode 100644 index 00000000..286c9499 --- /dev/null +++ b/discover/discovery_affinity_sched.go @@ -0,0 +1,37 @@ +// Copyright 2024 Harald Albrecht. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build linux + +package discover + +import "github.com/thediveo/lxkns/model" + +// discoverAffinityScheduling discovers the CPU affinity lists and scheduler +// settings for the leader processes of all discovered namespaces. 
+func discoverAffinityScheduling(result *Result) { + if !result.Options.DiscoverAffinityScheduling { + return + } + for nstype := model.MountNS; nstype < model.NamespaceTypesCount; nstype++ { + for _, ns := range result.Namespaces[nstype] { + for _, leader := range ns.Leaders() { + if leader.Affinity != nil { + continue + } + _ = leader.RetrieveAffinityScheduling() + } + } + } +} diff --git a/discover/discovery_opt.go b/discover/discovery_opt.go index 2a8a7824..68bc1659 100644 --- a/discover/discovery_opt.go +++ b/discover/discovery_opt.go @@ -32,16 +32,17 @@ type DiscoverOpts struct { // If zero, defaults to discovering all namespaces. NamespaceTypes species.NamespaceType `json:"-"` - ScanProcs bool `json:"from-procs"` // Scan processes for attached namespaces. - ScanTasks bool `json:"from-tasks"` // Scan all tasks for attached namespaces. - ScanFds bool `json:"from-fds"` // Scan open file descriptors for namespaces. - ScanBindmounts bool `json:"from-bindmounts"` // Scan bind-mounts for namespaces. - DiscoverHierarchy bool `json:"with-hierarchy"` // Discover the hierarchy of PID and user namespaces. - DiscoverOwnership bool `json:"with-ownership"` // Discover the ownership of non-user namespaces. - DiscoverFreezerState bool `json:"with-freezer"` // Discover the cgroup freezer state of processes. - DiscoverMounts bool `json:"with-mounts"` // Discover mount point hierarchy with mount paths and visibility. - DiscoverSocketProcesses bool `json:"with-socket-processes"` // Discover the processes related to specific socket inode numbers. - Labels map[string]string `json:"labels"` // Pass options (in form of labels) to decorators + ScanProcs bool `json:"from-procs"` // Scan processes for attached namespaces. + ScanTasks bool `json:"from-tasks"` // Scan all tasks for attached namespaces. + ScanFds bool `json:"from-fds"` // Scan open file descriptors for namespaces. + ScanBindmounts bool `json:"from-bindmounts"` // Scan bind-mounts for namespaces. 
+ DiscoverHierarchy bool `json:"with-hierarchy"` // Discover the hierarchy of PID and user namespaces. + DiscoverOwnership bool `json:"with-ownership"` // Discover the ownership of non-user namespaces. + DiscoverFreezerState bool `json:"with-freezer"` // Discover the cgroup freezer state of processes. + DiscoverMounts bool `json:"with-mounts"` // Discover mount point hierarchy with mount paths and visibility. + DiscoverSocketProcesses bool `json:"with-socket-processes"` // Discover the processes related to specific socket inode numbers. + DiscoverAffinityScheduling bool `json:"with-affinity-scheduling"` // Discover CPU affinity and scheduling of leader processes. + Labels map[string]string `json:"labels"` // Pass options (in form of labels) to decorators Containerizer containerizer.Containerizer `json:"-"` // Discover containers using containerizer. @@ -88,6 +89,7 @@ func WithFullDiscovery() DiscoveryOption { o.ScanTasks = true o.DiscoverMounts = true o.withPIDmap = true + o.DiscoverAffinityScheduling = true } } @@ -131,6 +133,14 @@ func NotFromTasks() DiscoveryOption { return func(o *DiscoverOpts) { o.ScanTasks = false } } +func WithAffinityAndScheduling() DiscoveryOption { + return func(o *DiscoverOpts) { o.DiscoverAffinityScheduling = true } +} + +func WithoutAffinityAndScheduling() DiscoveryOption { + return func(o *DiscoverOpts) { o.DiscoverAffinityScheduling = false } +} + // FromFds opts to find namespaces from the open file descriptors of processes. func FromFds() DiscoveryOption { return func(o *DiscoverOpts) { o.ScanFds = true } diff --git a/model/cpulist.go b/model/cpulist.go new file mode 100644 index 00000000..c4996c9c --- /dev/null +++ b/model/cpulist.go @@ -0,0 +1,199 @@ +// Copyright 2024 Harald Albrecht. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. 
You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package model + +import ( + "sync/atomic" + "unsafe" + + "golang.org/x/sys/unix" +) + +// CPUList is a list of CPU [from...to] ranges. CPU numbers are starting from +// zero. +type CPUList [][2]uint + +// CPUSet is a CPU bit string, such as used for CPU affinity masks. See also +// [sched_getaffinity(2)]. +// +// [sched_getaffinity(2)]: https://man7.org/linux/man-pages/man2/sched_getaffinity.2.html +type CPUSet []uint64 + +// The dynamically determined size of CPUSets on this system (size in uint64 +// words). This is usually smaller than the fixed-sized [unix.CPUSet] that Go's +// [unix.SchedGetaffinity] uses. +var setsize atomic.Uint64 +var wordbytesize = uint64(unsafe.Sizeof(CPUSet{0}[0])) + +func init() { + setsize.Store(1) +} + +// NewAffinityCPUList returns the affinity CPUList (list of CPU ranges) of the +// process with the passed PID. Otherwise, it returns an error. If pid is zero, +// then the affinity CPU list of the calling thread is returned (make sure to +// have the OS-level thread locked to the calling go routine in this case). +// +// Notes: +// - we don't use [unix.SchedGetaffinity] as this is tied to the fixed size +// [unix.CPUSet] type; instead, we dynamically figure out the size needed +// and cache the size internally. +// - retrieving the affinity CPU mask and then speed-running it to +// generate the range list is roughly two orders of magnitude faster than +// fetching “/proc/$PID/status” and looking for the “Cpus_allowed_list”, +// because generating the broad status procfs file is expensive. 
+func NewAffinityCPUList(pid PIDType) (CPUList, error) { + var set CPUSet + + setlenStart := setsize.Load() + setlen := setlenStart + for { + set = make([]uint64, setlen) + // see also: + // https://man7.org/linux/man-pages/man2/sched_setaffinity.2.html; we + // use RawSyscall here instead of Syscall as we know that + // SYS_SCHED_GETAFFINITY does not block, following Go's stdlib + // implementation. + _, _, e := unix.RawSyscall(unix.SYS_SCHED_GETAFFINITY, + uintptr(pid), uintptr(setlen*wordbytesize), uintptr(unsafe.Pointer(&set[0]))) + if e != 0 { + if e == unix.EINVAL { + setlen *= 2 + continue + } + return nil, e + } + // Set the new size; if this fails because another go routine already + // upped the set size, retry until we either notice that we're smaller + // than what was set as the new set size, or we succeed in setting the + // size. + for { + if setsize.CompareAndSwap(setlenStart, setlen) { + break + } + setlenStart = setsize.Load() + if setlenStart > setlen { + break + } + } + break + } + return set.NewCPUList(), nil +} + +// NewCPUList returns a list of CPU ranges for the given bitmap CPUSet. +// +// This is an optimized implementation that does not use any division and modulo +// operations; instead, it only uses increment and (single bit position) shift +// operations. Additionally, this implementation fast-forwards through all-0s +// and all-1s CPUSet words (uint64's). +func (s CPUSet) NewCPUList() CPUList { + setlen := uint64(len(s)) + cpulist := CPUList{} + cpuno := uint(0) + cpuwordidx := uint64(0) + cpuwordmask := uint64(1) + +findNextCPUInWord: + for { + // If we're inside a cpu mask word, try to find the next set cpu bit, if + // any, otherwise stop after we've fallen off the MSB end of the cpu + // mask word. + if cpuwordmask != 1 { + for { + if s[cpuwordidx]&cpuwordmask != 0 { + break + } + cpuno++ + cpuwordmask <<= 1 + if cpuwordmask == 0 { + // Oh no! We've fallen off the disc^Wcpu mask word. 
+ cpuwordidx++ + cpuwordmask = 1 + break + } + } + } + // Try to fast-forward through completely unset cpu mask words, where + // possible. + for cpuwordidx < setlen && s[cpuwordidx] == 0 { + cpuno += 64 + cpuwordidx++ + } + if cpuwordidx >= setlen { + return cpulist + } + // We arrived at a non-zero cpu mask word, so let's now find the first + // cpu in it. + for { + if s[cpuwordidx]&cpuwordmask != 0 { + break + } + cpuno++ + cpuwordmask <<= 1 + } + // Hooray! We've finally located a CPU in use. Move on to the next CPU, + // handling a word boundary when necessary. + cpufrom := cpuno + cpuno++ + cpuwordmask <<= 1 + if cpuwordmask == 0 { + // Oh no! We've again fallen off the disc^Wcpu mask word. + cpuwordidx++ + cpuwordmask = 1 + } + // Now locate the next unset cpu within the currently inspected cpu mask + // word, until we find one or have exhausted our search within the + // current cpu mask word. + if cpuwordmask != 1 { + for { + if s[cpuwordidx]&cpuwordmask == 0 { + cpulist = append(cpulist, [2]uint{cpufrom, cpuno - 1}) + continue findNextCPUInWord + } + cpuno++ + cpuwordmask <<= 1 + if cpuwordmask == 0 { + cpuwordidx++ + cpuwordmask = 1 + break + } + } + } + // Try to fast-forward through completely set cpu mask words, where + // applicable. + for cpuwordidx < setlen && s[cpuwordidx] == ^uint64(0) { + cpuno += 64 + cpuwordidx++ + } + // Are we completely done? If so, add the final CPU span and then call + // it a day. + if cpuwordidx >= setlen { + cpulist = append(cpulist, [2]uint{cpufrom, cpuno - 1}) + return cpulist + } + // We arrived at a non-all-1s cpu mask word, so let's now find the first + // cpu in it that is unset. Add the CPU span, and then rinse and repeat + // from the beginning: find the next set CPU or fall off the disc. 
+ for { + if s[cpuwordidx]&cpuwordmask == 0 { + cpulist = append(cpulist, [2]uint{cpufrom, cpuno - 1}) + break + } + cpuno++ + cpuwordmask <<= 1 + } + } +} diff --git a/model/cpulist_test.go b/model/cpulist_test.go new file mode 100644 index 00000000..b2a21f42 --- /dev/null +++ b/model/cpulist_test.go @@ -0,0 +1,72 @@ +// Copyright 2024 Harald Albrecht. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package model + +import ( + "os" + + . "github.com/onsi/ginkgo/v2/dsl/core" + . "github.com/onsi/ginkgo/v2/dsl/table" + . "github.com/onsi/gomega" + . 
"github.com/thediveo/success" +) + +var _ = Describe("cpu affinity sets", func() { + + DescribeTable("parsing cpu sets", + func(set CPUSet, expected CPUList) { + Expect(set.NewCPUList()).To(Equal(expected)) + }, + Entry("nil set", nil, CPUList{}), + Entry("all-zeros set", CPUSet{0}, CPUList{}), + Entry("all-zeros set", CPUSet{0, 0}, CPUList{}), + + // all in first word + Entry("single cpu #0", CPUSet{1 << 0, 0}, CPUList{{0, 0}}), + Entry("single cpu #1", CPUSet{1 << 1}, CPUList{{1, 1}}), + Entry("single cpu #63", CPUSet{1 << 63}, CPUList{{63, 63}}), + Entry("single cpu #63, none else", CPUSet{1 << 63, 0, 0}, CPUList{{63, 63}}), + Entry("cpus #1-3", CPUSet{0xe, 0}, CPUList{{1, 3}}), + + // skip first zero words + Entry("single cpu #64", CPUSet{0, 1 << 0}, CPUList{{64, 64}}), + + // multiple cpu ranges in same word + Entry("cpu #1-2, #62", CPUSet{1<<62 | 1<<2 | 1<<1}, CPUList{{1, 2}, {62, 62}}), + + // range across boundaries + Entry("cpus #63-64", CPUSet{1 << 63, 1 << 0}, CPUList{{63, 64}}), + Entry("cpus #63-127", CPUSet{1 << 63, ^uint64(0)}, CPUList{{63, 127}}), + + // multiple all-1s words + Entry("cpu #0-127", CPUSet{^uint64(0), ^uint64(0)}, CPUList{{0, 127}}), + + // mixed + Entry("cpu #0-64", CPUSet{^uint64(0), 1 << 0}, CPUList{{0, 64}}), + Entry("cpu #0-64, 67", CPUSet{^uint64(0), 1<<3 | 1<<0}, CPUList{{0, 64}, {67, 67}}), + Entry("cpu #65-127, 129", CPUSet{0, ^uint64(0) - 1, 1 << 1}, CPUList{{65, 127}, {129, 129}}), + + Entry("b/w", CPUSet{0xaa0}, CPUList{{5, 5}, {7, 7}, {9, 9}, {11, 11}}), + Entry("art", CPUSet{0x5a0}, CPUList{{5, 5}, {7, 8}, {10, 10}}), + ) + + It("gets this process'es CPU affinity mask", func() { + Expect(wordbytesize).To(Equal(uint64(64 /* bits in uint64 */ / 8 /* bits/byte*/))) + cpulist := Successful(NewAffinityCPUList(PIDType(os.Getpid()))) + Expect(cpulist).NotTo(BeEmpty()) + Expect(setsize.Load()).NotTo(BeZero()) + }) + +}) diff --git a/model/process.go b/model/process.go index 0ebbb644..55019551 100644 --- a/model/process.go +++ 
b/model/process.go @@ -29,6 +29,7 @@ import ( "github.com/thediveo/lxkns/log" "github.com/thediveo/lxkns/plural" + "golang.org/x/sys/unix" ) // PIDType expresses things more clearly. @@ -51,6 +52,24 @@ type ProTaskCommon struct { // always be the same as for CpuCgroup. FridgeCgroup string `json:"fridgecgroup"` FridgeFrozen bool `json:"fridgefrozen"` // effective freezer state. + // CPU ranges affinity list, needs explicit request via + // RetrieveAffinityScheduling. + Affinity CPUList `json:"affinity,omitempty"` + Policy int `json:"policy,omitempty"` + // priority value is considered by the following schedulers: + // - SCHED_FIFO: prio 1..99. + // - SCHED_RR: prio 1..99. + // - SCHED_NORMAL (=SCHED_OTHER): not used/prio is 0. + // - SCHED_IDLE: not used/prio is 0. + // - SCHED_BATCH: not used/prio is 0. + // - SCHED_DEADLINE: doesn't use prio. + Priority int `json:"priority,omitempty"` + // nice value in the range +19..-20 (very nice ... less nice) is considered + // by the following schedulers: + // - SCHED_NORMAL (=SCHED_OTHER): nice is taken into account. + // - SCHED_BATCH: nice is taken into account. + // - SCHED_IDLE: nice is ignored (basically below a nice of +19). + Nice int `json:"nice,omitempty"` } // Task represents our very, very limited view and interest in a particular @@ -381,16 +400,6 @@ func (t ProcessTable) ProcessesByPIDs(pid ...PIDType) []*Process { return procs } -// ProcessListByPID is a type alias for sorting slices of *[model.Process] by -// their PIDs in numerically ascending order. -type ProcessListByPID []*Process - -func (l ProcessListByPID) Len() int { return len(l) } -func (l ProcessListByPID) Swap(i, j int) { l[i], l[j] = l[j], l[i] } -func (l ProcessListByPID) Less(i, j int) bool { - return l[i].PID < l[j].PID -} - // newTaskFromStatline parses a task (process) status line (as read from // /proc/[PID]/task/[TID]/status) into a Task object. 
func newTaskFromStatline(procstat string, proc *Process) (task *Task) { @@ -413,3 +422,33 @@ func newTaskFromStatline(procstat string, proc *Process) (task *Task) { func (t *Task) MainTask() bool { return t.TID == t.Process.PID } + +func (c *ProTaskCommon) retrieveAffinityScheduling(pid PIDType) error { + var err error + c.Affinity, err = NewAffinityCPUList(pid) + if err != nil { + return err + } + schedattr, err := unix.SchedGetAttr(int(pid), 0) + if err != nil { + return err + } + c.Policy = int(schedattr.Policy) + c.Nice = int(schedattr.Nice) + c.Priority = int(schedattr.Priority) + return nil +} + +// RetrieveAffinityScheduling updates this Process object's Affinity CPU range +// list and scheduling information (policy, priority, ...), returning nil when +// successful. Otherwise, it returns an error. +func (p *Process) RetrieveAffinityScheduling() error { + return p.retrieveAffinityScheduling(p.PID) +} + +// RetrieveAffinityScheduling updates this Task object's Affinity CPU range list +// and scheduling information (policy, priority, ...), returning nil when +// successful. Otherwise, it returns an error. +func (t *Task) RetrieveAffinityScheduling() error { + return t.retrieveAffinityScheduling(t.TID) +} diff --git a/model/process_cgroups.go b/model/process_cgroups.go index 7605934d..cd5685c8 100644 --- a/model/process_cgroups.go +++ b/model/process_cgroups.go @@ -174,36 +174,34 @@ func processCgroup(controllertypes []string, pid PIDType) (paths []string) { scanner := bufio.NewScanner(cgroup) unifiedroot := "" // (if detected) the cgroups v2 unified hierarchy root for scanner.Scan() { - if err == nil { - // See https://man7.org/linux/man-pages/man7/cgroups.7.html, section - // "NOTES", subsection "/proc files". For cgroups v1 controllers, - // the second field specifies the comma-separated list of the - // controllers bound to the hierarchy: here, we look for, say, the - // "cpu" controller. 
The third field specifies the path in the - // cgroups hierarchy; it is relative to the mount point of the - // hierarchy -- which in turn depends on the mount namespace of this - // process :) - // - // For the unified cgroups v2 hierarchy the second field will be - // empty, which otherwise would specify the particular cgroup v1 - // hierarchy/-ies. - if fields := strings.Split(scanner.Text(), ":"); len(fields) == 3 { - if fields[1] != "" { - // cgroups v1 hierarchies - controllers := strings.Split(fields[1], ",") - for _, ctrl := range controllers { - for idx, controllertype := range controllertypes { - if ctrl == controllertype { - paths[idx] = fields[2] - } + // See https://man7.org/linux/man-pages/man7/cgroups.7.html, section + // "NOTES", subsection "/proc files". For cgroups v1 controllers, + // the second field specifies the comma-separated list of the + // controllers bound to the hierarchy: here, we look for, say, the + // "cpu" controller. The third field specifies the path in the + // cgroups hierarchy; it is relative to the mount point of the + // hierarchy -- which in turn depends on the mount namespace of this + // process :) + // + // For the unified cgroups v2 hierarchy the second field will be + // empty, which otherwise would specify the particular cgroup v1 + // hierarchy/-ies. + if fields := strings.Split(scanner.Text(), ":"); len(fields) == 3 { + if fields[1] != "" { + // cgroups v1 hierarchies + controllers := strings.Split(fields[1], ",") + for _, ctrl := range controllers { + for idx, controllertype := range controllertypes { + if ctrl == controllertype { + paths[idx] = fields[2] } } - } else { - // when we come across a single unified cgroups v2 hierarchy - // root, remember it so we can later fix any missing - // controller paths. - unifiedroot = fields[2] } + } else { + // when we come across a single unified cgroups v2 hierarchy + // root, remember it so we can later fix any missing + // controller paths. 
+ unifiedroot = fields[2] } } } diff --git a/model/process_cgroups_test.go b/model/process_cgroups_test.go index 9c4752e9..d1827559 100644 --- a/model/process_cgroups_test.go +++ b/model/process_cgroups_test.go @@ -121,7 +121,7 @@ var _ = Describe("cgrouping", func() { sleepyproc := func() *Process { p := NewProcessTable(true) - proc, _ := p[sleepypid] + proc := p[sleepypid] return proc } sleepytask := func() *Task { diff --git a/model/process_sort.go b/model/process_sort.go new file mode 100644 index 00000000..9d40bbbe --- /dev/null +++ b/model/process_sort.go @@ -0,0 +1,83 @@ +// Copyright 2024 Harald Albrecht. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package model + +import ( + "os" + "strconv" + "strings" +) + +// SortProcessByPID sorts processes by increasing PID numbers (no interval +// arithmetics though). +func SortProcessByPID(a, b *Process) int { + return int(a.PID) - int(b.PID) +} + +// SortProcessByAgeThenPIDDistance sorts processes first by their “age” +// (starttime) and then by their PIDs, taking PID number wrap-arounds into +// consideration. +// +// As PIDs are monotonously increasing, wrapping around at “N” (which defaults +// to 1<<22 on Linux 64 bit systems), we consider a PID “B” to be after PID “A” +// if the “positive” distance from “A” to “B” (in increasing PIDs, distance +// taken modulo N) is at most N/2. +// +// For a nice write-up see also [The ryg blog: Intervals in modular arithmetic]. 
+// +// [The ryg blog: Intervals in modular arithmetic]: https://fgiesen.wordpress.com/2015/09/24/intervals-in-modular-arithmetic/ +func SortProcessByAgeThenPIDDistance(a, b *Process) int { + switch { + case a.Starttime < b.Starttime: + return -1 + case a.Starttime > b.Starttime: + return 1 + } + pidA := uint64(a.PID) + pidB := uint64(b.PID) + switch dist := (pidB + pidMaxMask + 1 - pidA) % (pidMaxMask + 1); { // distance modulo N; pid_max need not be a power of two, so masking with N-1 is not sufficient. + case dist == 0: + return 0 + case dist <= pidMaxDist: + return -1 + default: + return 1 + } +} + +var pidMaxMask uint64 // N-1 +var pidMaxDist uint64 // N/2 + +// pidWrapping reads the PID interval “N” set for this system (which need not +// be a power of two) and then returns N-1 and N/2, falling back to the specified +// default N in case the system configuration cannot be read. +func pidWrapping(defaultMax uint64) (mask, maxdist uint64) { + mask = defaultMax - 1 + maxdist = defaultMax >> 1 + // https://www.man7.org/linux/man-pages/man5/proc_sys_kernel.5.html + pidmaxb, err := os.ReadFile("/proc/sys/kernel/pid_max") + if err != nil { + return + } + pidmax, err := strconv.ParseUint(strings.TrimSuffix(string(pidmaxb), "\n"), 10, 32) + if err != nil { + return + } + return pidmax - 1, pidmax >> 1 +} + +func init() { + pidMaxMask, pidMaxDist = pidWrapping((uint64(1) << 22)) +} diff --git a/model/process_sort_test.go b/model/process_sort_test.go new file mode 100644 index 00000000..ed24258e --- /dev/null +++ b/model/process_sort_test.go @@ -0,0 +1,61 @@ +// Copyright 2024 Harald Albrecht. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. 
You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +// License for the specific language governing permissions and limitations +// under the License. + +package model + +import ( + deco "github.com/onsi/ginkgo/v2/dsl/decorators" + + . "github.com/onsi/ginkgo/v2/dsl/core" + . "github.com/onsi/ginkgo/v2/dsl/table" + . "github.com/onsi/gomega" +) + +var _ = Describe("Mr Wehrli sorting processes", func() { + + It("detects the system's PID wrap-around", func() { + mask, dist := pidWrapping(0) + Expect(mask).NotTo(BeZero()) + Expect(dist).NotTo(BeZero()) + Expect((dist << 1) - 1).To(Equal(mask)) + }) + + Context("interval arithmetic", deco.Ordered, func() { + + BeforeAll(func() { + pidMaxMask, pidMaxDist = 8-1, 8>>1 + DeferCleanup(func() { + pidMaxMask, pidMaxDist = pidWrapping((uint64(1) << 22)) + }) + }) + + DescribeTable("sorting by age and PID distance", + func(ageA int, pidA int, ageB int, pidB int, expect int) { + delta := SortProcessByAgeThenPIDDistance( + &Process{PID: PIDType(pidA), ProTaskCommon: ProTaskCommon{Starttime: uint64(ageA)}}, + &Process{PID: PIDType(pidB), ProTaskCommon: ProTaskCommon{Starttime: uint64(ageB)}}) + Expect(delta).To(Equal(expect), + "(%d-%d)&%x=%x ?? %x", pidA, pidB, pidMaxMask, (pidB-pidA)&int(pidMaxMask), pidMaxDist) + }, + Entry("a older than b", 100, 1, 200, 2, -1), + Entry("a younger than b", 200, 1, 100, 2, 1), + Entry("a same age as b, PID a before PID b, nowrap", 100, 4, 100, 5, -1), + Entry("a same age as b, PID b before PID a, nowrap", 100, 5, 100, 4, 1), + Entry("a same age as b, PID a before PID b, wrap", 100, 7, 100, 1, -1), + Entry("a same age as b, PID b before PID a, wrap", 100, 1, 100, 7, 1), + ) + + }) + +}) diff --git a/model/process_test.go b/model/process_test.go index 3c795c7b..178d57fa 100644 --- a/model/process_test.go +++ b/model/process_test.go @@ -17,7 +17,7 @@ package model import ( "os" "runtime" - "sort" + "slices" "strconv" "time" @@ -28,6 +28,7 @@ import ( . "github.com/onsi/gomega" . "github.com/onsi/gomega/gleak" . 
"github.com/thediveo/fdooze" + . "github.com/thediveo/success" ) var _ = Describe("processes and tasks", func() { @@ -226,10 +227,66 @@ var _ = Describe("process lists", func() { {p42, p1}, } for _, pl := range pls { - sort.Sort(ProcessListByPID(pl)) + slices.SortFunc(pl, SortProcessByPID) Expect(pl[0].PID).To(Equal(PIDType(1))) Expect(pl[1].PID).To(Equal(PIDType(42))) } }) }) + +var _ = Describe("cpu affinity", func() { + + It("retrieves cpu affinities of processes and tasks", func() { + proc := NewProcess(PIDType(os.Getpid()), true) + Expect(proc).NotTo(BeNil()) + Expect(proc.RetrieveAffinityScheduling()).To(Succeed()) + Expect(proc.Affinity).NotTo(BeEmpty()) + Expect(proc.Nice).To(Equal(0)) + Expect(proc.Priority).To(Equal(0)) + + runtime.LockOSThread() + defer runtime.UnlockOSThread() + var task *Task + Expect(proc.Tasks).To(ContainElement(HaveField("TID", PIDType(unix.Gettid())), &task)) + Expect(task.RetrieveAffinityScheduling()).To(Succeed()) + Expect(task.Affinity).NotTo(BeEmpty()) + Expect(task.Affinity).To(Equal(proc.Affinity)) + Expect(task.Nice).To(Equal(0)) + Expect(task.Priority).To(Equal(0)) + }) + + It("has no fun without scheduling risk", func() { + if os.Getuid() != 0 { + Skip("needs root") + } + + runtime.LockOSThread() + + tid := unix.Gettid() + proc := NewProcess(PIDType(os.Getpid()), true) + var task *Task + Expect(proc.Tasks).To(ContainElement(HaveField("TID", PIDType(tid)), &task)) + + fun := func() { + oldschedattr := Successful(unix.SchedGetAttr(0, 0)) + Expect(oldschedattr.Size).NotTo(BeZero()) + defer func() { + Expect(unix.SchedSetAttr(0, oldschedattr, 0)).To(Succeed()) + }() + newschedattr := *oldschedattr + newschedattr.Flags = unix.SCHED_FLAG_RESET_ON_FORK + newschedattr.Policy = unix.SCHED_BATCH + newschedattr.Nice = -20 + Expect(unix.SchedSetAttr(0, &newschedattr, 0)).To(Succeed()) + Expect(task.RetrieveAffinityScheduling()).To(Succeed()) + } + fun() + + runtime.UnlockOSThread() + + Expect(task.Policy).To(Equal(unix.SCHED_BATCH)) + 
Expect(task.Nice).To(Equal(-20)) + }) + +}) diff --git a/web/lxkns/icons/CPUAffinity.svg b/web/lxkns/icons/CPUAffinity.svg new file mode 100644 index 00000000..1d85db72 --- /dev/null +++ b/web/lxkns/icons/CPUAffinity.svg @@ -0,0 +1,67 @@ + + + + + + image/svg+xml + + + + + + + + + + diff --git a/web/lxkns/src/app/appstyles.tsx b/web/lxkns/src/app/appstyles.tsx index 47a206d4..083ee129 100644 --- a/web/lxkns/src/app/appstyles.tsx +++ b/web/lxkns/src/app/appstyles.tsx @@ -12,7 +12,7 @@ // License for the specific language governing permissions and limitations // under the License. -import { amber, lightBlue, blue, blueGrey, brown, green, grey, indigo, lime, pink, purple, red, teal, yellow } from '@mui/material/colors' +import { amber, lightBlue, blue, blueGrey, brown, green, grey, indigo, lime, pink, purple, red, teal, yellow, lightGreen, orange } from '@mui/material/colors' import { cloneDeep, merge as mergeDeep } from 'lodash' // We augment the existing Material-UI theme with new elements for uniform color @@ -49,6 +49,12 @@ declare module '@mui/material/styles' { run: string // color for run icon. frozen: string // color for pause icon. 
} + cpulist: string // CPU (affinity) list color + nice: string // color for nice values considered "nice" + notnice: string // color for nice values considered "not nice" + prio: string // non-0/non-1 prio value color + relaxedsched: string // scheduler NORMAL/BATCH/IDLE color + stressedsched: string // scheduler FIFO/RR/DEADLINE color } // allow configuration using `createTheme` interface PaletteOptions { @@ -75,6 +81,12 @@ declare module '@mui/material/styles' { run?: string, frozen?: string, }, + cpulist?: string + nice?: string + notnice?: string + prio?: string + relaxedsched?: string + stressedsched?: string } } @@ -132,6 +144,12 @@ export const lxknsLightTheme = { run: green[500], froozen: red[900], }, + cpulist: grey[600], + nice: lightGreen[700], + notnice: orange[900], + prio: red[400], + relaxedsched: lightGreen[400], + stressedsched: red[400], }, } @@ -181,6 +199,9 @@ export const lxknsDarkTheme = mergeDeep( run: green[500], froozen: red[700], }, + cpulist: grey[500], + nice: lightGreen[500], + notnice: orange[500], }, } ) diff --git a/web/lxkns/src/components/cpulist/CPUList.tsx b/web/lxkns/src/components/cpulist/CPUList.tsx new file mode 100644 index 00000000..3f5786b3 --- /dev/null +++ b/web/lxkns/src/components/cpulist/CPUList.tsx @@ -0,0 +1,69 @@ +// Copyright 2024 Harald Albrecht. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. 
+ +import React from 'react' +import clsx from 'clsx' + +import { styled, Tooltip } from '@mui/material' +import CPUIcon from 'icons/CPUAffinity' + + +const CPURangeList = styled('span')(({ theme }) => ({ + color: theme.palette.cpulist, + '&.cpulist > .MuiSvgIcon-root': { + verticalAlign: 'text-top', + position: 'relative', + top: '0.1ex', + marginRight: '0.2em', + color: theme.palette.cpulist, + }, +})) + +export interface CPUListProps { + /* list of CPU ranges */ + cpus: number[][] | null + /* show/hide a CPU icon before the CPU ranges */ + showIcon?: boolean + /* suppress line breaks between ranges (no zero-width space after the comma) */ + noWrap?: boolean + /** optional tooltip override */ + tooltip?: string + /** optional CSS class name(s). */ + className?: string +} + +/** + * The `CPUList` component renders a list of CPU ranges. + */ +export const CPUList = ({ cpus, showIcon, noWrap, tooltip, className }: CPUListProps) => { + const sep = noWrap ? ',' : ',\u200b' + tooltip = tooltip || 'CPU list' + return !!cpus && ( + + + {!!showIcon && } + { + cpus.map((cpurange, index) => { + if (cpurange[0] === cpurange[1]) { + return <>{index > 0 && sep}{cpurange[0]} + } + return <>{index > 0 && sep}{cpurange[0]}–{cpurange[1]} + }) + } + + + ) +} + +export default CPUList diff --git a/web/lxkns/src/components/cpulist/index.ts b/web/lxkns/src/components/cpulist/index.ts new file mode 100644 index 00000000..e434b6dd --- /dev/null +++ b/web/lxkns/src/components/cpulist/index.ts @@ -0,0 +1,2 @@ +export * from './CPUList' +export { default } from './CPUList' diff --git a/web/lxkns/src/components/processinfo/ProcessInfo.tsx b/web/lxkns/src/components/processinfo/ProcessInfo.tsx index 65cc2157..08274ca3 100644 --- a/web/lxkns/src/components/processinfo/ProcessInfo.tsx +++ b/web/lxkns/src/components/processinfo/ProcessInfo.tsx @@ -23,7 +23,8 @@ import Init1Icon from 'icons/Init1' import { Process } from 'models/lxkns' import ContainerInfo from 'components/containerinfo/ContainerInfo' import CgroupInfo 
from 'components/cgroupinfo/CgroupInfo' - +import CPUList from 'components/cpulist/CPUList' +import SchedulerInfo from 'components/schedinfo/SchedulerInfo' const piShort = "short-processinfo" @@ -43,6 +44,9 @@ const ProcessInformation = styled('span')(({ theme }) => ({ }, [`&.${piShort} *`]: { color: theme.palette.text.disabled, + }, + '& .cpulist': { + marginLeft: '0.4em', } })) @@ -106,7 +110,11 @@ export const ProcessInfo = ({ process, short, className }: ProcessInfoProps) => {process.name}  ({process.pid}) - {!short && process.cpucgroup && process.cpucgroup !== "/" && !process.container + {!short && <> + + + } + {!short && process.cpucgroup && process.cpucgroup !== "/" && !process.container && } ) diff --git a/web/lxkns/src/components/schedinfo/SchedulerInfo.tsx b/web/lxkns/src/components/schedinfo/SchedulerInfo.tsx new file mode 100644 index 00000000..62c46795 --- /dev/null +++ b/web/lxkns/src/components/schedinfo/SchedulerInfo.tsx @@ -0,0 +1,84 @@ +// Copyright 2024 Harald Albrecht. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. 
+ +import React from 'react' +import clsx from 'clsx' + +import { styled, Tooltip } from '@mui/material' + +import { Process } from 'models/lxkns/model' + + +const SchedInformation = styled('span')(({ theme }) => ({ + color: theme.palette.cpulist, + '& .policy': { + fontSize: '80%', + }, + '& .normal,& .batch,& .idle': { + color: theme.palette.relaxedsched, + }, + '& .fifo,& .rr': { + color: theme.palette.stressedsched, + }, + '& .nice': { + color: theme.palette.nice, + }, + '& .notnice': { + color: theme.palette.notnice, + }, + '& .prio': { + color: theme.palette.prio, + }, +})) + +const schedulerPolicies: { [key: string]: string } = { + '0': 'NORMAL', + '1': 'FIFO', + '2': 'RR', + '3': 'BATCH', + '5': 'IDLE', + '6': 'DEADLINE', +} + +const hasPriority = (process: Process) => { + const policy = process.policy || 0 + return policy === 1 || policy === 2 +} + +const hasNice = (process: Process) => { + const policy = process.policy || 0 + return policy === 0 || policy === 3 +} + +export interface SchedulerInfoProps { + /** information about a discovered Linux OS process. */ + process: Process + /** also show (SCHED_) NORMAL? */ + showNormal?: boolean +} + +export const SchedulerInfo = ({ process, showNormal }: SchedulerInfoProps) => { + const schedpol = schedulerPolicies[process.policy || 0] + const prio = process.priority || 0 + return + {(showNormal || !!process.policy) &&  {schedpol}} + {hasPriority(process) && 0 && 'prio')}> priority {prio}}{ + hasNice(process) && !!process.nice && + = 0 ? 'nice!' : 'not nice'}> + = 0 ? 
'nice' : 'notnice'}> nice {process.nice} + } + +} + +export default SchedulerInfo diff --git a/web/lxkns/src/components/schedinfo/index.ts b/web/lxkns/src/components/schedinfo/index.ts new file mode 100644 index 00000000..7ce70771 --- /dev/null +++ b/web/lxkns/src/components/schedinfo/index.ts @@ -0,0 +1,2 @@ +export * from './SchedulerInfo' +export { default } from './SchedulerInfo' diff --git a/web/lxkns/src/icons/CPUAffinity.tsx b/web/lxkns/src/icons/CPUAffinity.tsx new file mode 100644 index 00000000..4d1f8880 --- /dev/null +++ b/web/lxkns/src/icons/CPUAffinity.tsx @@ -0,0 +1,5 @@ +// autogenerated from icon svg file "icons/CPUAffinity.svg", do not edit; +import * as React from 'react'; +import { SvgIcon, SvgIconProps } from '@mui/material'; +export const CPUAffinityIcon = (props: SvgIconProps) => ; +export default CPUAffinityIcon; \ No newline at end of file diff --git a/web/lxkns/src/models/lxkns/model.ts b/web/lxkns/src/models/lxkns/model.ts index b872fc66..b58d29dd 100644 --- a/web/lxkns/src/models/lxkns/model.ts +++ b/web/lxkns/src/models/lxkns/model.ts @@ -125,6 +125,10 @@ export interface Process { fridgefrozen: boolean namespaces: NamespaceSet container: Container | null + affinity: number[][] | null + policy: number | null + priority: number | null + nice: number | null } export interface ProcessMap { [key: string]: Process } @@ -149,7 +153,7 @@ export interface TaskMap { [key: string]: Task } export type Busybody = (Process | Task) -export const isTask = (bb: Busybody): bb is Task => bb && (bb as Task).tid !== undefined +export const isTask = (bb: Busybody): bb is Task => bb && (bb as Task).tid !== undefined export const isProcess = (bb: Busybody): bb is Process => bb && (bb as Process).pid !== undefined export interface ContainerMap { [id: string]: Container }