Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/affinity #50

Closed
wants to merge 13 commits into from
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![GitHub](https://img.shields.io/github/license/thediveo/lxkns)](https://img.shields.io/github/license/thediveo/lxkns)

![build and test](https://github.com/thediveo/lxkns/workflows/build%20and%20test/badge.svg?branch=master)
![Coverage](https://img.shields.io/badge/Coverage-82.0%25-brightgreen)
![Coverage](https://img.shields.io/badge/Coverage-81.1%25-brightgreen)
![goroutines](https://img.shields.io/badge/go%20routines-not%20leaking-success)
![file descriptors](https://img.shields.io/badge/file%20descriptors-not%20leaking-success)
[![Go Report Card](https://goreportcard.com/badge/github.com/thediveo/lxkns)](https://goreportcard.com/report/github.com/thediveo/lxkns)
Expand Down
1 change: 1 addition & 0 deletions api/types/discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ var _ = Describe("discovery result JSON", func() {
"with-freezer": true,
"with-mounts": true,
"with-socket-processes": false,
"with-affinity-scheduling": false,
"labels": {},
"scanned-namespace-types": [
"time",
Expand Down
7 changes: 6 additions & 1 deletion cmd/lxkns/endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ func GetNamespacesHandler(cizer containerizer.Containerizer) http.HandlerFunc {
discover.WithFullDiscovery(),
discover.WithContainerizer(cizer),
discover.WithPIDMapper(), // recommended when using WithContainerizer.
discover.WithAffinityAndScheduling(),
)
// Nota bene: set header before writing the header with the status code;
// actually makes sense, innit?
Expand All @@ -50,7 +51,11 @@ func GetNamespacesHandler(cizer containerizer.Containerizer) http.HandlerFunc {
// GetProcessesHandler returns the process table (including tasks) with
// namespace references, as JSON.
func GetProcessesHandler(w http.ResponseWriter, req *http.Request) {
disco := discover.Namespaces(discover.FromProcs(), discover.FromTasks())
disco := discover.Namespaces(
discover.FromProcs(),
discover.FromTasks(),
discover.WithAffinityAndScheduling(),
)

w.Header().Set("Content-Type", "application/json")

Expand Down
10 changes: 5 additions & 5 deletions cmd/pidtree/treevisitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package main

import (
"reflect"
"sort"
"slices"

"github.com/thediveo/lxkns/cmd/internal/tool"
"github.com/thediveo/lxkns/model"
Expand Down Expand Up @@ -74,8 +74,8 @@ func (v *TreeVisitor) Get(node reflect.Value) (
clist := []interface{}{}
if proc, ok := node.Interface().(*model.Process); ok {
pidns := proc.Namespaces[model.PIDNS]
childprocesses := model.ProcessListByPID(proc.Children)
sort.Sort(childprocesses)
childprocesses := slices.Clone(proc.Children)
slices.SortFunc(childprocesses, model.SortProcessByPID)
childpidns := map[species.NamespaceID]bool{}
for _, childproc := range childprocesses {
if childproc.Namespaces[model.PIDNS] == pidns {
Expand Down Expand Up @@ -104,8 +104,8 @@ func (v *TreeVisitor) Get(node reflect.Value) (
} else {
// The child nodes of a PID namespace tree node will be the "leader"
// (or "topmost") processes inside the PID namespace.
leaders := model.ProcessListByPID(node.Interface().(model.Namespace).Leaders())
sort.Sort(leaders)
leaders := slices.Clone(node.Interface().(model.Namespace).Leaders())
slices.SortFunc(leaders, model.SortProcessByPID)
for _, proc := range leaders {
clist = append(clist, proc)
}
Expand Down
6 changes: 5 additions & 1 deletion discover/discover.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,13 @@ func Namespaces(options ...DiscoveryOption) *Result {
result.PIDMap = NewPIDMap(result)
}

// Optionally discover alive containers and relate the.
// Optionally discover alive containers and relate the containers to
// processes and vice versa.
discoverContainers(result)

// Pick up leader process CPU affinity and scheduling setup.
discoverAffinityScheduling(result)

// As a C oldie it gives me the shivers to return a pointer to what might
// look like an "auto" local struct ;)
return result
Expand Down
37 changes: 37 additions & 0 deletions discover/discovery_affinity_sched.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2024 Harald Albrecht.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux

package discover

import "github.com/thediveo/lxkns/model"

// discoverAffinityScheduling discovers the CPU affinity lists and scheduler
// settings for the leader processes of all discovered namespaces.
//
// It is a no-op unless the discovery was run with the
// DiscoverAffinityScheduling option enabled.
func discoverAffinityScheduling(result *Result) {
	if !result.Options.DiscoverAffinityScheduling {
		return
	}
	// Walk the discovered namespaces of every type and retrieve the affinity
	// and scheduling information for each of their leader processes.
	for nstype := model.MountNS; nstype < model.NamespaceTypesCount; nstype++ {
		for _, ns := range result.Namespaces[nstype] {
			for _, leader := range ns.Leaders() {
				// The same process can be leader in multiple namespaces; skip
				// it if its affinity information was already retrieved.
				if leader.Affinity != nil {
					continue
				}
				// Best effort: the leader process might have vanished in the
				// meantime, so retrieval errors are deliberately ignored.
				_ = leader.RetrieveAffinityScheduling()
			}
		}
	}
}
30 changes: 20 additions & 10 deletions discover/discovery_opt.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,17 @@ type DiscoverOpts struct {
// If zero, defaults to discovering all namespaces.
NamespaceTypes species.NamespaceType `json:"-"`

ScanProcs bool `json:"from-procs"` // Scan processes for attached namespaces.
ScanTasks bool `json:"from-tasks"` // Scan all tasks for attached namespaces.
ScanFds bool `json:"from-fds"` // Scan open file descriptors for namespaces.
ScanBindmounts bool `json:"from-bindmounts"` // Scan bind-mounts for namespaces.
DiscoverHierarchy bool `json:"with-hierarchy"` // Discover the hierarchy of PID and user namespaces.
DiscoverOwnership bool `json:"with-ownership"` // Discover the ownership of non-user namespaces.
DiscoverFreezerState bool `json:"with-freezer"` // Discover the cgroup freezer state of processes.
DiscoverMounts bool `json:"with-mounts"` // Discover mount point hierarchy with mount paths and visibility.
DiscoverSocketProcesses bool `json:"with-socket-processes"` // Discover the processes related to specific socket inode numbers.
Labels map[string]string `json:"labels"` // Pass options (in form of labels) to decorators
ScanProcs bool `json:"from-procs"` // Scan processes for attached namespaces.
ScanTasks bool `json:"from-tasks"` // Scan all tasks for attached namespaces.
ScanFds bool `json:"from-fds"` // Scan open file descriptors for namespaces.
ScanBindmounts bool `json:"from-bindmounts"` // Scan bind-mounts for namespaces.
DiscoverHierarchy bool `json:"with-hierarchy"` // Discover the hierarchy of PID and user namespaces.
DiscoverOwnership bool `json:"with-ownership"` // Discover the ownership of non-user namespaces.
DiscoverFreezerState bool `json:"with-freezer"` // Discover the cgroup freezer state of processes.
DiscoverMounts bool `json:"with-mounts"` // Discover mount point hierarchy with mount paths and visibility.
DiscoverSocketProcesses bool `json:"with-socket-processes"` // Discover the processes related to specific socket inode numbers.
DiscoverAffinityScheduling bool `json:"with-affinity-scheduling"` // Discover CPU affinity and scheduling of leader processes.
Labels map[string]string `json:"labels"` // Pass options (in form of labels) to decorators

Containerizer containerizer.Containerizer `json:"-"` // Discover containers using containerizer.

Expand Down Expand Up @@ -88,6 +89,7 @@ func WithFullDiscovery() DiscoveryOption {
o.ScanTasks = true
o.DiscoverMounts = true
o.withPIDmap = true
o.DiscoverAffinityScheduling = true
}
}

Expand Down Expand Up @@ -131,6 +133,14 @@ func NotFromTasks() DiscoveryOption {
return func(o *DiscoverOpts) { o.ScanTasks = false }
}

// WithAffinityAndScheduling opts in to discovering the CPU affinity and
// scheduling settings of (leader) processes.
func WithAffinityAndScheduling() DiscoveryOption {
	return func(opts *DiscoverOpts) {
		opts.DiscoverAffinityScheduling = true
	}
}

// WithoutAffinityAndScheduling opts out of discovering the CPU affinity and
// scheduling settings of (leader) processes.
func WithoutAffinityAndScheduling() DiscoveryOption {
	return func(opts *DiscoverOpts) {
		opts.DiscoverAffinityScheduling = false
	}
}

// FromFds opts to find namespaces from the open file descriptors of processes.
func FromFds() DiscoveryOption {
return func(o *DiscoverOpts) { o.ScanFds = true }
Expand Down
199 changes: 199 additions & 0 deletions model/cpulist.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright 2024 Harald Albrecht.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.

package model

import (
"sync/atomic"
"unsafe"

"golang.org/x/sys/unix"
)

// CPUList is a list of CPU [from...to] ranges. CPU numbers are starting from
// zero.
type CPUList [][2]uint

// CPUSet is a CPU bit string, such as used for CPU affinity masks. See also
// [sched_getaffinity(2)].
//
// [sched_getaffinity(2)]: https://man7.org/linux/man-pages/man2/sched_getaffinity.2.html
type CPUSet []uint64

// The dynamically determined size of CPUSets on this system (size in uint64
// words). This is usually smaller than the fixed-sized [unix.CPUSet] that Go's
// [unix.SchedGetaffinity] uses.
var setsize atomic.Uint64

// Size of a single CPUSet word in bytes, needed to tell the kernel the size of
// the CPU set buffer in bytes.
var wordbytesize = uint64(unsafe.Sizeof(CPUSet{0}[0]))

// Start with a minimal single-word CPU set size; NewAffinityCPUList grows
// (and then caches) the size as needed when the kernel reports the buffer to
// be too small.
func init() {
	setsize.Store(1)
}

// NewAffinityCPUList returns the affinity CPUList (list of CPU ranges) of the
// process with the passed PID. Otherwise, it returns an error. If pid is zero,
// then the affinity CPU list of the calling thread is returned (make sure to
// have the OS-level thread locked to the calling go routine in this case).
//
// Notes:
//   - we don't use [unix.SchedGetaffinity] as this is tied to the fixed size
//     [unix.CPUSet] type; instead, we dynamically figure out the size needed
//     and cache the size internally.
//   - retrieving the affinity CPU mask and then speed-running it to
//     generate the range list is roughly two orders of magnitude faster than
//     fetching “/proc/$PID/status” and looking for the “Cpus_allowed_list”,
//     because generating the broad status procfs file is expensive.
func NewAffinityCPUList(pid PIDType) (CPUList, error) {
	var set CPUSet

	// Start from the cached set size that worked previously (or the initial
	// single-word guess) and grow it until the kernel accepts the buffer.
	setlenStart := setsize.Load()
	setlen := setlenStart
	for {
		set = make([]uint64, setlen)
		// see also:
		// https://man7.org/linux/man-pages/man2/sched_setaffinity.2.html; we
		// use RawSyscall here instead of Syscall as we know that
		// SYS_SCHED_GETAFFINITY does not block, following Go's stdlib
		// implementation.
		_, _, e := unix.RawSyscall(unix.SYS_SCHED_GETAFFINITY,
			uintptr(pid), uintptr(setlen*wordbytesize), uintptr(unsafe.Pointer(&set[0])))
		if e != 0 {
			// EINVAL signals that our buffer is smaller than the kernel's CPU
			// mask size, so double the buffer and retry.
			if e == unix.EINVAL {
				setlen *= 2
				continue
			}
			return nil, e
		}
		// Set the new size; if this fails because another go routine already
		// upped the set size, retry until we either notice that we're smaller
		// than what was set as the new set size, or we succeed in setting the
		// size.
		for {
			if setsize.CompareAndSwap(setlenStart, setlen) {
				break
			}
			setlenStart = setsize.Load()
			if setlenStart > setlen {
				break
			}
		}
		break
	}
	return set.NewCPUList(), nil
}

// NewCPUList returns a list of CPU ranges for the given bitmap CPUSet.
//
// This is an optimized implementation that does not use any division and modulo
// operations; instead, it only uses increment and (single bit position) shift
// operations. Additionally, this implementation fast-forwards through all-0s
// and all-1s CPUSet words (uint64's).
func (s CPUSet) NewCPUList() CPUList {
	setlen := uint64(len(s))
	cpulist := CPUList{}
	cpuno := uint(0)         // number of the CPU bit currently looked at
	cpuwordidx := uint64(0)  // index of the CPU mask word currently looked at
	cpuwordmask := uint64(1) // single-bit mask selecting the current CPU bit

	// Each iteration of the labelled outer loop emits (at most) one
	// consecutive [from...to] CPU range.
findNextCPUInWord:
	for {
		// If we're inside a cpu mask word, try to find the next set cpu bit, if
		// any, otherwise stop after we've fallen off the MSB end of the cpu
		// mask word.
		if cpuwordmask != 1 {
			for {
				if s[cpuwordidx]&cpuwordmask != 0 {
					break
				}
				cpuno++
				cpuwordmask <<= 1
				if cpuwordmask == 0 {
					// Oh no! We've fallen off the disc^Wcpu mask word.
					cpuwordidx++
					cpuwordmask = 1
					break
				}
			}
		}
		// Try to fast-forward through completely unset cpu mask words, where
		// possible.
		for cpuwordidx < setlen && s[cpuwordidx] == 0 {
			cpuno += 64
			cpuwordidx++
		}
		if cpuwordidx >= setlen {
			// Exhausted the whole set without an open range: done.
			return cpulist
		}
		// We arrived at a non-zero cpu mask word, so let's now find the first
		// cpu in it.
		for {
			if s[cpuwordidx]&cpuwordmask != 0 {
				break
			}
			cpuno++
			cpuwordmask <<= 1
		}
		// Hooray! We've finally located a CPU in use. Move on to the next CPU,
		// handling a word boundary when necessary.
		cpufrom := cpuno
		cpuno++
		cpuwordmask <<= 1
		if cpuwordmask == 0 {
			// Oh no! We've again fallen off the disc^Wcpu mask word.
			cpuwordidx++
			cpuwordmask = 1
		}
		// Now locate the next unset cpu within the currently inspected cpu mask
		// word, until we find one or have exhausted our search within the
		// current cpu mask word.
		if cpuwordmask != 1 {
			for {
				if s[cpuwordidx]&cpuwordmask == 0 {
					cpulist = append(cpulist, [2]uint{cpufrom, cpuno - 1})
					continue findNextCPUInWord
				}
				cpuno++
				cpuwordmask <<= 1
				if cpuwordmask == 0 {
					cpuwordidx++
					cpuwordmask = 1
					break
				}
			}
		}
		// Try to fast-forward through completely set cpu mask words, where
		// applicable.
		for cpuwordidx < setlen && s[cpuwordidx] == ^uint64(0) {
			cpuno += 64
			cpuwordidx++
		}
		// Are we completely done? If so, add the final CPU span and then call
		// it a day.
		if cpuwordidx >= setlen {
			cpulist = append(cpulist, [2]uint{cpufrom, cpuno - 1})
			return cpulist
		}
		// We arrived at a non-all-1s cpu mask word, so let's now find the first
		// cpu in it that is unset. Add the CPU span, and then rinse and repeat
		// from the beginning: find the next set CPU or fall off the disc.
		for {
			if s[cpuwordidx]&cpuwordmask == 0 {
				cpulist = append(cpulist, [2]uint{cpufrom, cpuno - 1})
				break
			}
			cpuno++
			cpuwordmask <<= 1
		}
	}
}
Loading
Loading