From 7f4b520a5978a962c098b659d528c5e689b8931d Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Sat, 26 Oct 2024 21:10:14 +0200 Subject: [PATCH 1/4] [no-relnote] Merge verifyFlags and validateFlags Signed-off-by: Evan Lezar --- tools/container/nvidia-toolkit/run.go | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/tools/container/nvidia-toolkit/run.go b/tools/container/nvidia-toolkit/run.go index 842d7099..abc0a2cd 100644 --- a/tools/container/nvidia-toolkit/run.go +++ b/tools/container/nvidia-toolkit/run.go @@ -130,6 +130,12 @@ func main() { } func validateFlags(_ *cli.Context, o *options) error { + if o.root == "" { + return fmt.Errorf("the install root must be specified") + } + if _, exists := availableRuntimes[o.runtime]; !exists { + return fmt.Errorf("unknown runtime: %v", o.runtime) + } if filepath.Base(o.pidFile) != toolkitPidFilename { return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile) } @@ -144,12 +150,7 @@ func validateFlags(_ *cli.Context, o *options) error { // Run runs the core logic of the CLI func Run(c *cli.Context, o *options) error { - err := verifyFlags(o) - if err != nil { - return fmt.Errorf("unable to verify flags: %v", err) - } - - err = initialize(o.pidFile) + err := initialize(o.pidFile) if err != nil { return fmt.Errorf("unable to initialize: %v", err) } @@ -217,18 +218,6 @@ func ParseArgs(args []string) ([]string, string, error) { return nil, "", fmt.Errorf("unexpected positional argument(s) %v", args[2:lastPositionalArg+1]) } -func verifyFlags(o *options) error { - log.Infof("Verifying Flags") - if o.root == "" { - return fmt.Errorf("the install root must be specified") - } - - if _, exists := availableRuntimes[o.runtime]; !exists { - return fmt.Errorf("unknown runtime: %v", o.runtime) - } - return nil -} - func initialize(pidFile string) error { log.Infof("Initializing") From dfc6dc0332a4d59b16765bff22b356b78a1c066b Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Sat, 26 Oct 2024 
21:28:17 +0200 Subject: [PATCH 2/4] [no-relnote] Remove unused TryDelete function Signed-off-by: Evan Lezar --- tools/container/toolkit/toolkit.go | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 9b97b419..4aa04fc9 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -24,7 +24,6 @@ import ( "path/filepath" "strings" - log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" "tags.cncf.io/container-device-interface/pkg/cdi" "tags.cncf.io/container-device-interface/pkg/parser" @@ -44,8 +43,6 @@ const ( nvidiaContainerToolkitConfigSource = "/etc/nvidia-container-runtime/config.toml" configFilename = "config.toml" - - toolkitPidFilename = "toolkit.pid" ) type Options struct { @@ -257,33 +254,6 @@ func ValidateOptions(opts *Options, toolkitRoot string) error { return nil } -// TryDelete attempts to remove the specified toolkit folder. -// A toolkit.pid file -- if present -- is skipped. -func TryDelete(cli *cli.Context, toolkitRoot string) error { - log.Infof("Attempting to delete NVIDIA container toolkit from '%v'", toolkitRoot) - - contents, err := os.ReadDir(toolkitRoot) - if err != nil && errors.Is(err, os.ErrNotExist) { - return nil - } else if err != nil { - return fmt.Errorf("failed to read the contents of %v: %w", toolkitRoot, err) - } - - for _, content := range contents { - if content.Name() == toolkitPidFilename { - continue - } - name := filepath.Join(toolkitRoot, content.Name()) - if err := os.RemoveAll(name); err != nil { - log.Warningf("could not remove %v: %v", name, err) - } - } - if err := os.RemoveAll(toolkitRoot); err != nil { - log.Warningf("could not remove %v: %v", toolkitRoot, err) - } - return nil -} - // Install installs the components of the NVIDIA container toolkit. // The specified sourceRoot is searched for the components to install. // Any existing installation is removed. 
From 8955ee5895e460052fd7cccf3744a52420d9d130 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Sat, 26 Oct 2024 21:13:27 +0200 Subject: [PATCH 3/4] [no-relnote] Add app struct for nvidia-toolkit Signed-off-by: Evan Lezar --- tools/container/nvidia-toolkit/run.go | 86 +++++++++++++++++---------- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/tools/container/nvidia-toolkit/run.go b/tools/container/nvidia-toolkit/run.go index abc0a2cd..673c2b52 100644 --- a/tools/container/nvidia-toolkit/run.go +++ b/tools/container/nvidia-toolkit/run.go @@ -8,10 +8,10 @@ import ( "strings" "syscall" - log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" "golang.org/x/sys/unix" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime" "github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit" ) @@ -51,12 +51,40 @@ func (o options) toolkitRoot() string { var Version = "development" func main() { - remainingArgs, root, err := ParseArgs(os.Args) + logger := logger.New() + + remainingArgs, root, err := ParseArgs(logger, os.Args) if err != nil { - log.Errorf("Error: unable to parse arguments: %v", err) + logger.Errorf("Error: unable to parse arguments: %v", err) + os.Exit(1) + } + + c := new(logger, root) + + // Run the CLI + logger.Infof("Starting %v", c.Name) + if err := c.Run(remainingArgs); err != nil { + logger.Errorf("error running nvidia-toolkit: %v", err) os.Exit(1) } + logger.Infof("Completed %v", c.Name) +} + +type app struct { + logger logger.Interface + defaultRoot string +} + +func new(logger logger.Interface, defaultRoot string) *cli.App { + a := app{ + logger: logger, + defaultRoot: defaultRoot, + } + return a.build() +} + +func (a app) build() *cli.App { options := options{ toolkitOptions: toolkit.Options{}, } @@ -68,10 +96,10 @@ func main() { c.Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will 
be installed at ${DESTINATION}/toolkit" c.Version = Version c.Before = func(ctx *cli.Context) error { - return validateFlags(ctx, &options) + return a.validateFlags(ctx, &options) } c.Action = func(ctx *cli.Context) error { - return Run(ctx, &options) + return a.Run(ctx, &options) } // Setup flags for the CLI @@ -102,7 +130,7 @@ func main() { }, &cli.StringFlag{ Name: "root", - Value: root, + Value: a.defaultRoot, Usage: "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit", Destination: &options.root, EnvVars: []string{"ROOT"}, @@ -119,17 +147,10 @@ func main() { c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...) c.Flags = append(c.Flags, runtime.Flags(&options.runtimeOptions)...) - // Run the CLI - log.Infof("Starting %v", c.Name) - if err := c.Run(remainingArgs); err != nil { - log.Errorf("error running nvidia-toolkit: %v", err) - os.Exit(1) - } - - log.Infof("Completed %v", c.Name) + return c } -func validateFlags(_ *cli.Context, o *options) error { +func (a *app) validateFlags(_ *cli.Context, o *options) error { if o.root == "" { return fmt.Errorf("the install root must be specified") } @@ -139,6 +160,7 @@ func validateFlags(_ *cli.Context, o *options) error { if filepath.Base(o.pidFile) != toolkitPidFilename { return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile) } + if err := toolkit.ValidateOptions(&o.toolkitOptions, o.toolkitRoot()); err != nil { return err } @@ -149,12 +171,12 @@ func validateFlags(_ *cli.Context, o *options) error { } // Run runs the core logic of the CLI -func Run(c *cli.Context, o *options) error { - err := initialize(o.pidFile) +func (a *app) Run(c *cli.Context, o *options) error { + err := a.initialize(o.pidFile) if err != nil { return fmt.Errorf("unable to initialize: %v", err) } - defer shutdown(o.pidFile) + defer a.shutdown(o.pidFile) if len(o.toolkitOptions.ContainerRuntimeRuntimes.Value()) == 0 { lowlevelRuntimePaths, err := 
runtime.GetLowlevelRuntimePaths(&o.runtimeOptions, o.runtime) @@ -176,7 +198,7 @@ func Run(c *cli.Context, o *options) error { } if !o.noDaemon { - err = waitForSignal() + err = a.waitForSignal() if err != nil { return fmt.Errorf("unable to wait for signal: %v", err) } @@ -192,8 +214,8 @@ func Run(c *cli.Context, o *options) error { // ParseArgs checks if a single positional argument was defined and extracts this the root. // If no positional arguments are defined, it is assumed that the root is specified as a flag. -func ParseArgs(args []string) ([]string, string, error) { - log.Infof("Parsing arguments") +func ParseArgs(logger logger.Interface, args []string) ([]string, string, error) { + logger.Infof("Parsing arguments") if len(args) < 2 { return args, "", nil @@ -218,8 +240,8 @@ func ParseArgs(args []string) ([]string, string, error) { return nil, "", fmt.Errorf("unexpected positional argument(s) %v", args[2:lastPositionalArg+1]) } -func initialize(pidFile string) error { - log.Infof("Initializing") +func (a *app) initialize(pidFile string) error { + a.logger.Infof("Initializing") if dir := filepath.Dir(pidFile); dir != "" { err := os.MkdirAll(dir, 0755) @@ -235,8 +257,8 @@ func initialize(pidFile string) error { err = unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB) if err != nil { - log.Warningf("Unable to get exclusive lock on '%v'", pidFile) - log.Warningf("This normally means an instance of the NVIDIA toolkit Container is already running, aborting") + a.logger.Warningf("Unable to get exclusive lock on '%v'", pidFile) + a.logger.Warningf("This normally means an instance of the NVIDIA toolkit Container is already running, aborting") return fmt.Errorf("unable to get flock on pidfile: %v", err) } @@ -253,8 +275,8 @@ func initialize(pidFile string) error { case <-waitingForSignal: signalReceived <- true default: - log.Infof("Signal received, exiting early") - shutdown(pidFile) + a.logger.Infof("Signal received, exiting early") + a.shutdown(pidFile) os.Exit(0) 
} }() @@ -262,18 +284,18 @@ func initialize(pidFile string) error { return nil } -func waitForSignal() error { - log.Infof("Waiting for signal") +func (a *app) waitForSignal() error { + a.logger.Infof("Waiting for signal") waitingForSignal <- true <-signalReceived return nil } -func shutdown(pidFile string) { - log.Infof("Shutting Down") +func (a *app) shutdown(pidFile string) { + a.logger.Infof("Shutting Down") err := os.Remove(pidFile) if err != nil { - log.Warningf("Unable to remove pidfile: %v", err) + a.logger.Warningf("Unable to remove pidfile: %v", err) } } From a71dcc2b3a6ac673f3e22898525863e80034fc70 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Sat, 26 Oct 2024 21:49:21 +0200 Subject: [PATCH 4/4] [no-relnote] Use logger in toolkit installation Signed-off-by: Evan Lezar --- tools/container/nvidia-toolkit/run.go | 19 +- tools/container/nvidia-toolkit/run_test.go | 4 +- tools/container/toolkit/executable.go | 11 +- tools/container/toolkit/executable_test.go | 10 ++ tools/container/toolkit/options.go | 40 +++++ tools/container/toolkit/runtime.go | 19 +- tools/container/toolkit/runtime_test.go | 9 +- tools/container/toolkit/toolkit.go | 196 ++++++++++++--------- tools/container/toolkit/toolkit_test.go | 11 +- 9 files changed, 217 insertions(+), 102 deletions(-) create mode 100644 tools/container/toolkit/options.go diff --git a/tools/container/nvidia-toolkit/run.go b/tools/container/nvidia-toolkit/run.go index 673c2b52..15ab843c 100644 --- a/tools/container/nvidia-toolkit/run.go +++ b/tools/container/nvidia-toolkit/run.go @@ -74,6 +74,8 @@ func main() { type app struct { logger logger.Interface defaultRoot string + + toolkit *toolkit.Installer } func new(logger logger.Interface, defaultRoot string) *cli.App { @@ -96,6 +98,7 @@ func (a app) build() *cli.App { c.Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit" c.Version = Version c.Before = 
func(ctx *cli.Context) error { + a.init(&options) return a.validateFlags(ctx, &options) } c.Action = func(ctx *cli.Context) error { @@ -150,6 +153,13 @@ func (a app) build() *cli.App { return c } +func (a *app) init(o *options) { + a.toolkit = toolkit.NewInstaller( + toolkit.WithLogger(a.logger), + toolkit.WithToolkitRoot(o.toolkitRoot()), + ) +} + func (a *app) validateFlags(_ *cli.Context, o *options) error { if o.root == "" { return fmt.Errorf("the install root must be specified") @@ -161,7 +171,7 @@ func (a *app) validateFlags(_ *cli.Context, o *options) error { return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile) } - if err := toolkit.ValidateOptions(&o.toolkitOptions, o.toolkitRoot()); err != nil { + if err := a.toolkit.ValidateOptions(&o.toolkitOptions); err != nil { return err } if err := runtime.ValidateOptions(&o.runtimeOptions, o.runtime, o.toolkitRoot()); err != nil { @@ -187,7 +197,12 @@ func (a *app) Run(c *cli.Context, o *options) error { o.toolkitOptions.ContainerRuntimeRuntimes = *cli.NewStringSlice(lowlevelRuntimePaths...) 
} - err = toolkit.Install(c, &o.toolkitOptions, "", o.toolkitRoot()) + + installer := toolkit.NewInstaller( + toolkit.WithLogger(a.logger), + toolkit.WithToolkitRoot(o.toolkitRoot()), + ) + err = installer.Install(c, &o.toolkitOptions) if err != nil { return fmt.Errorf("unable to install toolkit: %v", err) } diff --git a/tools/container/nvidia-toolkit/run_test.go b/tools/container/nvidia-toolkit/run_test.go index 8a0bb50e..f7ba5866 100644 --- a/tools/container/nvidia-toolkit/run_test.go +++ b/tools/container/nvidia-toolkit/run_test.go @@ -20,10 +20,12 @@ import ( "fmt" "testing" + testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) func TestParseArgs(t *testing.T) { + logger, _ := testlog.NewNullLogger() testCases := []struct { args []string expectedRemaining []string @@ -70,7 +72,7 @@ func TestParseArgs(t *testing.T) { for i, tc := range testCases { t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { - remaining, root, err := ParseArgs(tc.args) + remaining, root, err := ParseArgs(logger, tc.args) if tc.expectedError != nil { require.EqualError(t, err, tc.expectedError.Error()) } else { diff --git a/tools/container/toolkit/executable.go b/tools/container/toolkit/executable.go index 394ca007..2cf66086 100644 --- a/tools/container/toolkit/executable.go +++ b/tools/container/toolkit/executable.go @@ -23,8 +23,6 @@ import ( "path/filepath" "sort" "strings" - - log "github.com/sirupsen/logrus" ) type executableTarget struct { @@ -33,6 +31,7 @@ type executableTarget struct { } type executable struct { + fileInstaller source string target executableTarget env map[string]string @@ -43,21 +42,21 @@ type executable struct { // install installs an executable component of the NVIDIA container toolkit. The source executable // is copied to a `.real` file and a wapper is created to set up the environment as required. 
func (e executable) install(destFolder string) (string, error) { - log.Infof("Installing executable '%v' to %v", e.source, destFolder) + e.logger.Infof("Installing executable '%v' to %v", e.source, destFolder) dotfileName := e.dotfileName() - installedDotfileName, err := installFileToFolderWithName(destFolder, dotfileName, e.source) + installedDotfileName, err := e.installFileToFolderWithName(destFolder, dotfileName, e.source) if err != nil { return "", fmt.Errorf("error installing file '%v' as '%v': %v", e.source, dotfileName, err) } - log.Infof("Installed '%v'", installedDotfileName) + e.logger.Infof("Installed '%v'", installedDotfileName) wrapperFilename, err := e.installWrapper(destFolder, installedDotfileName) if err != nil { return "", fmt.Errorf("error wrapping '%v': %v", installedDotfileName, err) } - log.Infof("Installed wrapper '%v'", wrapperFilename) + e.logger.Infof("Installed wrapper '%v'", wrapperFilename) return wrapperFilename, nil } diff --git a/tools/container/toolkit/executable_test.go b/tools/container/toolkit/executable_test.go index 8cb47596..58ba0139 100644 --- a/tools/container/toolkit/executable_test.go +++ b/tools/container/toolkit/executable_test.go @@ -23,10 +23,13 @@ import ( "strings" "testing" + testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) func TestWrapper(t *testing.T) { + logger, _ := testlog.NewNullLogger() + const shebang = "#! 
/bin/sh" const destFolder = "/dest/folder" const dotfileName = "source.real" @@ -98,6 +101,8 @@ func TestWrapper(t *testing.T) { for i, tc := range testCases { buf := &bytes.Buffer{} + tc.e.logger = logger + err := tc.e.writeWrapperTo(buf, destFolder, dotfileName) require.NoError(t, err) @@ -107,6 +112,8 @@ func TestWrapper(t *testing.T) { } func TestInstallExecutable(t *testing.T) { + logger, _ := testlog.NewNullLogger() + inputFolder, err := os.MkdirTemp("", "") require.NoError(t, err) defer os.RemoveAll(inputFolder) @@ -121,6 +128,9 @@ func TestInstallExecutable(t *testing.T) { require.NoError(t, sourceFile.Close()) e := executable{ + fileInstaller: fileInstaller{ + logger: logger, + }, source: source, target: executableTarget{ dotfileName: "input.real", diff --git a/tools/container/toolkit/options.go b/tools/container/toolkit/options.go new file mode 100644 index 00000000..10e49b95 --- /dev/null +++ b/tools/container/toolkit/options.go @@ -0,0 +1,40 @@ +/** +# Copyright 2024 NVIDIA CORPORATION +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package toolkit + +import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + +// An Option provides a mechanism to configure an Installer. 
+type Option func(*Installer) + +func WithLogger(logger logger.Interface) Option { + return func(i *Installer) { + i.logger = logger + } +} + +func WithToolkitRoot(toolkitRoot string) Option { + return func(i *Installer) { + i.toolkitRoot = toolkitRoot + } +} + +func WithSourceRoot(sourceRoot string) Option { + return func(i *Installer) { + i.sourceRoot = sourceRoot + } +} diff --git a/tools/container/toolkit/runtime.go b/tools/container/toolkit/runtime.go index 7bdf7f77..5553d881 100644 --- a/tools/container/toolkit/runtime.go +++ b/tools/container/toolkit/runtime.go @@ -29,10 +29,10 @@ const ( // installContainerRuntimes sets up the NVIDIA container runtimes, copying the executables // and implementing the required wrapper -func installContainerRuntimes(sourceRoot string, toolkitDir string) error { +func (t *Installer) installContainerRuntimes(toolkitDir string) error { runtimes := operator.GetRuntimes() for _, runtime := range runtimes { - r := newNvidiaContainerRuntimeInstaller(filepath.Join(sourceRoot, runtime.Path)) + r := t.newNvidiaContainerRuntimeInstaller(runtime.Path) _, err := r.install(toolkitDir) if err != nil { @@ -46,17 +46,17 @@ func installContainerRuntimes(sourceRoot string, toolkitDir string) error { // This installer will copy the specified source executable to the toolkit directory. // The executable is copied to a file with the same name as the source, but with a ".real" suffix and a wrapper is // created to allow for the configuration of the runtime environment. 
-func newNvidiaContainerRuntimeInstaller(source string) *executable { +func (t *Installer) newNvidiaContainerRuntimeInstaller(source string) *executable { wrapperName := filepath.Base(source) dotfileName := wrapperName + ".real" target := executableTarget{ dotfileName: dotfileName, wrapperName: wrapperName, } - return newRuntimeInstaller(source, target, nil) + return t.newRuntimeInstaller(source, target, nil) } -func newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable { +func (t *Installer) newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable { preLines := []string{ "", "cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1", @@ -74,10 +74,11 @@ func newRuntimeInstaller(source string, target executableTarget, env map[string] } r := executable{ - source: source, - target: target, - env: runtimeEnv, - preLines: preLines, + fileInstaller: t.fileInstaller, + source: source, + target: target, + env: runtimeEnv, + preLines: preLines, } return &r diff --git a/tools/container/toolkit/runtime_test.go b/tools/container/toolkit/runtime_test.go index d2841506..36765409 100644 --- a/tools/container/toolkit/runtime_test.go +++ b/tools/container/toolkit/runtime_test.go @@ -21,11 +21,18 @@ import ( "strings" "testing" + testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) { - r := newNvidiaContainerRuntimeInstaller(nvidiaContainerRuntimeSource) + logger, _ := testlog.NewNullLogger() + i := Installer{ + fileInstaller: fileInstaller{ + logger: logger, + }, + } + r := i.newNvidiaContainerRuntimeInstaller(nvidiaContainerRuntimeSource) const shebang = "#! 
/bin/sh" const destFolder = "/dest/folder" diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 4aa04fc9..666708dc 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -17,7 +17,6 @@ package toolkit import ( - "errors" "fmt" "io" "os" @@ -29,6 +28,7 @@ import ( "tags.cncf.io/container-device-interface/pkg/parser" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" transformroot "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform/root" @@ -80,6 +80,28 @@ type Options struct { optInFeatures cli.StringSlice } +type Installer struct { + fileInstaller + toolkitRoot string +} + +type fileInstaller struct { + logger logger.Interface + sourceRoot string +} + +func NewInstaller(opts ...Option) *Installer { + i := &Installer{} + for _, opt := range opts { + opt(i) + } + + if i.logger == nil { + i.logger = logger.New() + } + return i +} + func Flags(opts *Options) []cli.Flag { flags := []cli.Flag{ &cli.StringFlag{ @@ -213,9 +235,12 @@ func Flags(opts *Options) []cli.Flag { } // ValidateOptions checks whether the specified options are valid -func ValidateOptions(opts *Options, toolkitRoot string) error { - if toolkitRoot == "" { - return fmt.Errorf("invalid --toolkit-root option: %v", toolkitRoot) +func (t *Installer) ValidateOptions(opts *Options) error { + if t == nil { + return fmt.Errorf("toolkit installer is not initialized") + } + if t.toolkitRoot == "" { + return fmt.Errorf("invalid --toolkit-root option: %v", t.toolkitRoot) } vendor, class := parser.ParseQualifier(opts.cdiKind) @@ -229,7 +254,7 @@ func ValidateOptions(opts *Options, toolkitRoot string) error { opts.cdiClass = class if opts.cdiEnabled && opts.cdiOutputDir == "" { - log.Warning("Skipping CDI spec generation (no output
directory specified)") + t.logger.Warning("Skipping CDI spec generation (no output directory specified)") opts.cdiEnabled = false } @@ -244,7 +269,7 @@ func ValidateOptions(opts *Options, toolkitRoot string) error { } } if !opts.cdiEnabled && !isDisabled { - log.Info("disabling device node creation since --cdi-enabled=false") + t.logger.Info("disabling device node creation since --cdi-enabled=false") isDisabled = true } if isDisabled { @@ -257,88 +282,91 @@ func ValidateOptions(opts *Options, toolkitRoot string) error { // Install installs the components of the NVIDIA container toolkit. // The specified sourceRoot is searched for the components to install. // Any existing installation is removed. -func Install(cli *cli.Context, opts *Options, sourceRoot string, toolkitRoot string) error { - log.Infof("Installing NVIDIA container toolkit to '%v'", toolkitRoot) +func (t *Installer) Install(cli *cli.Context, opts *Options) error { + if t == nil { + return fmt.Errorf("toolkit installer is not initialized") + } + t.logger.Infof("Installing NVIDIA container toolkit to '%v'", t.toolkitRoot) - log.Infof("Removing existing NVIDIA container toolkit installation") - err := os.RemoveAll(toolkitRoot) + t.logger.Infof("Removing existing NVIDIA container toolkit installation") + err := os.RemoveAll(t.toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error removing toolkit directory: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) } - toolkitConfigDir := filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime") + toolkitConfigDir := filepath.Join(t.toolkitRoot, ".config", "nvidia-container-runtime") toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) - err = createDirectories(toolkitRoot, toolkitConfigDir) + err = t.createDirectories(t.toolkitRoot, toolkitConfigDir)
if err != nil && !opts.ignoreErrors { return fmt.Errorf("could not create required directories: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) } - err = installContainerLibraries(sourceRoot, toolkitRoot) + err = t.installContainerLibraries(t.toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container library: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) } - err = installContainerRuntimes(sourceRoot, toolkitRoot) + err = t.installContainerRuntimes(t.toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) } - nvidiaContainerCliExecutable, err := installContainerCLI(sourceRoot, toolkitRoot) + nvidiaContainerCliExecutable, err := t.installContainerCLI(t.toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container CLI: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) } - nvidiaContainerRuntimeHookPath, err := installRuntimeHook(sourceRoot, toolkitRoot, toolkitConfigPath) + nvidiaContainerRuntimeHookPath, err := t.installRuntimeHook(t.toolkitRoot, toolkitConfigPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing 
NVIDIA container runtime hook: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)) } - nvidiaCTKPath, err := installContainerToolkitCLI(sourceRoot, toolkitRoot) + nvidiaCTKPath, err := t.installContainerToolkitCLI(t.toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) } - nvidiaCDIHookPath, err := installContainerCDIHookCLI(sourceRoot, toolkitRoot) + nvidiaCDIHookPath, err := t.installContainerCDIHookCLI(t.toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)) } - err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) + err = t.installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)) } - err = createDeviceNodes(opts) + err = t.createDeviceNodes(opts) if err 
!= nil && !opts.ignoreErrors { return fmt.Errorf("error creating device nodes: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error creating device nodes: %v", err)) } - err = generateCDISpec(opts, nvidiaCDIHookPath) + err = t.generateCDISpec(opts, nvidiaCDIHookPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error generating CDI specification: %v", err) } else if err != nil { - log.Errorf("Ignoring error: %v", fmt.Errorf("error generating CDI specification: %v", err)) + t.logger.Errorf("Ignoring error: %v", fmt.Errorf("error generating CDI specification: %v", err)) } return nil @@ -349,8 +377,8 @@ func Install(cli *cli.Context, opts *Options, sourceRoot string, toolkitRoot str // A predefined set of library candidates are considered, with the first one // resulting in success being installed to the toolkit folder. The install process // resolves the symlink for the library and copies the versioned library itself. -func installContainerLibraries(sourceRoot string, toolkitRoot string) error { - log.Infof("Installing NVIDIA container library to '%v'", toolkitRoot) +func (t *Installer) installContainerLibraries(toolkitRoot string) error { + t.logger.Infof("Installing NVIDIA container library to '%v'", toolkitRoot) libs := []string{ "libnvidia-container.so.1", @@ -358,7 +386,7 @@ func installContainerLibraries(sourceRoot string, toolkitRoot string) error { } for _, l := range libs { - err := installLibrary(l, sourceRoot, toolkitRoot) + err := t.installLibrary(l, toolkitRoot) if err != nil { return fmt.Errorf("failed to install %s: %v", l, err) } @@ -368,23 +396,23 @@ func installContainerLibraries(sourceRoot string, toolkitRoot string) error { } // installLibrary installs the specified library to the toolkit directory. 
-func installLibrary(libName string, sourceRoot string, toolkitRoot string) error { - libraryPath, err := findLibrary(sourceRoot, libName) +func (t *Installer) installLibrary(libName string, toolkitRoot string) error { + libraryPath, err := t.findLibrary(libName) if err != nil { return fmt.Errorf("error locating NVIDIA container library: %v", err) } - installedLibPath, err := installFileToFolder(toolkitRoot, libraryPath) + installedLibPath, err := t.installFileToFolder(toolkitRoot, libraryPath) if err != nil { return fmt.Errorf("error installing %v to %v: %v", libraryPath, toolkitRoot, err) } - log.Infof("Installed '%v' to '%v'", libraryPath, installedLibPath) + t.logger.Infof("Installed '%v' to '%v'", libraryPath, installedLibPath) if filepath.Base(installedLibPath) == libName { return nil } - err = installSymlink(toolkitRoot, libName, installedLibPath) + err = t.installSymlink(toolkitRoot, libName, installedLibPath) if err != nil { return fmt.Errorf("error installing symlink for NVIDIA container library: %v", err) } @@ -394,8 +422,8 @@ func installLibrary(libName string, sourceRoot string, toolkitRoot string) error // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // that the settings are updated to match the desired install and nvidia driver directories. 
-func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { - log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) +func (t *Installer) installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { + t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) cfg, err := config.New( config.WithConfigFile(nvidiaContainerToolkitConfigSource), @@ -457,11 +485,11 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai for key, value := range optionalConfigValues { if !c.IsSet(key) { - log.Infof("Skipping unset option: %v", key) + t.logger.Infof("Skipping unset option: %v", key) continue } if value == nil { - log.Infof("Skipping option with nil value: %v", key) + t.logger.Infof("Skipping option with nil value: %v", key) continue } @@ -476,7 +504,7 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai } value = v.Value() default: - log.Warningf("Unexpected type for option %v=%v: %T", key, value, v) + t.logger.Warningf("Unexpected type for option %v=%v: %T", key, value, v) } cfg.Set(key, value) @@ -488,16 +516,17 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai os.Stdout.WriteString("Using config:\n") if _, err = cfg.WriteTo(os.Stdout); err != nil { - log.Warningf("Failed to output config to STDOUT: %v", err) + t.logger.Warningf("Failed to output config to STDOUT: %v", err) } return nil } // installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper. 
-func installContainerToolkitCLI(sourceRoot string, toolkitDir string) (string, error) { +func (t *Installer) installContainerToolkitCLI(toolkitDir string) (string, error) { e := executable{ - source: filepath.Join(sourceRoot, "/usr/bin/nvidia-ctk"), + fileInstaller: t.fileInstaller, + source: "/usr/bin/nvidia-ctk", target: executableTarget{ dotfileName: "nvidia-ctk.real", wrapperName: "nvidia-ctk", @@ -508,9 +537,10 @@ func installContainerToolkitCLI(sourceRoot string, toolkitDir string) (string, e } // installContainerCDIHookCLI installs the nvidia-cdi-hook CLI executable and wrapper. -func installContainerCDIHookCLI(sourceRoot string, toolkitDir string) (string, error) { +func (t *Installer) installContainerCDIHookCLI(toolkitDir string) (string, error) { e := executable{ - source: filepath.Join(sourceRoot, "/usr/bin/nvidia-cdi-hook"), + fileInstaller: t.fileInstaller, + source: "/usr/bin/nvidia-cdi-hook", target: executableTarget{ dotfileName: "nvidia-cdi-hook.real", wrapperName: "nvidia-cdi-hook", @@ -522,15 +552,16 @@ func installContainerCDIHookCLI(sourceRoot string, toolkitDir string) (string, e // installContainerCLI sets up the NVIDIA container CLI executable, copying the executable // and implementing the required wrapper -func installContainerCLI(sourceRoot string, toolkitRoot string) (string, error) { - log.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource) +func (t *Installer) installContainerCLI(toolkitRoot string) (string, error) { + t.logger.Infof("Installing NVIDIA container CLI from '%v'", nvidiaContainerCliSource) env := map[string]string{ "LD_LIBRARY_PATH": toolkitRoot, } e := executable{ - source: filepath.Join(sourceRoot, nvidiaContainerCliSource), + fileInstaller: t.fileInstaller, + source: nvidiaContainerCliSource, target: executableTarget{ dotfileName: "nvidia-container-cli.real", wrapperName: "nvidia-container-cli", @@ -547,15 +578,16 @@ func installContainerCLI(sourceRoot string, toolkitRoot string) (string, 
error) // installRuntimeHook sets up the NVIDIA runtime hook, copying the executable // and implementing the required wrapper -func installRuntimeHook(sourceRoot string, toolkitRoot string, configFilePath string) (string, error) { - log.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource) +func (t *Installer) installRuntimeHook(toolkitRoot string, configFilePath string) (string, error) { + t.logger.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource) argLines := []string{ fmt.Sprintf("-config \"%s\"", configFilePath), } e := executable{ - source: filepath.Join(sourceRoot, nvidiaContainerRuntimeHookSource), + fileInstaller: t.fileInstaller, + source: nvidiaContainerRuntimeHookSource, target: executableTarget{ dotfileName: "nvidia-container-runtime-hook.real", wrapperName: "nvidia-container-runtime-hook", @@ -568,7 +600,7 @@ func installRuntimeHook(sourceRoot string, toolkitRoot string, configFilePath st return "", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) } - err = installSymlink(toolkitRoot, "nvidia-container-toolkit", installedPath) + err = t.installSymlink(toolkitRoot, "nvidia-container-toolkit", installedPath) if err != nil { return "", fmt.Errorf("error installing symlink to NVIDIA container runtime hook: %v", err) } @@ -578,10 +610,10 @@ func installRuntimeHook(sourceRoot string, toolkitRoot string, configFilePath st // installSymlink creates a symlink in the toolkitDirectory that points to the specified target. 
// Note: The target is assumed to be local to the toolkit directory -func installSymlink(toolkitRoot string, link string, target string) error { +func (t *Installer) installSymlink(toolkitRoot string, link string, target string) error { symlinkPath := filepath.Join(toolkitRoot, link) targetPath := filepath.Base(target) - log.Infof("Creating symlink '%v' -> '%v'", symlinkPath, targetPath) + t.logger.Infof("Creating symlink '%v' -> '%v'", symlinkPath, targetPath) err := os.Symlink(targetPath, symlinkPath) if err != nil { @@ -594,15 +626,15 @@ func installSymlink(toolkitRoot string, link string, target string) error { // The path of the input file is ignored. // e.g. installFileToFolder("/some/path/file.txt", "/output/path") // will result in a file "/output/path/file.txt" being generated -func installFileToFolder(destFolder string, src string) (string, error) { +func (t *fileInstaller) installFileToFolder(destFolder string, src string) (string, error) { name := filepath.Base(src) - return installFileToFolderWithName(destFolder, name, src) + return t.installFileToFolderWithName(destFolder, name, src) } // cp src destFolder/name -func installFileToFolderWithName(destFolder string, name, src string) (string, error) { +func (t *fileInstaller) installFileToFolderWithName(destFolder string, name, src string) (string, error) { dest := filepath.Join(destFolder, name) - err := installFile(dest, src) + err := t.installFile(dest, src) if err != nil { return "", fmt.Errorf("error copying '%v' to '%v': %v", src, dest, err) } @@ -611,8 +643,9 @@ func installFileToFolderWithName(destFolder string, name, src string) (string, e // installFile copies a file from src to dest and maintains // file modes -func installFile(dest string, src string) error { - log.Infof("Installing '%v' to '%v'", src, dest) +func (t *fileInstaller) installFile(dest string, src string) error { + src = filepath.Join(t.sourceRoot, src) + t.logger.Infof("Installing '%v' to '%v'", src, dest) source, err := 
os.Open(src) if err != nil { @@ -654,8 +687,8 @@ func applyModeFromSource(dest string, src string) error { // findLibrary searches a set of candidate libraries in the specified root for // a given library name -func findLibrary(root string, libName string) (string, error) { - log.Infof("Finding library %v (root=%v)", libName, root) +func (t *Installer) findLibrary(libName string) (string, error) { + t.logger.Infof("Finding library %v (root=%v)", libName, t.sourceRoot) candidateDirs := []string{ "/usr/lib64", @@ -664,16 +697,16 @@ func findLibrary(root string, libName string) (string, error) { } for _, d := range candidateDirs { - l := filepath.Join(root, d, libName) - log.Infof("Checking library candidate '%v'", l) + l := filepath.Join(t.sourceRoot, d, libName) + t.logger.Infof("Checking library candidate '%v'", l) - libraryCandidate, err := resolveLink(l) + libraryCandidate, err := t.resolveLink(l) if err != nil { - log.Infof("Skipping library candidate '%v': %v", l, err) + t.logger.Infof("Skipping library candidate '%v': %v", l, err) continue } - return libraryCandidate, nil + return strings.TrimPrefix(libraryCandidate, t.sourceRoot), nil } return "", fmt.Errorf("error locating library '%v'", libName) @@ -682,20 +715,20 @@ func findLibrary(root string, libName string) (string, error) { // resolveLink finds the target of a symlink or the file itself in the // case of a regular file.
// This is equivalent to running `readlink -f ${l}` -func resolveLink(l string) (string, error) { +func (t *Installer) resolveLink(l string) (string, error) { resolved, err := filepath.EvalSymlinks(l) if err != nil { return "", fmt.Errorf("error resolving link '%v': %v", l, err) } if l != resolved { - log.Infof("Resolved link: '%v' => '%v'", l, resolved) + t.logger.Infof("Resolved link: '%v' => '%v'", l, resolved) } return resolved, nil } -func createDirectories(dir ...string) error { +func (t *Installer) createDirectories(dir ...string) error { for _, d := range dir { - log.Infof("Creating directory '%v'", d) + t.logger.Infof("Creating directory '%v'", d) err := os.MkdirAll(d, 0755) if err != nil { return fmt.Errorf("error creating directory: %v", err) @@ -704,7 +737,7 @@ func createDirectories(dir ...string) error { return nil } -func createDeviceNodes(opts *Options) error { +func (t *Installer) createDeviceNodes(opts *Options) error { modes := opts.createDeviceNodes.Value() if len(modes) == 0 { return nil @@ -718,9 +751,9 @@ func createDeviceNodes(opts *Options) error { } for _, mode := range modes { - log.Infof("Creating %v device nodes at %v", mode, opts.DevRootCtrPath) + t.logger.Infof("Creating %v device nodes at %v", mode, opts.DevRootCtrPath) if mode != "control" { - log.Warningf("Unrecognised device mode: %v", mode) + t.logger.Warningf("Unrecognised device mode: %v", mode) continue } if err := devices.CreateNVIDIAControlDevices(); err != nil { @@ -731,12 +764,13 @@ func createDeviceNodes(opts *Options) error { } // generateCDISpec generates a CDI spec for use in management containers -func generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { +func (t *Installer) generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { if !opts.cdiEnabled { return nil } - log.Info("Generating CDI spec for management containers") + t.logger.Info("Generating CDI spec for management containers") cdilib, err := nvcdi.New( + nvcdi.WithLogger(t.logger), 
nvcdi.WithMode(nvcdi.ModeManagement), nvcdi.WithDriverRoot(opts.DriverRootCtrPath), nvcdi.WithDevRoot(opts.DevRootCtrPath), diff --git a/tools/container/toolkit/toolkit_test.go b/tools/container/toolkit/toolkit_test.go index bab94c4a..ba60cb87 100644 --- a/tools/container/toolkit/toolkit_test.go +++ b/tools/container/toolkit/toolkit_test.go @@ -23,6 +23,7 @@ import ( "strings" "testing" + testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" "github.com/urfave/cli/v2" @@ -33,6 +34,7 @@ import ( func TestInstall(t *testing.T) { t.Setenv("__NVCT_TESTING_DEVICES_ARE_FILES", "true") + logger, _ := testlog.NewNullLogger() moduleRoot, err := test.GetModuleRoot() require.NoError(t, err) @@ -121,9 +123,14 @@ kind: example.com/class cdiKind: "example.com/class", } - require.NoError(t, ValidateOptions(&options, toolkitRoot)) + ti := NewInstaller( + WithLogger(logger), + WithToolkitRoot(toolkitRoot), + WithSourceRoot(sourceRoot), + ) + require.NoError(t, ti.ValidateOptions(&options)) - err := Install(&cli.Context{}, &options, sourceRoot, toolkitRoot) + err := ti.Install(&cli.Context{}, &options) if tc.expectedError == nil { require.NoError(t, err) } else {