From 8e10651ee94b8bc63e770c91b0809abaaecd62d4 Mon Sep 17 00:00:00 2001
From: Tully Foote
Date: Wed, 18 Nov 2020 16:21:42 -0800
Subject: [PATCH] add checks for whether nvidia-docker2 is installed

Fixes #88
---
 src/rocker/core.py             | 36 +++++++++++++++++++++++++++++-----
 src/rocker/nvidia_extension.py | 23 ++++++++++++++++++++++
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/src/rocker/core.py b/src/rocker/core.py
index 89f98a87..ade86a21 100755
--- a/src/rocker/core.py
+++ b/src/rocker/core.py
@@ -47,6 +47,10 @@ class DependencyMissing(RuntimeError):
     pass
 
 
+class PrerequisiteCheckError(RuntimeError):
+    pass
+
+
 class RockerExtension(object):
     """The base class for Rocker extension points"""
 
@@ -54,10 +58,20 @@ def precondition_environment(self, cliargs):
         """Modify the local environment such as setup tempfiles"""
         pass
 
-    def validate_environment(self, cliargs):
-        """ Check that the environment is something that can be used.
+    def check_build_prerequisites(self, cliargs):
+        """ Check that the environment is something that can be used for the build.
+        This will check that we're on the right base OS and that the
+        necessary resources are available, like hardware.
+        Raises PrerequisiteCheckError on failure
+        """
+        pass
+
+    def check_run_prerequisites(self, cliargs):
+        """ Check that the environment is something that can be used for running the container.
         This will check that we're on the right base OS and that the
-        necessary resources are available, like hardware."""
+        necessary resources are available, like hardware.
+        Raises PrerequisiteCheckError on failure
+        """
         pass
 
     def get_preamble(self, cliargs):
@@ -203,6 +217,14 @@ def __init__(self, active_extensions, cliargs, base_image):
         self.image_id = None
 
     def build(self, **kwargs):
+        # Check prerequisites
+        for e in self.active_extensions:
+            try:
+                e.check_build_prerequisites(self.cliargs)
+            except PrerequisiteCheckError as ex:
+                print("Failed to validate prerequisites to build for extension [%s] with error: %s\nNot executing run." % (e.get_name(), ex))
+                return 1
+
         with tempfile.TemporaryDirectory() as td:
             df = os.path.join(td, 'Dockerfile')
             print("Writing dockerfile to %s" % df)
@@ -240,10 +262,14 @@ def run(self, command='', **kwargs):
 
         for e in self.active_extensions:
             try:
+                e.check_run_prerequisites(self.cliargs)
                 e.precondition_environment(self.cliargs)
+            except PrerequisiteCheckError as ex:
+                print("Failed to validate prerequisites to run for extension [%s] with error: %s\nNot executing run." % (e.get_name(), ex))
+                return 1
             except subprocess.CalledProcessError as ex:
-                print("Failed to precondition for extension [%s] with error: %s\ndeactivating" % (e.get_name(), ex))
-                # TODO(tfoote) remove the extension from the list
+                print("Failed to precondition environment for extension [%s] with error: %s\nNot executing run." % (e.get_name(), ex))
+                return 1
 
         docker_args = ''
 
diff --git a/src/rocker/nvidia_extension.py b/src/rocker/nvidia_extension.py
index 1441eadc..e12bae09 100644
--- a/src/rocker/nvidia_extension.py
+++ b/src/rocker/nvidia_extension.py
@@ -27,6 +27,7 @@
 from .extensions import name_to_argument
 from .core import get_docker_client
 from .core import RockerExtension
+from .core import PrerequisiteCheckError
 
 def get_docker_version():
     docker_version_raw = get_docker_client().version()['Version']
@@ -125,6 +126,28 @@ def get_docker_args(self, cliargs):
             return " --gpus all"
         return " --runtime=nvidia"
 
+    def check_run_prerequisites(self, cliargs):
+        """Run nvidia-smi in a throwaway CUDA container to confirm GPU access.
+
+        Raises PrerequisiteCheckError if the docker command cannot be run or fails.
+        """
+        nvidia_flag = ''
+        if get_docker_version() >= Version("19.03"):
+            nvidia_flag = " --gpus all"
+        else:
+            nvidia_flag = " --runtime=nvidia"
+
+        cmd = 'docker run --rm %s nvidia/cuda:11.0-base nvidia-smi' % nvidia_flag
+        try:
+            subprocess.check_call(cmd.split())
+        except (subprocess.CalledProcessError, OSError) as ex:
+            errstr = 'Failed to detect nvidia hardware.'
+            if get_docker_version() >= Version("19.03"):
+                errstr += ' Is nvidia-container-toolkit installed?'
+            else:
+                errstr += ' Is nvidia-docker2 installed?'
+            raise PrerequisiteCheckError(errstr) from ex
+
     @staticmethod
     def register_arguments(parser, defaults={}):
         parser.add_argument(name_to_argument(Nvidia.get_name()),