diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 6760fea94..5c1e8d730 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -18,6 +18,7 @@ from dataall.base.aws.sts import SessionHelper from dataall.base.db import Engine from dataall.base.utils.alarm_service import AlarmService +from dataall.base.utils.shell_utils import CommandSanitizer logger = logging.getLogger('cdksass') @@ -44,12 +45,18 @@ def aws_configure(profile_name='default'): print('..............................................') print(' Running configure ') print('..............................................') - print(f"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: {os.getenv('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI')}") - cmd = ['curl', '169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI'] - process = subprocess.run(' '.join(cmd), text=True, shell=True, encoding='utf-8', capture_output=True) # nosec # nosemgrep + AWS_CONTAINER_CREDENTIALS_RELATIVE_URI = os.getenv('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI') + print(f"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: {AWS_CONTAINER_CREDENTIALS_RELATIVE_URI}") + cmd = ['curl', f'169.254.170.2{AWS_CONTAINER_CREDENTIALS_RELATIVE_URI}'] + process = subprocess.run(cmd, text=True, shell=False, encoding='utf-8', capture_output=True) creds = None if process.returncode == 0: creds = ast.literal_eval(process.stdout) + print(f"Successfully curled credentials: {str(process.stdout)}, credentials = {creds}") + else: + print( + f'Failed clean curl credentials due to {str(process.stderr)}' + ) return creds @@ -130,6 +137,16 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s app_path = app_path or './app.py' logger.info(f'app_path: {app_path}') + input_args = [ + stack.name, + stack.accountid, + stack.region, + stack.stack, + stack.targetUri + ] + + CommandSanitizer(input_args) + cmd = [ '' '. ~/.nvm/nvm.sh &&', 'cdk', @@ -157,9 +174,11 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s f'"{sys.executable} {app_path}"', '--verbose', ] - logger.info(f"Running command : \n {' '.join(cmd)}") + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have to be sanitized with the CommandSanitizer + process = subprocess.run( # nosemgrep ' '.join(cmd), # nosemgrep text=True, # nosemgrep @@ -208,17 +227,28 @@ def describe_stack(stack, engine: Engine = None, stackid: str = None): def cdk_installed(): - cmd = ['. ~/.nvm/nvm.sh && cdk', '--version'] - logger.info(f"Running command {' '.join(cmd)}") - - subprocess.run( # nosemgrep - cmd, # nosemgrep - text=True, # nosemgrep - shell=True, # nosec # nosemgrep - encoding='utf-8', # nosemgrep - stdout=subprocess.PIPE, # nosemgrep - stderr=subprocess.PIPE, # nosemgrep - cwd=os.path.dirname(__file__), # nosemgrep + cmd1 = ['.', '~/.nvm/nvm.sh'] + logger.info(f"Running command {' '.join(cmd1)}") + subprocess.run( + cmd1, + text=True, + shell=False, + encoding='utf-8', + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=os.path.dirname(__file__), + ) + + cmd2 = ['cdk', '--version'] + logger.info(f"Running command {' '.join(cmd2)}") + subprocess.run( + cmd2, + text=True, + shell=False, + encoding='utf-8', + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=os.path.dirname(__file__), ) diff --git a/backend/dataall/base/utils/shell_utils.py b/backend/dataall/base/utils/shell_utils.py new file mode 100644 index 000000000..373c6ebca --- /dev/null +++ b/backend/dataall/base/utils/shell_utils.py @@ -0,0 +1,23 @@ +import re + + +class CommandSanitizer: + """ + Takes a list of arguments and verifies that each argument is alphanumeric or "-" or "_" and of type string + Trows a TypeError if any of these conditions is violated. + """ + def __init__(self, args) -> None: + if not args: + raise TypeError("Arguments cannot be empty") + + for arg in args: + if not isinstance(arg, str): + raise TypeError(f"arguments must be strings, not {type(arg)} of {str(arg)}") + if re.search(r"[^a-zA-Z0-9-_]", arg): + raise TypeError(f"argument contains invalid characters: {arg}") + + self._arguments = args + + @property + def arguments(self): + return self._arguments diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py index 9c2bd2050..d82f7c2ca 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py @@ -7,6 +7,7 @@ from dataall.base import db from dataall.base.aws.sts import SessionHelper +from dataall.base.utils.shell_utils import CommandSanitizer from dataall.core.environment.services.environment_service import EnvironmentService from dataall.modules.datapipelines.db.datapipelines_repositories import DatapipelinesRepository @@ -71,6 +72,10 @@ def __init__(self, target_uri): 'COMMITID=$(aws codecommit get-branch --repository-name ${REPO_NAME} --branch-name main --query branch.commitId --output text)', 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://app.py --file-path app.py --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', ] + CommandSanitizer(args=[self.pipeline.repo]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer process = subprocess.run( # nosemgrep "; ".join(update_cmds), # nosemgrep @@ -99,6 +104,11 @@ def initialize_repo(self): logger.info(f"Running Commands: {'; '.join(cmd_init)}") + CommandSanitizer(args=[self.pipeline.repo, self.pipeline.SamlGroupName]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer + process = subprocess.run( # nosemgrep '; '.join(cmd_init), # nosemgrep text=True, # nosemgrep @@ -201,6 +211,9 @@ def git_push_repo(self): logger.info(f"Running Commands: {'; '.join(git_cmds)}") + # This command does not include any customer upstream input + # no sanitization is needed and shell=true does not impose a risk + process = subprocess.run( # nosemgrep '; '.join(git_cmds), # nosemgrep text=True, # nosemgrep @@ -215,23 +228,17 @@ def git_push_repo(self): @staticmethod def clean_up_repo(path): if path: - precmd = [ - 'deactivate;', - 'rm', - '-rf', - f"{path}" - ] - + cmd = ['rm', '-rf', f"{path}"] cwd = os.path.dirname(os.path.abspath(__file__)) - logger.info(f"Running command : \n {' '.join(precmd)}") - - process = subprocess.run( # nosemgrep - ' '.join(precmd), # nosemgrep - text=True, # nosemgrep - shell=True, # nosec # nosemgrep - encoding='utf-8', # nosemgrep - capture_output=True, # nosemgrep - cwd=cwd # nosemgrep + logger.info(f"Running command : \n {' '.join(cmd)}") + + process = subprocess.run( + cmd, + text=True, + shell=False, + encoding='utf-8', + capture_output=True, + cwd=cwd ) if process.returncode == 0: diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py index c2e7f887b..db5b97484 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py @@ -22,6 +22,7 @@ from dataall.modules.datapipelines.db.datapipelines_models import DataPipeline, DataPipelineEnvironment from dataall.modules.datapipelines.db.datapipelines_repositories import DatapipelinesRepository from dataall.base.utils.cdk_nag_utils import CDKNagUtil +from dataall.base.utils.shell_utils import CommandSanitizer logger = logging.getLogger(__name__) @@ -186,6 +187,11 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://ddk.json --file-path ddk.json --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', ] + CommandSanitizer(args=[pipeline.repo]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer + process = subprocess.run( # nosemgrep "; ".join(update_cmds), # nosemgrep text=True, # nosemgrep @@ -507,6 +513,7 @@ def write_ddk_json_multienvironment(path, output_file, pipeline_environment, dev with open(f'{path}/{output_file}', 'w') as text_file: print(json, file=text_file) + @staticmethod def initialize_repo(pipeline, code_dir_path, env_vars): venv_name = ".venv" @@ -521,6 +528,11 @@ def initialize_repo(pipeline, code_dir_path, env_vars): logger.info(f"Running Commands: {'; '.join(cmd_init)}") + CommandSanitizer(args=[pipeline.repo]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer + process = subprocess.run( # nosemgrep '; '.join(cmd_init), # nosemgrep text=True, # nosemgrep diff --git a/deploy/stacks/solution_bundling.py b/deploy/stacks/solution_bundling.py index aee909f33..62bb3a119 100644 --- a/deploy/stacks/solution_bundling.py +++ b/deploy/stacks/solution_bundling.py @@ -14,18 +14,21 @@ def __init__(self, source_path=None): def try_bundle(self, output_dir: str, options: BundlingOptions) -> bool: requirements_path = Path(self.source_path, 'requirements.txt') - command = [ - f'cp -a {self.source_path}/. {output_dir}/ && pip install -r {requirements_path} -t {output_dir}' - ] - subprocess.check_output( # nosemgrep - command, # nosemgrep - stderr=subprocess.STDOUT, # nosemgrep - shell=True, # nosec # nosemgrep + subprocess.check_output( + ['cp', '-a', f'{self.source_path}/.', f'{output_dir}/'], + stderr=subprocess.STDOUT, + shell=False, ) - ls_output = subprocess.check_output( # nosemgrep - [f'ls -ll {output_dir}'], # nosemgrep - stderr=subprocess.STDOUT, # nosemgrep - shell=True, # nosec # nosemgrep + subprocess.check_output( + ['pip', 'install', '-r', requirements_path, '-t', output_dir], + stderr=subprocess.STDOUT, + shell=False, + ) + + subprocess.check_output( + ['ls', '-ll', f'{output_dir}'], + stderr=subprocess.STDOUT, + shell=False, ) return True