From 4cdc02b42386bb3945047bd12fcd516c5f45641a Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 13 Sep 2023 10:00:18 +0200 Subject: [PATCH 01/10] Fix shell=true in solutions bundling --- deploy/stacks/solution_bundling.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/deploy/stacks/solution_bundling.py b/deploy/stacks/solution_bundling.py index a78156401..5cf7740d0 100644 --- a/deploy/stacks/solution_bundling.py +++ b/deploy/stacks/solution_bundling.py @@ -14,18 +14,21 @@ def __init__(self, source_path=None): def try_bundle(self, output_dir: str, options: BundlingOptions) -> bool: requirements_path = Path(self.source_path, 'requirements.txt') - command = [ - f'cp -a {self.source_path}/. {output_dir}/ && pip install -r {requirements_path} -t {output_dir}' - ] subprocess.check_output( - command, + ['cp', 'a', f'{self.source_path}/.', f'{output_dir}/'], stderr=subprocess.STDOUT, - shell=True, + shell=False, + ) + + subprocess.check_output( + ['pip', 'install', '-r', requirements_path, '-t', output_dir], + stderr=subprocess.STDOUT, + shell=False, ) ls_output = subprocess.check_output( - [f'ls -ll {output_dir}'], + ['ls', '-ll', f'{output_dir}'], stderr=subprocess.STDOUT, - shell=True, + shell=False, ) return True From ad3b590fa3d651dff275dade39e840c41c03b3d1 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Thu, 14 Sep 2023 13:42:26 +0200 Subject: [PATCH 02/10] shell=false in cdk_cli_wrapper --- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 71 +++++++++++++------ deploy/stacks/solution_bundling.py | 2 +- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 6760fea94..e5314a411 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -46,7 +46,7 @@ def aws_configure(profile_name='default'): print('..............................................') print(f"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: {os.getenv('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI')}") cmd = ['curl', '169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI'] - process = subprocess.run(' '.join(cmd), text=True, shell=True, encoding='utf-8', capture_output=True) # nosec # nosemgrep + process = subprocess.run(cmd, text=True, shell=False, encoding='utf-8', capture_output=True) creds = None if process.returncode == 0: creds = ast.literal_eval(process.stdout) @@ -157,16 +157,36 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s f'"{sys.executable} {app_path}"', '--verbose', ] + cmd1 = ['', '.', '~/.nvm/nvm.sh'] + logger.info(f"Running command : \n {' '.join(cmd1)}") + + process = subprocess.run( + cmd1, + text=True, + shell=False, + encoding='utf-8', + env=env, + cwd=cwd, + ) + cmd2 = ['cdk', 'deploy', '--all', '--require-approval', 'never', '-c', f"appid='{stack.name}'", + '-c', f"account='{stack.accountid}'", # the target accountid + '-c', f"region='{stack.region}'", # the target region + '-c', f"stack='{stack.stack}'", # the predefined stack + '-c', f"target_uri='{stack.targetUri}'", # the payload for the stack with additional parameters + '-c', "data='{}'", # skips synth step when no changes apply + '--app', f'"{sys.executable}', f'"{app_path}"', + '--verbose', + ] - logger.info(f"Running command : \n {' '.join(cmd)}") + logger.info(f"Running command : \n {' '.join(cmd2)}") - process = subprocess.run( # nosemgrep - ' '.join(cmd), # nosemgrep - text=True, # nosemgrep - shell=True, # nosec # nosemgrep - encoding='utf-8', # nosemgrep - env=env, # nosemgrep - cwd=cwd, # nosemgrep + process = subprocess.run( + cmd2, + text=True, + shell=False, + encoding='utf-8', + env=env, + cwd=cwd, ) if extension: @@ -208,17 +228,28 @@ def describe_stack(stack, engine: Engine = None, stackid: str = None): def cdk_installed(): - cmd = ['. ~/.nvm/nvm.sh && cdk', '--version'] - logger.info(f"Running command {' '.join(cmd)}") - - subprocess.run( # nosemgrep - cmd, # nosemgrep - text=True, # nosemgrep - shell=True, # nosec # nosemgrep - encoding='utf-8', # nosemgrep - stdout=subprocess.PIPE, # nosemgrep - stderr=subprocess.PIPE, # nosemgrep - cwd=os.path.dirname(__file__), # nosemgrep + cmd1 = ['.','~/.nvm/nvm.sh'] + logger.info(f"Running command {' '.join(cmd1)}") + subprocess.run( + cmd1, + text=True, + shell=False, + encoding='utf-8', + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=os.path.dirname(__file__), + ) + + cmd2 = ['cdk', '--version'] + logger.info(f"Running command {' '.join(cmd2)}") + subprocess.run( + cmd2, + text=True, + shell=False, + encoding='utf-8', + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=os.path.dirname(__file__), ) diff --git a/deploy/stacks/solution_bundling.py b/deploy/stacks/solution_bundling.py index 5cf7740d0..bc9450b24 100644 --- a/deploy/stacks/solution_bundling.py +++ b/deploy/stacks/solution_bundling.py @@ -15,7 +15,7 @@ def __init__(self, source_path=None): def try_bundle(self, output_dir: str, options: BundlingOptions) -> bool: requirements_path = Path(self.source_path, 'requirements.txt') subprocess.check_output( - ['cp', 'a', f'{self.source_path}/.', f'{output_dir}/'], + ['cp', '-a', f'{self.source_path}/.', f'{output_dir}/'], stderr=subprocess.STDOUT, shell=False, ) From 178c07621a735729c5f2fe083a76d88736d4f3a6 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Thu, 14 Sep 2023 13:52:06 +0200 Subject: [PATCH 03/10] linting --- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index e5314a411..d1d328a82 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -169,14 +169,14 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s cwd=cwd, ) cmd2 = ['cdk', 'deploy', '--all', '--require-approval', 'never', '-c', f"appid='{stack.name}'", - '-c', f"account='{stack.accountid}'", # the target accountid - '-c', f"region='{stack.region}'", # the target region - '-c', f"stack='{stack.stack}'", # the predefined stack - '-c', f"target_uri='{stack.targetUri}'", # the payload for the stack with additional parameters - '-c', "data='{}'", # skips synth step when no changes apply - '--app', f'"{sys.executable}', f'"{app_path}"', - '--verbose', - ] + '-c', f"account='{stack.accountid}'", # the target accountid + '-c', f"region='{stack.region}'", # the target region + '-c', f"stack='{stack.stack}'", # the predefined stack + '-c', f"target_uri='{stack.targetUri}'", # the payload for the stack with additional parameters + '-c', "data='{}'", # skips synth step when no changes apply + '--app', f'"{sys.executable}', f'"{app_path}"', + '--verbose', + ] logger.info(f"Running command : \n {' '.join(cmd2)}") @@ -228,7 +228,7 @@ def describe_stack(stack, engine: Engine = None, stackid: str = None): def cdk_installed(): - cmd1 = ['.','~/.nvm/nvm.sh'] + cmd1 = ['.', '~/.nvm/nvm.sh'] logger.info(f"Running command {' '.join(cmd1)}") subprocess.run( cmd1, From bc88a09aa9bc56762b38dcb993e9bdbe8b8c80c0 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 15 Sep 2023 15:35:14 +0200 Subject: [PATCH 04/10] Add command sanitizer --- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 49 +++++++------------ backend/dataall/base/utils/shell_utils.py | 19 +++++++ 2 files changed, 37 insertions(+), 31 deletions(-) create mode 100644 backend/dataall/base/utils/shell_utils.py diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index d1d328a82..8cd46c2e3 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -8,6 +8,7 @@ import os import subprocess import sys +import shlex from abc import abstractmethod from typing import Dict @@ -18,6 +19,7 @@ from dataall.base.aws.sts import SessionHelper from dataall.base.db import Engine from dataall.base.utils.alarm_service import AlarmService +from dataall.base.utils.shell_utils import CommandSanitizer logger = logging.getLogger('cdksass') @@ -157,36 +159,21 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s f'"{sys.executable} {app_path}"', '--verbose', ] - cmd1 = ['', '.', '~/.nvm/nvm.sh'] - logger.info(f"Running command : \n {' '.join(cmd1)}") - - process = subprocess.run( - cmd1, - text=True, - shell=False, - encoding='utf-8', - env=env, - cwd=cwd, - ) - cmd2 = ['cdk', 'deploy', '--all', '--require-approval', 'never', '-c', f"appid='{stack.name}'", - '-c', f"account='{stack.accountid}'", # the target accountid - '-c', f"region='{stack.region}'", # the target region - '-c', f"stack='{stack.stack}'", # the predefined stack - '-c', f"target_uri='{stack.targetUri}'", # the payload for the stack with additional parameters - '-c', "data='{}'", # skips synth step when no changes apply - '--app', f'"{sys.executable}', f'"{app_path}"', - '--verbose', - ] - - logger.info(f"Running command : \n {' '.join(cmd2)}") - - process = subprocess.run( - cmd2, - text=True, - shell=False, - encoding='utf-8', - env=env, - cwd=cwd, + args = ['', '.', '~/.nvm/nvm.sh'] + logger.info(f"Running command : \n {' '.join(args)}") + + # In this particular case it is not possible to break the command (string) into a list of arguments + # to remediate any possible code injection, we sanitize the arguments with the CommandSanitizer + # in any case, the only upstream input in the command is the stack.name + + process = subprocess.run( # nosemgrep + CommandSanitizer(args).command, # nosemgrep + text=True, # nosemgrep + shell=True, # nosemgrep #nosec + encoding='utf-8', # nosemgrep + env=env, # nosemgrep + cwd=cwd, # nosemgrep + stdout=subprocess.PIPE # nosemgrep ) if extension: @@ -228,7 +215,7 @@ def describe_stack(stack, engine: Engine = None, stackid: str = None): def cdk_installed(): - cmd1 = ['.', '~/.nvm/nvm.sh'] + cmd1 = ['.', '~/.nvm/nvm.sh'] #TODO: test in AWS! logger.info(f"Running command {' '.join(cmd1)}") subprocess.run( cmd1, diff --git a/backend/dataall/base/utils/shell_utils.py b/backend/dataall/base/utils/shell_utils.py new file mode 100644 index 000000000..45067ed1d --- /dev/null +++ b/backend/dataall/base/utils/shell_utils.py @@ -0,0 +1,19 @@ +import re + + +class CommandSanitizer: + def __init__(self, *args) -> None: + if not args: + raise TypeError("Identifier cannot be empty") + + for arg in args: + if not isinstance(arg, str): + raise TypeError("arguments must be strings") + if re.search(r"\W", arg): + raise TypeError(f"argument contains invalid characters: {arg}") + + self._command = ".".join(args) + + @property + def command(self) -> str: + return self._command From a8a21f156c2fd55cf285549478abe3e91813d8fe Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 26 Sep 2023 14:50:30 +0200 Subject: [PATCH 05/10] Add shell utils for command sanitization --- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 28 +++---------------- backend/dataall/base/utils/shell_utils.py | 19 +++++++++++++ 2 files changed, 23 insertions(+), 24 deletions(-) create mode 100644 backend/dataall/base/utils/shell_utils.py diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index d1d328a82..5ffee8771 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -18,6 +18,7 @@ from dataall.base.aws.sts import SessionHelper from dataall.base.db import Engine from dataall.base.utils.alarm_service import AlarmService +from dataall.base.utils.shell_utils import CommandSanitizer logger = logging.getLogger('cdksass') @@ -130,7 +131,7 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s app_path = app_path or './app.py' logger.info(f'app_path: {app_path}') - cmd = [ + args = [ '' '. ~/.nvm/nvm.sh &&', 'cdk', 'deploy --all', @@ -157,31 +158,10 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s f'"{sys.executable} {app_path}"', '--verbose', ] - cmd1 = ['', '.', '~/.nvm/nvm.sh'] - logger.info(f"Running command : \n {' '.join(cmd1)}") + logger.info(f"Running command : \n {' '.join(args)}") process = subprocess.run( - cmd1, - text=True, - shell=False, - encoding='utf-8', - env=env, - cwd=cwd, - ) - cmd2 = ['cdk', 'deploy', '--all', '--require-approval', 'never', '-c', f"appid='{stack.name}'", - '-c', f"account='{stack.accountid}'", # the target accountid - '-c', f"region='{stack.region}'", # the target region - '-c', f"stack='{stack.stack}'", # the predefined stack - '-c', f"target_uri='{stack.targetUri}'", # the payload for the stack with additional parameters - '-c', "data='{}'", # skips synth step when no changes apply - '--app', f'"{sys.executable}', f'"{app_path}"', - '--verbose', - ] - - logger.info(f"Running command : \n {' '.join(cmd2)}") - - process = subprocess.run( - cmd2, + CommandSanitizer(args).command, text=True, shell=False, encoding='utf-8', diff --git a/backend/dataall/base/utils/shell_utils.py b/backend/dataall/base/utils/shell_utils.py new file mode 100644 index 000000000..45067ed1d --- /dev/null +++ b/backend/dataall/base/utils/shell_utils.py @@ -0,0 +1,19 @@ +import re + + +class CommandSanitizer: + def __init__(self, *args) -> None: + if not args: + raise TypeError("Identifier cannot be empty") + + for arg in args: + if not isinstance(arg, str): + raise TypeError("arguments must be strings") + if re.search(r"\W", arg): + raise TypeError(f"argument contains invalid characters: {arg}") + + self._command = ".".join(args) + + @property + def command(self) -> str: + return self._command From 83c3e9e9be8415d46214b74aacd006b6c77cef39 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 2 Oct 2023 10:07:05 +0200 Subject: [PATCH 06/10] Apply command sanitizer or transform to shell=false - all commands --- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 29 ++++++++++++++----- backend/dataall/base/utils/shell_utils.py | 18 +++++++----- .../cdk/datapipelines_cdk_pipeline.py | 27 ++++++++++++----- .../cdk/datapipelines_pipeline.py | 12 ++++++++ 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 5ffee8771..b24ffa6d5 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -131,7 +131,17 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s app_path = app_path or './app.py' logger.info(f'app_path: {app_path}') - args = [ + input_args = [ + stack.name, + stack.accountid, + stack.region, + stack.stack, + stack.targetUri + ] + + CommandSanitizer(input_args) + + cmd = [ '' '. ~/.nvm/nvm.sh &&', 'cdk', 'deploy --all', @@ -158,15 +168,18 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s f'"{sys.executable} {app_path}"', '--verbose', ] - logger.info(f"Running command : \n {' '.join(args)}") + logger.info(f"Running command : \n {' '.join(cmd)}") + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer process = subprocess.run( - CommandSanitizer(args).command, - text=True, - shell=False, - encoding='utf-8', - env=env, - cwd=cwd, + ' '.join(cmd), # nosemgrep + text=True, # nosemgrep + shell=True, # nosec # nosemgrep + encoding='utf-8', # nosemgrep + env=env, # nosemgrep + cwd=cwd, # nosemgrep ) if extension: diff --git a/backend/dataall/base/utils/shell_utils.py b/backend/dataall/base/utils/shell_utils.py index 45067ed1d..373c6ebca 100644 --- a/backend/dataall/base/utils/shell_utils.py +++ b/backend/dataall/base/utils/shell_utils.py @@ -2,18 +2,22 @@ class CommandSanitizer: - def __init__(self, *args) -> None: + """ + Takes a list of arguments and verifies that each argument is alphanumeric or "-" or "_" and of type string + Trows a TypeError if any of these conditions is violated. + """ + def __init__(self, args) -> None: if not args: - raise TypeError("Identifier cannot be empty") + raise TypeError("Arguments cannot be empty") for arg in args: if not isinstance(arg, str): - raise TypeError("arguments must be strings") - if re.search(r"\W", arg): + raise TypeError(f"arguments must be strings, not {type(arg)} of {str(arg)}") + if re.search(r"[^a-zA-Z0-9-_]", arg): raise TypeError(f"argument contains invalid characters: {arg}") - self._command = ".".join(args) + self._arguments = args @property - def command(self) -> str: - return self._command + def arguments(self): + return self._arguments diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py index 9c2bd2050..de331a118 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py @@ -7,6 +7,7 @@ from dataall.base import db from dataall.base.aws.sts import SessionHelper +from dataall.base.utils.shell_utils import CommandSanitizer from dataall.core.environment.services.environment_service import EnvironmentService from dataall.modules.datapipelines.db.datapipelines_repositories import DatapipelinesRepository @@ -71,6 +72,10 @@ def __init__(self, target_uri): 'COMMITID=$(aws codecommit get-branch --repository-name ${REPO_NAME} --branch-name main --query branch.commitId --output text)', 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://app.py --file-path app.py --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', ] + CommandSanitizer(args=[self.pipeline.repo]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer process = subprocess.run( # nosemgrep "; ".join(update_cmds), # nosemgrep @@ -99,6 +104,11 @@ def initialize_repo(self): logger.info(f"Running Commands: {'; '.join(cmd_init)}") + CommandSanitizer(args=[self.pipeline.repo, self.pipeline.SamlGroupName]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer + process = subprocess.run( # nosemgrep '; '.join(cmd_init), # nosemgrep text=True, # nosemgrep @@ -201,6 +211,9 @@ def git_push_repo(self): logger.info(f"Running Commands: {'; '.join(git_cmds)}") + # This command does not include any customer upstream input + # no sanitization is needed and shell=true does not impose a risk + process = subprocess.run( # nosemgrep '; '.join(git_cmds), # nosemgrep text=True, # nosemgrep @@ -225,13 +238,13 @@ def clean_up_repo(path): cwd = os.path.dirname(os.path.abspath(__file__)) logger.info(f"Running command : \n {' '.join(precmd)}") - process = subprocess.run( # nosemgrep - ' '.join(precmd), # nosemgrep - text=True, # nosemgrep - shell=True, # nosec # nosemgrep - encoding='utf-8', # nosemgrep - capture_output=True, # nosemgrep - cwd=cwd # nosemgrep + process = subprocess.run( + precmd, + text=True, + shell=False, + encoding='utf-8', + capture_output=True, + cwd=cwd ) if process.returncode == 0: diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py index c2e7f887b..db5b97484 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py @@ -22,6 +22,7 @@ from dataall.modules.datapipelines.db.datapipelines_models import DataPipeline, DataPipelineEnvironment from dataall.modules.datapipelines.db.datapipelines_repositories import DatapipelinesRepository from dataall.base.utils.cdk_nag_utils import CDKNagUtil +from dataall.base.utils.shell_utils import CommandSanitizer logger = logging.getLogger(__name__) @@ -186,6 +187,11 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://ddk.json --file-path ddk.json --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', ] + CommandSanitizer(args=[pipeline.repo]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer + process = subprocess.run( # nosemgrep "; ".join(update_cmds), # nosemgrep text=True, # nosemgrep @@ -507,6 +513,7 @@ def write_ddk_json_multienvironment(path, output_file, pipeline_environment, dev with open(f'{path}/{output_file}', 'w') as text_file: print(json, file=text_file) + @staticmethod def initialize_repo(pipeline, code_dir_path, env_vars): venv_name = ".venv" @@ -521,6 +528,11 @@ def initialize_repo(pipeline, code_dir_path, env_vars): logger.info(f"Running Commands: {'; '.join(cmd_init)}") + CommandSanitizer(args=[pipeline.repo]) + + # This command is too complex to be executed as a list of commands. We need to run it with shell=True + # However, the input arguments have be sanitized with the CommandSanitizer + process = subprocess.run( # nosemgrep '; '.join(cmd_init), # nosemgrep text=True, # nosemgrep From d131f0195df96389b84993f46135488086e43294 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 2 Oct 2023 10:12:12 +0200 Subject: [PATCH 07/10] Small missing semgrep --- backend/dataall/base/cdkproxy/cdk_cli_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index b24ffa6d5..87963c3b6 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -173,7 +173,7 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s # This command is too complex to be executed as a list of commands. We need to run it with shell=True # However, the input arguments have be sanitized with the CommandSanitizer - process = subprocess.run( + process = subprocess.run( # nosemgrep ' '.join(cmd), # nosemgrep text=True, # nosemgrep shell=True, # nosec # nosemgrep From 1ddb04a05b462856150dfbdee0c1ea64e72d9927 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 11 Oct 2023 10:26:15 +0200 Subject: [PATCH 08/10] Semgrep fix AWS issues --- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 5 +++++ .../cdk/datapipelines_cdk_pipeline.py | 18 +++++++++++++++++- deploy/stacks/solution_bundling.py | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 87963c3b6..8a0d27719 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -51,6 +51,11 @@ def aws_configure(profile_name='default'): creds = None if process.returncode == 0: creds = ast.literal_eval(process.stdout) + print(f"Successfully curled credentials: {str(process.stdout)}") + else: + print( + f'Failed clean curl credentials due to {str(process.stderr)}' + ) return creds diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py index de331a118..4926cf83c 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py @@ -237,9 +237,25 @@ def clean_up_repo(path): cwd = os.path.dirname(os.path.abspath(__file__)) logger.info(f"Running command : \n {' '.join(precmd)}") + pre_process = subprocess.run( + ['deactivate'], + text=True, + shell=False, + encoding='utf-8', + capture_output=True, + cwd=cwd + ) + + if pre_process.returncode == 0: + print(f"Successfully cleaned cloned repo: {path}. {str(pre_process.stdout)}") + else: + logger.error( + f'Failed clean cloned repo: {path} due to {str(pre_process.stderr)}' + ) + process = subprocess.run( - precmd, + ['rm', '-rf', f"{path}"], text=True, shell=False, encoding='utf-8', diff --git a/deploy/stacks/solution_bundling.py b/deploy/stacks/solution_bundling.py index bc9450b24..62bb3a119 100644 --- a/deploy/stacks/solution_bundling.py +++ b/deploy/stacks/solution_bundling.py @@ -26,7 +26,7 @@ def try_bundle(self, output_dir: str, options: BundlingOptions) -> bool: shell=False, ) - ls_output = subprocess.check_output( + subprocess.check_output( ['ls', '-ll', f'{output_dir}'], stderr=subprocess.STDOUT, shell=False, From 0733b9159431cedf9bab716479e4299d9b54c0a5 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 11 Oct 2023 10:59:44 +0200 Subject: [PATCH 09/10] Remove deactivate and add logs --- .../dataall/base/cdkproxy/cdk_cli_wrapper.py | 4 +-- .../cdk/datapipelines_cdk_pipeline.py | 28 ++----------------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 8a0d27719..2dacd8dae 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -51,7 +51,7 @@ def aws_configure(profile_name='default'): creds = None if process.returncode == 0: creds = ast.literal_eval(process.stdout) - print(f"Successfully curled credentials: {str(process.stdout)}") + print(f"Successfully curled credentials: {str(process.stdout)}, credentials = {creds}") else: print( f'Failed clean curl credentials due to {str(process.stderr)}' @@ -176,7 +176,7 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s logger.info(f"Running command : \n {' '.join(cmd)}") # This command is too complex to be executed as a list of commands. We need to run it with shell=True - # However, the input arguments have be sanitized with the CommandSanitizer + # However, the input arguments have to be sanitized with the CommandSanitizer process = subprocess.run( # nosemgrep ' '.join(cmd), # nosemgrep diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py index 4926cf83c..d82f7c2ca 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py @@ -228,34 +228,12 @@ def git_push_repo(self): @staticmethod def clean_up_repo(path): if path: - precmd = [ - 'deactivate;', - 'rm', - '-rf', - f"{path}" - ] - + cmd = ['rm', '-rf', f"{path}"] cwd = os.path.dirname(os.path.abspath(__file__)) - logger.info(f"Running command : \n {' '.join(precmd)}") - pre_process = subprocess.run( - ['deactivate'], - text=True, - shell=False, - encoding='utf-8', - capture_output=True, - cwd=cwd - ) - - if pre_process.returncode == 0: - print(f"Successfully cleaned cloned repo: {path}. {str(pre_process.stdout)}") - else: - logger.error( - f'Failed clean cloned repo: {path} due to {str(pre_process.stderr)}' - ) - + logger.info(f"Running command : \n {' '.join(cmd)}") process = subprocess.run( - ['rm', '-rf', f"{path}"], + cmd, text=True, shell=False, encoding='utf-8', From 0b808b930d5f6696aa878eaab2e6095a2786b87e Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 11 Oct 2023 14:02:01 +0200 Subject: [PATCH 10/10] ECS Credentials as variable --- backend/dataall/base/cdkproxy/cdk_cli_wrapper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 2dacd8dae..5c1e8d730 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -45,8 +45,9 @@ def aws_configure(profile_name='default'): print('..............................................') print(' Running configure ') print('..............................................') - print(f"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: {os.getenv('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI')}") - cmd = ['curl', '169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI'] + AWS_CONTAINER_CREDENTIALS_RELATIVE_URI = os.getenv('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI') + print(f"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: {AWS_CONTAINER_CREDENTIALS_RELATIVE_URI}") + cmd = ['curl', f'169.254.170.2{AWS_CONTAINER_CREDENTIALS_RELATIVE_URI}'] process = subprocess.run(cmd, text=True, shell=False, encoding='utf-8', capture_output=True) creds = None if process.returncode == 0: