Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: shell true semgrep #760

Merged
merged 13 commits into from
Oct 16, 2023
50 changes: 37 additions & 13 deletions backend/dataall/base/cdkproxy/cdk_cli_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from dataall.base.aws.sts import SessionHelper
from dataall.base.db import Engine
from dataall.base.utils.alarm_service import AlarmService
from dataall.base.utils.shell_utils import CommandSanitizer

logger = logging.getLogger('cdksass')

Expand Down Expand Up @@ -46,7 +47,7 @@ def aws_configure(profile_name='default'):
print('..............................................')
print(f"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: {os.getenv('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI')}")
cmd = ['curl', '169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI']
process = subprocess.run(' '.join(cmd), text=True, shell=True, encoding='utf-8', capture_output=True) # nosec # nosemgrep
process = subprocess.run(cmd, text=True, shell=False, encoding='utf-8', capture_output=True)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am working on this item, after plotting the error logs I think the issue is on the url to get the ECS-EC2 credentials. I will use f-string to create the curl command since we can obtain it as environment variable. I am now testing it in AWS

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@noah-paige I tested in AWS and this way of defining the commands works :)

creds = None
if process.returncode == 0:
creds = ast.literal_eval(process.stdout)
Expand Down Expand Up @@ -130,6 +131,16 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s
app_path = app_path or './app.py'

logger.info(f'app_path: {app_path}')
input_args = [
stack.name,
stack.accountid,
stack.region,
stack.stack,
stack.targetUri
]

CommandSanitizer(input_args)

cmd = [
'' '. ~/.nvm/nvm.sh &&',
'cdk',
Expand Down Expand Up @@ -157,9 +168,11 @@ def deploy_cdk_stack(engine: Engine, stackid: str, app_path: str = None, path: s
f'"{sys.executable} {app_path}"',
'--verbose',
]

logger.info(f"Running command : \n {' '.join(cmd)}")

# This command is too complex to be executed as a list of commands. We need to run it with shell=True
# However, the input arguments have be sanitized with the CommandSanitizer

process = subprocess.run( # nosemgrep
' '.join(cmd), # nosemgrep
text=True, # nosemgrep
Expand Down Expand Up @@ -208,17 +221,28 @@ def describe_stack(stack, engine: Engine = None, stackid: str = None):


def cdk_installed():
cmd = ['. ~/.nvm/nvm.sh && cdk', '--version']
logger.info(f"Running command {' '.join(cmd)}")

subprocess.run( # nosemgrep
cmd, # nosemgrep
text=True, # nosemgrep
shell=True, # nosec # nosemgrep
encoding='utf-8', # nosemgrep
stdout=subprocess.PIPE, # nosemgrep
stderr=subprocess.PIPE, # nosemgrep
cwd=os.path.dirname(__file__), # nosemgrep
cmd1 = ['.', '~/.nvm/nvm.sh']
logger.info(f"Running command {' '.join(cmd1)}")
subprocess.run(
cmd1,
text=True,
shell=False,
encoding='utf-8',
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=os.path.dirname(__file__),
)

cmd2 = ['cdk', '--version']
logger.info(f"Running command {' '.join(cmd2)}")
subprocess.run(
cmd2,
text=True,
shell=False,
encoding='utf-8',
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=os.path.dirname(__file__),
)


Expand Down
23 changes: 23 additions & 0 deletions backend/dataall/base/utils/shell_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import re


class CommandSanitizer:
"""
Takes a list of arguments and verifies that each argument is alphanumeric or "-" or "_" and of type string
Trows a TypeError if any of these conditions is violated.
"""
def __init__(self, args) -> None:
if not args:
raise TypeError("Arguments cannot be empty")

for arg in args:
if not isinstance(arg, str):
raise TypeError(f"arguments must be strings, not {type(arg)} of {str(arg)}")
if re.search(r"[^a-zA-Z0-9-_]", arg):
raise TypeError(f"argument contains invalid characters: {arg}")

self._arguments = args

@property
def arguments(self):
return self._arguments
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from dataall.base import db
from dataall.base.aws.sts import SessionHelper
from dataall.base.utils.shell_utils import CommandSanitizer
from dataall.core.environment.services.environment_service import EnvironmentService
from dataall.modules.datapipelines.db.datapipelines_repositories import DatapipelinesRepository

Expand Down Expand Up @@ -71,6 +72,10 @@ def __init__(self, target_uri):
'COMMITID=$(aws codecommit get-branch --repository-name ${REPO_NAME} --branch-name main --query branch.commitId --output text)',
'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://app.py --file-path app.py --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out',
]
CommandSanitizer(args=[self.pipeline.repo])

# This command is too complex to be executed as a list of commands. We need to run it with shell=True
# However, the input arguments have be sanitized with the CommandSanitizer

process = subprocess.run( # nosemgrep
"; ".join(update_cmds), # nosemgrep
Expand Down Expand Up @@ -99,6 +104,11 @@ def initialize_repo(self):

logger.info(f"Running Commands: {'; '.join(cmd_init)}")

CommandSanitizer(args=[self.pipeline.repo, self.pipeline.SamlGroupName])

# This command is too complex to be executed as a list of commands. We need to run it with shell=True
# However, the input arguments have be sanitized with the CommandSanitizer

process = subprocess.run( # nosemgrep
'; '.join(cmd_init), # nosemgrep
text=True, # nosemgrep
Expand Down Expand Up @@ -201,6 +211,9 @@ def git_push_repo(self):

logger.info(f"Running Commands: {'; '.join(git_cmds)}")

# This command does not include any customer upstream input
# no sanitization is needed and shell=true does not impose a risk

process = subprocess.run( # nosemgrep
'; '.join(git_cmds), # nosemgrep
text=True, # nosemgrep
Expand All @@ -225,13 +238,13 @@ def clean_up_repo(path):
cwd = os.path.dirname(os.path.abspath(__file__))
logger.info(f"Running command : \n {' '.join(precmd)}")

process = subprocess.run( # nosemgrep
' '.join(precmd), # nosemgrep
text=True, # nosemgrep
shell=True, # nosec # nosemgrep
encoding='utf-8', # nosemgrep
capture_output=True, # nosemgrep
cwd=cwd # nosemgrep
process = subprocess.run(
precmd,
text=True,
shell=False,
encoding='utf-8',
capture_output=True,
cwd=cwd
)

if process.returncode == 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from dataall.modules.datapipelines.db.datapipelines_models import DataPipeline, DataPipelineEnvironment
from dataall.modules.datapipelines.db.datapipelines_repositories import DatapipelinesRepository
from dataall.base.utils.cdk_nag_utils import CDKNagUtil
from dataall.base.utils.shell_utils import CommandSanitizer

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -186,6 +187,11 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://ddk.json --file-path ddk.json --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out',
]

CommandSanitizer(args=[pipeline.repo])

# This command is too complex to be executed as a list of commands. We need to run it with shell=True
# However, the input arguments have be sanitized with the CommandSanitizer

process = subprocess.run( # nosemgrep
"; ".join(update_cmds), # nosemgrep
text=True, # nosemgrep
Expand Down Expand Up @@ -507,6 +513,7 @@ def write_ddk_json_multienvironment(path, output_file, pipeline_environment, dev
with open(f'{path}/{output_file}', 'w') as text_file:
print(json, file=text_file)

@staticmethod
def initialize_repo(pipeline, code_dir_path, env_vars):

venv_name = ".venv"
Expand All @@ -521,6 +528,11 @@ def initialize_repo(pipeline, code_dir_path, env_vars):

logger.info(f"Running Commands: {'; '.join(cmd_init)}")

CommandSanitizer(args=[pipeline.repo])

# This command is too complex to be executed as a list of commands. We need to run it with shell=True
# However, the input arguments have be sanitized with the CommandSanitizer

process = subprocess.run( # nosemgrep
'; '.join(cmd_init), # nosemgrep
text=True, # nosemgrep
Expand Down
25 changes: 14 additions & 11 deletions deploy/stacks/solution_bundling.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,21 @@ def __init__(self, source_path=None):

def try_bundle(self, output_dir: str, options: BundlingOptions) -> bool:
requirements_path = Path(self.source_path, 'requirements.txt')
command = [
f'cp -a {self.source_path}/. {output_dir}/ && pip install -r {requirements_path} -t {output_dir}'
]
subprocess.check_output( # nosemgrep
command, # nosemgrep
stderr=subprocess.STDOUT, # nosemgrep
shell=True, # nosec # nosemgrep
subprocess.check_output(
['cp', '-a', f'{self.source_path}/.', f'{output_dir}/'],
stderr=subprocess.STDOUT,
shell=False,
)

ls_output = subprocess.check_output( # nosemgrep
[f'ls -ll {output_dir}'], # nosemgrep
stderr=subprocess.STDOUT, # nosemgrep
shell=True, # nosec # nosemgrep
subprocess.check_output(
['pip', 'install', '-r', requirements_path, '-t', output_dir],
stderr=subprocess.STDOUT,
shell=False,
)

ls_output = subprocess.check_output(
dlpzx marked this conversation as resolved.
Show resolved Hide resolved
['ls', '-ll', f'{output_dir}'],
stderr=subprocess.STDOUT,
shell=False,
)
return True
Loading