security-fix: resolve security issue with starting a process with a shell

- Issue: Fixed [B605:start_process_with_a_shell] identified by Bandit,
  which flags starting a process with a shell as a high-severity security risk (CWE-78: OS Command Injection).
- Replaced os.system and os.popen calls with subprocess-based equivalents to prevent shell injection vulnerabilities; the general pattern is sketched below.
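
In outline, every command that used to be handed to a shell as a single string is now split into an argument list and executed directly, so no shell is spawned and a non-zero exit status surfaces as an exception instead of a return code checked by hand. A minimal sketch of that pattern follows; the run_checked helper and its error message are illustrative only and do not appear in the changed files, which inline the same calls instead:

    import shlex
    import subprocess
    import sys

    def run_checked(cmd: str) -> None:
        # Hypothetical helper, for illustration only.
        # shlex.split turns the command string into an argv list, so the
        # program is executed directly rather than through a shell, which
        # removes the injection vector flagged by Bandit's B605 check.
        args = shlex.split(cmd)
        try:
            # check=True raises CalledProcessError on a non-zero exit code,
            # replacing the manual os.system() return-code comparisons.
            subprocess.run(args, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as e:
            sys.exit(f"Command failed: {e.stderr.decode()}")

Where the old code shelled out only to capture output via os.popen, the test changes below use subprocess.Popen or subprocess.run with captured stdout instead.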

Signed-off-by: ChengyuZhu6 <[email protected]>
ChengyuZhu6 committed Sep 13, 2024
1 parent 95d3e89 commit c1d296d
Showing 20 changed files with 585 additions and 258 deletions.
12 changes: 8 additions & 4 deletions binaries/build.py
@@ -1,6 +1,8 @@
import argparse
import glob
import os
import shlex
import subprocess
import sys

# To help discover local modules
@@ -49,10 +51,12 @@ def build_dist_whl(args):
print(f"## In directory: {os.getcwd()} | Executing command: {cur_wheel_cmd}")

if not args.dry_run:
build_exit_code = os.system(cur_wheel_cmd)
# If any one of the steps fail, exit with error
if build_exit_code != 0:
sys.exit(f"## {binary} build Failed !")
try:
cur_wheel_cmd_list = shlex.split(cur_wheel_cmd)
subprocess.run(cur_wheel_cmd_list, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
print(f"## {binary} build Failed! Error: {e.stderr.decode()}")
sys.exit(1)


def build(args):
40 changes: 18 additions & 22 deletions binaries/conda/build_packages.py
@@ -1,19 +1,16 @@
import argparse
import os
import subprocess
from datetime import date

from ts_scripts.utils import try_and_handle
from ts_scripts.utils import try_and_handle, find_conda_binary

conda_build_dir = os.path.dirname(os.path.abspath(__file__))
REPO_ROOT = os.path.join(conda_build_dir, "..", "..")
MINICONDA_DOWNLOAD_URL = (
"https://repo.anaconda.com/miniconda/Miniconda3-py39_4.9.2-Linux-x86_64.sh"
)
CONDA_BINARY = (
os.popen("which conda").read().strip()
if os.system(f"conda --version") == 0
else f"$HOME/miniconda/condabin/conda"
)
CONDA_BINARY = find_conda_binary()

CONDA_PACKAGES_PATH = os.path.join(REPO_ROOT, "binaries", "conda", "output")
CONDA_LINUX_PACKAGES_PATH = os.path.join(
@@ -32,8 +29,7 @@

if os.name == "nt":
# Assumes miniconda is installed in windows
CONDA_BINARY = "conda"

CONDA_BINARY = "conda"

def add_nightly_suffix_conda(binary_name: str) -> str:
"""
@@ -52,29 +48,29 @@ def install_conda_build(dry_run):
"""

# Check if conda binary already exists
exit_code = os.system(f"conda --version")
if exit_code == 0:
print(
f"'conda' already present on the system. Proceeding without a fresh conda installation."
)
try:
subprocess.run(["conda", "--version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print("'conda' already present on the system. Proceeding without a fresh conda installation.")
return
try_and_handle(
f"{CONDA_BINARY} install python=3.8 conda-build anaconda-client -y", dry_run
)
except subprocess.CalledProcessError:
# Conda is not available, proceed with installation
try_and_handle(
f"{CONDA_BINARY} install python=3.8 conda-build anaconda-client -y", dry_run
)


def install_miniconda(dry_run):
"""
Installs miniconda, a slimmer anaconda installation to build conda packages
"""

# Check if conda binary already exists
exit_code = os.system(f"conda --version")
if exit_code == 0:
print(
f"'conda' already present on the system. Proceeding without a fresh minconda installation."
)
try:
subprocess.run(["conda", "--version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print("'conda' already present on the system. Proceeding without a fresh conda installation.")
return
except subprocess.CalledProcessError as e:
raise (e)

if os.name == "nt":
print(
"Identified as Windows system. Please install miniconda using this URL: https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"
23 changes: 15 additions & 8 deletions binaries/install.py
@@ -1,4 +1,5 @@
import os
import subprocess
import sys
import glob

@@ -13,19 +14,25 @@ def install():
if is_conda_env():
print("## Using conda to install torchserve and torch-model-archiver")
channel_dir = os.path.abspath(os.path.join(REPO_ROOT, "binaries", "conda", "output"))
conda_cmd = f"conda install --channel {channel_dir} -y torchserve torch-model-archiver"
print(f"## In directory: {os.getcwd()} | Executing command: {conda_cmd}")
install_exit_code = os.system(conda_cmd)
conda_cmd = ["conda", "install", "--channel", channel_dir, "-y", "torchserve", "torch-model-archiver"]
print(f"## In directory: {os.getcwd()} | Executing command: {' '.join(conda_cmd)}")

try:
subprocess.run(conda_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
sys.exit("## Torchserve/Model archiver Installation Failed!")

else:
print("## Using pip to install torchserve and torch-model-archiver")
ts_wheel = glob.glob(os.path.join(REPO_ROOT, "dist", "*.whl"))[0]
ma_wheel = glob.glob(os.path.join(REPO_ROOT, "model-archiver", "dist", "*.whl"))[0]
pip_cmd = f"pip install {ts_wheel} {ma_wheel}"
print(f"## In directory: {os.getcwd()} | Executing command: {pip_cmd}")
install_exit_code = os.system(pip_cmd)
pip_cmd = ["pip", "install", ts_wheel, ma_wheel]
print(f"## In directory: {os.getcwd()} | Executing command: {' '.join(pip_cmd)}")

if install_exit_code != 0:
sys.exit("## Torchserve \ Model archiver Installation Failed !")
try:
subprocess.run(pip_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
sys.exit("## Torchserve/Model archiver Installation Failed!")


if __name__ == "__main__":
17 changes: 12 additions & 5 deletions binaries/upload.py
@@ -3,6 +3,7 @@
import glob
import os
import sys
import subprocess

# To help discover local modules
REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
@@ -44,11 +45,17 @@ def upload_conda_packages(args, PACKAGES, CONDA_PACKAGES_PATH):
"tar.bz2"
):
print(f"Uploading to anaconda package: {name}")
anaconda_upload_command = f"anaconda upload {file_path} --force"
exit_code = os.system(anaconda_upload_command)
if exit_code != 0:
print(f"Anaconda package upload failed for package {name}")
return exit_code
anaconda_upload_command = ["anaconda", "upload", file_path, "--force"]

try:
subprocess.run(
anaconda_upload_command,
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
except subprocess.CalledProcessError as e:
return e.returncode
print(f"All packages uploaded to anaconda successfully")


@@ -5,6 +5,7 @@
import argparse
import os
import shutil
import subprocess

MODEL_PTH_FILE = "resnet18-f37072fd.pth"
MODEL_STORE = "model_store"
@@ -15,7 +16,7 @@ def download_pth_file(output_file: str) -> None:
if not os.path.exists(output_file):
cmd = ["wget", " https://download.pytorch.org/models/resnet18-f37072fd.pth"]
print("Downloading resnet-18 pth file")
os.system(" ".join(cmd))
subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)


def create_mar():
@@ -42,9 +43,8 @@ def create_mar():
"--handler image_classifier",
"--force",
]

print(f"Archiving resnet-18 model into {MAR_FILE}")
os.system(" ".join(cmd))
subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)


def move_mar_file():
5 changes: 2 additions & 3 deletions examples/torchrec_dlrm/create_dlrm_mar.py
@@ -2,8 +2,7 @@
This script creates a DLRM model and packs it into a TorchServe mar file
"""

import os

import subprocess
import torch
from dlrm_factory import DLRMFactory

@@ -32,7 +31,7 @@ def main():
]

print("Archiving model into dlrm.mar")
os.system(" ".join(cmd))
subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print("Done")


12 changes: 8 additions & 4 deletions test/pytest/test_distributed_inference_handler.py
@@ -1,6 +1,6 @@
import os
import sys

import subprocess
import pytest
import test_utils

@@ -37,10 +37,14 @@ def test_large_model_inference():
)

try:
command = f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_INFERENCE} -d {POSTMAN_LARGE_MODEL_INFERENCE_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_INFERENCE_DIR}/{REPORT_FILE} --verbose"
result = os.system(command)
command = [
"newman", "run", "-e", POSTMAN_ENV_FILE, POSTMAN_COLLECTION_INFERENCE,
"-d", POSTMAN_LARGE_MODEL_INFERENCE_DATA_FILE, "-r", "cli,htmlextra",
"--reporter-htmlextra-export", f"{ARTIFACTS_INFERENCE_DIR}/{REPORT_FILE}", "--verbose"
]
result = subprocess.run(command, check=True)
assert (
result == 0
result.returncode == 0
), "Error: Distributed inference failed, the exit code is not zero"
finally:
test_utils.stop_torchserve()
24 changes: 15 additions & 9 deletions test/pytest/test_handler.py
@@ -2,7 +2,7 @@
import json
import logging
import os

import subprocess
import numpy as np
import pytest
import requests
@@ -314,10 +314,14 @@ def test_huggingface_bert_batch_inference():

# Make 2 curl requests in parallel with &
# curl --header \"X-Forwarded-For: 1.2.3.4\" won't work since you can't access local host anymore
response = os.popen(
f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}"
)
response = response.read()
curl_command = [
"curl", "http://127.0.0.1:8080/predictions/BERTSeqClassification", "-T", input_text
]
process1 = subprocess.Popen(curl_command, stdout=subprocess.PIPE)
process2 = subprocess.Popen(curl_command, stdout=subprocess.PIPE)
response1, _ = process1.communicate()
response2, _ = process2.communicate()
response = response1.decode() + response2.decode()

## Assert that 2 responses are returned from the same batch
assert response == "Not AcceptedNot Accepted"
@@ -360,7 +364,7 @@ def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():

def test_huggingface_bert_model_parallel_inference():
number_of_gpus = torch.cuda.device_count()
check = os.popen(f"curl http://localhost:8081/models")
check = subprocess.run(["curl", "http://localhost:8081/models"], capture_output=True, text=True)
print(check)
if number_of_gpus > 1:
batch_size = 1
@@ -385,10 +389,12 @@ def test_huggingface_bert_model_parallel_inference():
"sample_text_captum_input.txt",
)

response = os.popen(
f"curl http://127.0.0.1:8080/predictions/Textgeneration -T {input_text}"
response = subprocess.run(
["curl", "http://127.0.0.1:8080/predictions/Textgeneration", "-T", input_text],
capture_output=True,
text=True
)
response = response.read()
response = response.stdout

assert (
"Bloomberg has decided to publish a new report on the global economy"
33 changes: 16 additions & 17 deletions test/pytest/test_sm_mme_requirements.py
@@ -1,6 +1,6 @@
import os
import pathlib

import subprocess
import pytest
import requests
import test_utils
@@ -103,25 +103,24 @@ def test_oom_on_invoke():

# Make 8 curl requests in parallel with &
# Send multiple requests to make sure to hit OOM
processes = []
for i in range(10):
response = os.popen(
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && "
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && "
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && "
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && "
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && "
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && "
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && "
f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} "
)
response = response.read()
for _ in range(8):
process = subprocess.Popen(
["curl", "http://127.0.0.1:8080/models/BERTSeqClassification/invoke", "-T", input_text],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
processes.append(process)
responses = []
for process in processes:
stdout, stderr = process.communicate()
responses.append(stdout.decode())

# If OOM is hit, we expect code 507 to be present in the response string
lines = response.split("\n")
output = ""
for line in lines:
if "code" in line:
line = line.strip()
output = line
for response in responses:
if '"code": 507,' in response:
output = '"code": 507,'
break
assert output == '"code": 507,', "OOM Error expected"