Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check for errors in ModelScan API result #1688

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/modelscan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ jobs:
run: |
cd lib/modelscan_api
python -m pytest

# Pytest -m integration
- name: Run integration testing
run: |
cd lib/modelscan_api
python -m pytest -m integration
25 changes: 13 additions & 12 deletions backend/src/clients/modelScan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,19 @@ interface ModelScanResponse {
skipped_files: string[]
}
}
issues: [
{
description: string
operator: string
module: string
source: string
scanner: string
severity: string
},
]
// TODO: currently unknown what this might look like
errors: object[]
issues: {
description: string
operator: string
module: string
source: string
scanner: string
severity: string
}[]
errors: {
category: string
description: string
source: string
}[]
}

export async function getModelScanInfo() {
Expand Down
16 changes: 16 additions & 0 deletions backend/src/connectors/fileScanning/modelScan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ export class ModelScanFileScanningConnector extends BaseFileScanningConnector {
try {
const scanResults = await scanStream(s3Stream, file.name, file.size)

if (scanResults.errors.length !== 0) {
log.error(
{ errors: scanResults.errors, modelId: file.modelId, fileId: file._id, name: file.name },
'Scan errored.',
)
return [
{
toolName: modelScanToolName,
state: ScanState.Error,
scannerVersion: modelscanVersion,
lastRunAt: new Date(),
},
]
}

const issues = scanResults.summary.total_issues
const isInfected = issues > 0
const viruses: string[] = []
Expand Down Expand Up @@ -91,6 +106,7 @@ export class ModelScanFileScanningConnector extends BaseFileScanningConnector {
{
toolName: modelScanToolName,
state: ScanState.Error,
scannerVersion: modelscanVersion,
lastRunAt: new Date(),
},
]
Expand Down
11 changes: 10 additions & 1 deletion lib/modelscan_api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,21 @@ pre-commit install

### Tests

To run the tests:
To run the unit tests:

```bash
pytest
```

To run the integration tests (does not require any externally running services):

```bash
pytest -m integration
```

Note that the integration tests use safe but technically "malicious" file(s) to check ModelScan's performance. Please
refer to [test_integration](./tests/test_integration/README.md) for details.

### Running

To run in [dev mode](https://fastapi.tiangolo.com/fastapi-cli/#fastapi-dev):
Expand Down
19 changes: 15 additions & 4 deletions lib/modelscan_api/bailo_modelscan_api/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import logging
import re
from pathlib import Path

logger = logging.getLogger(__name__)
Expand All @@ -21,7 +22,17 @@ def parse_path(path: str | Path | None) -> Path:
return Path().cwd() if path == "." else Path(path).absolute()


def safe_join(root_dir: str | Path | None, filename: str | Path) -> Path:
def sanitise_unix_filename(filename: str) -> str:
"""Safely convert an arbitrary string to a valid unix filename by only preserving explicitly allowed characters as per https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
Note that this is not safe for Windows users as it doesn't check for reserved words e.g. CON and AUX.

:param filename: the untrusted filename to be sanitised
:return: a valid filename with trusted characters
"""
return re.sub(r"[/\\?%*:|\"<>\x7F\x00-\x1F]", "-", filename)


def safe_join(root_dir: str | Path | None, filename: str) -> Path:
"""Combine a trusted directory path with an untrusted filename to get a full path.

:param root_dir: Trusted path/directory.
Expand All @@ -33,13 +44,13 @@ def safe_join(root_dir: str | Path | None, filename: str | Path) -> Path:
if not filename or not str(filename).strip():
raise ValueError("filename must not be empty")

stripped_filename = Path(str(filename)).name.strip()
safe_filename = sanitise_unix_filename(filename).strip()

if not stripped_filename:
if not safe_filename:
raise ValueError("filename must not be empty")

parent_dir = parse_path(root_dir).resolve()
full_path = parent_dir.joinpath(stripped_filename).resolve()
full_path = parent_dir.joinpath(safe_filename).resolve()
if not full_path.is_relative_to(parent_dir):
raise ValueError("Could not safely join paths.")

Expand Down
119 changes: 99 additions & 20 deletions lib/modelscan_api/bailo_modelscan_api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,28 +87,107 @@ async def info(settings: Annotated[Settings, Depends(get_settings)]) -> ApiInfor
"description": "modelscan returned results",
"content": {
"application/json": {
"example": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 1, "HIGH": 0, "CRITICAL": 0},
"total_issues": 1,
"input_path": "/foo/bar/unsafe_model.h5",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 1, "scanned_files": ["unsafe_model.h5"]},
"skipped": {"total_skipped": 0, "skipped_files": []},
"examples": {
"Normal": {
"value": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 0, "HIGH": 0, "CRITICAL": 0},
"total_issues": 0,
"input_path": "/foo/bar/safe_model.pkl",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 1, "scanned_files": ["safe_model.pkl"]},
"skipped": {
"total_skipped": 0,
"skipped_files": [],
},
},
"issues": [],
"errors": [],
}
},
"Issue": {
"value": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 1, "HIGH": 0, "CRITICAL": 0},
"total_issues": 1,
"input_path": "/foo/bar/unsafe_model.h5",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 1, "scanned_files": ["unsafe_model.h5"]},
"skipped": {"total_skipped": 0, "skipped_files": []},
},
"issues": [
{
"description": "Use of unsafe operator 'Lambda' from module 'Keras'",
"operator": "Lambda",
"module": "Keras",
"source": "unsafe_model.h5",
"scanner": "modelscan.scanners.H5LambdaDetectScan",
"severity": "MEDIUM",
}
],
"errors": [],
}
},
"issues": [
{
"description": "Use of unsafe operator 'Lambda' from module 'Keras'",
"operator": "Lambda",
"module": "Keras",
"source": "unsafe_model.h5",
"scanner": "modelscan.scanners.H5LambdaDetectScan",
"severity": "MEDIUM",
"Skipped": {
"value": {
"errors": [],
"issues": [],
"summary": {
"input_path": "/foo/bar/empty.txt",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"scanned": {"total_scanned": 0},
"skipped": {
"skipped_files": [
{
"category": "SCAN_NOT_SUPPORTED",
"description": "Model Scan did not scan file",
"source": "empty.txt",
}
],
"total_skipped": 1,
},
"timestamp": "2024-11-19T12:00:00.000000",
"total_issues": 0,
"total_issues_by_severity": {"CRITICAL": 0, "HIGH": 0, "LOW": 0, "MEDIUM": 0},
},
}
],
"errors": [],
},
"Error": {
"value": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 0, "HIGH": 0, "CRITICAL": 0},
"total_issues": 0,
"input_path": "/foo/bar/null.h5",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 0},
"skipped": {
"total_skipped": 1,
"skipped_files": [
{
"category": "SCAN_NOT_SUPPORTED",
"description": "Model Scan did not scan file",
"source": "null.h5",
}
],
},
},
"issues": [],
"errors": [
{
"category": "MODEL_SCAN",
"description": "Unable to synchronously open file (file signature not found)",
"source": "null.h5",
}
],
}
},
}
}
},
Expand Down
131 changes: 0 additions & 131 deletions lib/modelscan_api/bailo_modelscan_api/test_dependencies.py

This file was deleted.

Loading
Loading