Skip to content

Commit

Permalink
Merge pull request #154 from mkosiarc/sbom-merge
Browse files Browse the repository at this point in the history
Create sbom-utility-scripts image
  • Loading branch information
mkosiarc authored Aug 14, 2024
2 parents 8b7448b + 425808e commit 4bc7a20
Show file tree
Hide file tree
Showing 21 changed files with 1,870 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
name: Build base images sbom script image
name: Build sbom utility scripts image

on:
push:
branches:
- main
paths:
- base-images-sbom-script/**
- sbom-utility-scripts/**

pull_request:
branches:
- main
paths:
- base-images-sbom-script/**
- sbom-utility-scripts/**


env:
REGISTRY: quay.io/redhat-appstudio
IMAGE_NAME: base-images-sbom-script
IMAGE_NAME: sbom-utility-scripts-image

jobs:
build:
Expand All @@ -31,10 +31,16 @@ jobs:
with:
python-version: 3.11

- name: Run tox checks
- name: Run tox checks for base-images-sbom-script
run: |
python3 -m pip install tox
cd ./base-images-sbom-script/app/
cd ./sbom-utility-scripts/scripts/base-images-sbom-script/app/
tox
- name: Run tox checks for merge-cachi2-sboms-script
run: |
python3 -m pip install tox
cd ./sbom-utility-scripts/scripts/merge-cachi2-sboms-script/
tox
- name: Build Image
Expand All @@ -43,9 +49,9 @@ jobs:
with:
image: ${{ env.IMAGE_NAME }}
tags: ${{ github.sha }}
context: ./base-images-sbom-script
context: ./sbom-utility-scripts
containerfiles: |
./base-images-sbom-script/Dockerfile
./sbom-utility-scripts/Dockerfile
- name: Push to Quay
if: github.event_name == 'push' # don't push image from PR
Expand Down
7 changes: 0 additions & 7 deletions base-images-sbom-script/Dockerfile

This file was deleted.

11 changes: 11 additions & 0 deletions sbom-utility-scripts/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Image that bundles the SBOM utility scripts under /scripts.
# The base image is pinned by both tag and sha256 digest.
# NOTE(review): the image name suggests Python 3.9 — confirm the scripts stay compatible with it.
FROM registry.access.redhat.com/ubi9/python-39:1-192.1722518946@sha256:0176b477075984d5a502253f951d2502f0763c551275f9585ac515b9f241d73d

# Relative paths used by the instructions below resolve against /scripts.
WORKDIR /scripts

# Source paths are relative to the build context (the workflow builds with ./sbom-utility-scripts as context).
COPY scripts/merge_syft_sboms.py /scripts
COPY scripts/merge-cachi2-sboms-script/merge_cachi2_sboms.py /scripts
COPY scripts/create_purl_sbom.py /scripts
COPY scripts/base-images-sbom-script/app/base_images_sbom_script.py /scripts
# Only the requirements file of base-images-sbom-script is copied into the image.
COPY scripts/base-images-sbom-script/app/requirements.txt /scripts

# requirements.txt is resolved against WORKDIR, i.e. the file copied just above.
RUN pip3 install -r requirements.txt
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 10 additions & 0 deletions sbom-utility-scripts/scripts/create_purl_sbom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import json

# Convert the CycloneDX SBOM found in the current working directory into a
# minimal "purl SBOM": a JSON document listing only the package URLs.
#
# JSON interchange is UTF-8, so the encoding is spelled out instead of relying
# on the locale default, which may not be UTF-8 in every environment.
with open("./sbom-cyclonedx.json", encoding="utf-8") as f:
    cyclonedx_sbom = json.load(f)

# Components that carry no "purl" key are skipped; an SBOM without a
# "components" array yields an empty dependency list.
purls = [{"purl": component["purl"]} for component in cyclonedx_sbom.get("components", []) if "purl" in component]
purl_content = {"image_contents": {"dependencies": purls}}

with open("sbom-purl.json", "w", encoding="utf-8") as output_file:
    json.dump(purl_content, output_file, indent=4)
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#!/usr/bin/env python3
import json
from argparse import ArgumentParser
from typing import Any, Callable
from urllib.parse import quote_plus, urlsplit


def _is_syft_local_golang_component(component: dict) -> bool:
    """
    Tell whether a Golang component reported by Syft is a local replacement.

    Cachi2 reports local replacements in a very different way, so the
    matching Syft entries are meant to be dropped from the merged SBOM.
    A component qualifies when its purl starts with "pkg:golang" and either
    its name starts with "." or its version is exactly "(devel)".
    """
    if not component.get("purl", "").startswith("pkg:golang"):
        return False

    has_relative_name = component.get("name", "").startswith(".")
    is_devel_version = component.get("version", "") == "(devel)"
    return has_relative_name or is_devel_version


def _is_cachi2_non_registry_dependency(component: dict) -> bool:
    """
    Tell whether a Cachi2 component was fetched from a VCS or a direct file location.

    Cachi2 reports such non-registry components differently from Syft, and the
    Syft report alone gives no way to recognise them. The result of this check
    is used to build the list of non-registry components from Cachi2's SBOM so
    that the corresponding Syft entries can be removed from the merged SBOM.

    Only PyPI and NPM purls are considered; a purl counts as non-registry when
    it contains a "vcs_url=" or "download_url=" qualifier.
    """
    purl = component.get("purl", "")

    if not purl.startswith(("pkg:pypi", "pkg:npm")):
        return False

    return any(marker in purl for marker in ("vcs_url=", "download_url="))


def _unique_key_cachi2(component: dict) -> str:
    """
    Build the deduplication key for a component reported by Cachi2.

    The key is the component's purl with qualifiers ("?...") and subpath
    ("#...") stripped, i.e. only the scheme and the path are kept.
    See https://github.com/package-url/purl-spec/tree/master#purl for more info on purls.

    Raises KeyError when the component has no "purl".
    """
    parsed_purl = urlsplit(component["purl"])
    return ":".join([parsed_purl.scheme, parsed_purl.path])


def _unique_key_syft(component: dict) -> str:
    """
    Create a unique key for Syft reported components.

    The key is the purl with a lowercased name part for PyPI packages and a
    URL-encoded version for Golang packages. Syft does not set any qualifier
    for NPM, Pip or Golang, so there is no need to strip them as done in
    _unique_key_cachi2.

    If a Syft component lacks a purl (e.g. type OS), its name and version are
    used instead. A purl without any "@" is returned unchanged.
    """
    if "purl" not in component:
        return component.get("name", "") + "@" + component.get("version", "")

    purl = component["purl"]
    if "@" not in purl:
        return purl

    # Split on the LAST "@" only: a purl may legitimately contain another "@"
    # earlier (e.g. an unencoded npm scope such as "pkg:npm/@scope/name@1.0"),
    # and unpacking a plain split("@") would raise ValueError in that case.
    name, version = purl.rsplit("@", 1)

    if name.startswith("pkg:pypi"):
        name = name.lower()

    if name.startswith("pkg:golang"):
        version = quote_plus(version)

    return f"{name}@{version}"


def _get_syft_component_filter(cachi_sbom_components: list[dict[str, Any]]) -> Callable:
    """
    Get a function that filters out Syft components for the merged SBOM.

    The returned predicate takes a Syft component and returns True when the
    component is considered a duplicate/removable, which is the case if:
    - it is a local Golang replacement
    - it is a non-registry component also reported by Cachi2
    - it has the same key as a Cachi2 component

    For the non-registry case only the dependency's name can be compared,
    because Cachi2 does not report a non-PyPI Pip dependency's version. Even
    though several versions of the same dependency can coexist in a project,
    all Syft instances are removed by name because Cachi2 will report them
    correctly, given that it scans all the source code properly and the image
    is built hermetically.

    Raises KeyError if a Cachi2 component lacks a "purl", or if a non-registry
    Cachi2 component lacks a "name".
    """
    # Sets give constant-time membership checks; the predicate runs once per
    # Syft component, so list lookups would make the merge quadratic.
    cachi2_non_registry_names = {
        component["name"] for component in cachi_sbom_components if _is_cachi2_non_registry_dependency(component)
    }
    cachi2_keys = {_unique_key_cachi2(component) for component in cachi_sbom_components}

    def is_duplicate_non_registry_component(component: dict[str, Any]) -> bool:
        # .get(): a Syft component without a name simply cannot match by name.
        return component.get("name") in cachi2_non_registry_names

    def component_is_duplicated(component: dict[str, Any]) -> bool:
        return (
            _is_syft_local_golang_component(component)
            or is_duplicate_non_registry_component(component)
            or _unique_key_syft(component) in cachi2_keys
        )

    return component_is_duplicated


def _merge_tools_metadata(syft_sbom: dict[Any, Any], cachi2_sbom: dict[Any, Any]) -> None:
    """Merge the Cachi2 tools into the metadata section of the Syft SBOM, in place.

    With CycloneDX 1.5, a new format for specifying tools was introduced, and
    the format from 1.4 was marked as deprecated. Both formats are supported
    for the Syft SBOM:
    - dict (1.5): each Cachi2 tool is converted to an "application" component
      and appended to the "components" array, which is created if absent;
    - list (1.4): the Cachi2 tools are appended as they are.

    The Cachi2 SBOM is assumed to be generated with the same version as this
    script, i.e. its tools are in the older list format with "vendor" and
    "name" keys.

    Raises:
        RuntimeError: if the Syft .metadata.tools value is neither dict nor list.
        KeyError: if either SBOM lacks .metadata.tools, or a Cachi2 tool lacks
            "vendor" or "name".
    """
    syft_tools = syft_sbom["metadata"]["tools"]
    cachi2_tools = cachi2_sbom["metadata"]["tools"]

    if isinstance(syft_tools, dict):
        # Convert everything first so a malformed Cachi2 tool raises before
        # the Syft SBOM is touched.
        converted_tools = [
            {
                "author": tool["vendor"],
                "name": tool["name"],
                "type": "application",
            }
            for tool in cachi2_tools
        ]
        # "components" is optional in the 1.5 tools object (it may list only
        # services), so create the array instead of failing with KeyError.
        syft_tools.setdefault("components", []).extend(converted_tools)
    elif isinstance(syft_tools, list):
        syft_tools.extend(cachi2_tools)
    else:
        raise RuntimeError(
            "The .metadata.tools JSON key is in an unexpected format. "
            f"Expected dict or list, got {type(syft_tools)}."
        )


def merge_sboms(cachi2_sbom_path: str, syft_sbom_path: str) -> str:
    """Merge Cachi2 components into the Syft SBOM while removing duplicates.

    Both paths must point to JSON SBOM files. The Syft document is used as the
    base: its components that duplicate Cachi2's are dropped, the Cachi2
    components are appended after the remaining ones, and the tools metadata
    of both documents is merged.

    Returns the merged SBOM serialized as JSON with a 2-space indent.
    """
    # JSON interchange is UTF-8; do not depend on the locale's default encoding.
    with open(cachi2_sbom_path, encoding="utf-8") as file:
        cachi2_sbom = json.load(file)

    with open(syft_sbom_path, encoding="utf-8") as file:
        syft_sbom = json.load(file)

    # "components" is optional in CycloneDX; treat a missing array as empty on
    # both sides rather than only for the Syft document.
    cachi2_components = cachi2_sbom.get("components", [])

    is_duplicate_component = _get_syft_component_filter(cachi2_components)

    filtered_syft_components = [c for c in syft_sbom.get("components", []) if not is_duplicate_component(c)]

    syft_sbom["components"] = filtered_syft_components + cachi2_components

    _merge_tools_metadata(syft_sbom, cachi2_sbom)

    return json.dumps(syft_sbom, indent=2)


if __name__ == "__main__":
    # Command-line entry point: takes the Cachi2 SBOM path and the Syft SBOM
    # path (in that order) and prints the merged SBOM to standard output.
    cli = ArgumentParser()
    for positional in ("cachi2_sbom_path", "syft_sbom_path"):
        cli.add_argument(positional)

    options = cli.parse_args()

    print(merge_sboms(options.cachi2_sbom_path, options.syft_sbom_path))
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pytest
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --generate-hashes --output-file=requirements-test.txt requirements-test.in
#
iniconfig==2.0.0 \
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
--hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
# via pytest
packaging==24.1 \
--hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \
--hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124
# via pytest
pluggy==1.5.0 \
--hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \
--hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
# via pytest
pytest==8.3.2 \
--hash=sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5 \
--hash=sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce
# via -r requirements-test.in
Loading

0 comments on commit 4bc7a20

Please sign in to comment.