From e2e456c7ddbe38685237701ca4fa76d0baccac52 Mon Sep 17 00:00:00 2001 From: Ore Olarewaju Date: Mon, 26 Jan 2026 12:34:33 +0100 Subject: [PATCH 1/5] feat(sbom): generate sbom report after image scan --- scanner/generate-sbom-report.sh | 297 ++++++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100755 scanner/generate-sbom-report.sh diff --git a/scanner/generate-sbom-report.sh b/scanner/generate-sbom-report.sh new file mode 100755 index 0000000..dc5591c --- /dev/null +++ b/scanner/generate-sbom-report.sh @@ -0,0 +1,297 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Script to generate detailed SBOM report from scan results +# Usage: generate-sbom-report.sh + +SCAN_RESULTS_DIR="${1:-./scan-results}" +OUTPUT_FILE="${2:-sbom-detailed-report.md}" +SCAN_SUMMARY="$SCAN_RESULTS_DIR/scan-summary.json" + +if [[ ! -f "$SCAN_SUMMARY" ]]; then + echo "Error: scan-summary.json not found in $SCAN_RESULTS_DIR" >&2 + exit 1 +fi + +# Get successful scans +SUCCESSFUL_SCANS=$(jq -r '.successful_scans[]' "$SCAN_SUMMARY" 2>/dev/null || echo "") + +if [[ -z "$SUCCESSFUL_SCANS" ]]; then + echo "No successful scans found in scan summary" >&2 + exit 1 +fi + +# Get namespace from scan summary if available +NAMESPACE=$(jq -r '.scan_summary.namespace // "pharia-ai"' "$SCAN_SUMMARY" 2>/dev/null || echo "pharia-ai") + +# Count total images +TOTAL_IMAGES=$(echo "$SUCCESSFUL_SCANS" | wc -l | tr -d ' ') + +# Start report +cat > "$OUTPUT_FILE" </dev/null || echo "0") + TOTAL_COMPONENTS=$((TOTAL_COMPONENTS + components)) + + # Count OS packages + os_packages=$(jq '[.components[] | select(.type == "library" or .type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") + TOTAL_OS_PACKAGES=$((TOTAL_OS_PACKAGES + os_packages)) + + # Count Python packages + python_packages=$(jq '[.components[] | select(.purl // "" | contains("pypi"))] | length' "$sbom_file" 2>/dev/null || echo "0") + TOTAL_PYTHON_PACKAGES=$((TOTAL_PYTHON_PACKAGES + python_packages)) + TOTAL_PYPI=$((TOTAL_PYPI + python_packages)) + + # Count APK packages + apk_packages=$(jq '[.components[] | select(.purl // "" | contains("pkg:apk"))] | length' "$sbom_file" 2>/dev/null || echo "0") + TOTAL_APK=$((TOTAL_APK + apk_packages)) + + # Count NPM packages + npm_packages=$(jq '[.components[] | select(.purl // "" | contains("npm"))] | length' "$sbom_file" 2>/dev/null || echo "0") + TOTAL_NPM=$((TOTAL_NPM + npm_packages)) + + # Count licenses + licenses=$(jq '[.components[].licenses[]?] | length' "$sbom_file" 2>/dev/null || echo "0") + if [[ $licenses -gt 0 ]]; then + IMAGES_WITH_LICENSES=$((IMAGES_WITH_LICENSES + 1)) + fi + TOTAL_LICENSES=$((TOTAL_LICENSES + licenses)) + fi +done + +TOTAL_IMAGES=$(echo "$SUCCESSFUL_SCANS" | wc -l | tr -d ' ') + +cat >> "$OUTPUT_FILE" <&2 + + # Extract image name (last part) + image_name=$(echo "$image" | sed 's|.*/||') + + # Get component count + component_count=$(jq '.components | length' "$sbom_file" 2>/dev/null || echo "0") + + # Get component types breakdown + os_components=$(jq '[.components[] | select(.type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") + library_components=$(jq '[.components[] | select(.type == "library")] | length' "$sbom_file" 2>/dev/null || echo "0") + application_components=$(jq '[.components[] | select(.type == "application")] | length' "$sbom_file" 2>/dev/null || echo "0") + + # Get package type breakdown + apk_packages=$(jq '[.components[] | select(.purl // "" | contains("pkg:apk"))] | length' "$sbom_file" 2>/dev/null || echo "0") + pypi_packages=$(jq '[.components[] | select(.purl // "" | contains("pypi"))] | length' "$sbom_file" 2>/dev/null || echo "0") + npm_packages=$(jq '[.components[] | select(.purl // "" | contains("npm"))] | length' "$sbom_file" 2>/dev/null || echo "0") + + # Get license information + unique_licenses=$(jq '[.components[].licenses[]?.license.id // .components[].licenses[]?.license.name] | unique | length' "$sbom_file" 2>/dev/null || echo "0") + license_list=$(jq -r '[.components[].licenses[]?.license.id // .components[].licenses[]?.license.name] | unique | .[]' "$sbom_file" 2>/dev/null | sort -u | head -10 | tr '\n' ',' | sed 's/,$//' || echo "N/A") + + # Get top 10 components by name (count duplicates) + top_components=$(jq -r '.components[] | "\(.name)@\(.version // "unknown")"' "$sbom_file" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | awk '{print " - " $2 " (count: " $1 ")"}' || echo " - N/A") + + # Get metadata + metadata_file="$SCAN_RESULTS_DIR/$img_safe/metadata.json" + base_image="N/A" + if [[ -f "$metadata_file" ]]; then + base_image=$(jq -r '.base_image // "N/A"' "$metadata_file" 2>/dev/null || echo "N/A") + fi + + # Write to report + cat >> "$OUTPUT_FILE" < +View all components (click to expand) + +| Component Name | Version | Type | PURL | License | +|----------------|---------|------|------|---------| +$(jq -r '.components[] | "| \(.name // "N/A") | \(.version // "N/A") | \(.type // "N/A") | \(.purl // "N/A") | \(.licenses[0].license.id // .licenses[0].license.name // "N/A") |"' "$sbom_file" 2>/dev/null) + +*Complete component list ($component_count components).* + + + +--- + +EOF + + IMAGE_NUM=$((IMAGE_NUM + 1)) +done + +# Calculate component type totals +TOTAL_LIBRARY=0 +TOTAL_OS=0 +TOTAL_APP=0 + +for image in $SUCCESSFUL_SCANS; do + img_safe=$(echo "$image" | sed 's|[^A-Za-z0-9._-]|_|g') + sbom_file="$SCAN_RESULTS_DIR/$img_safe/sbom.json" + + if [[ -f "$sbom_file" ]]; then + lib_count=$(jq '[.components[] | select(.type == "library")] | length' "$sbom_file" 2>/dev/null || echo "0") + os_count=$(jq '[.components[] | select(.type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") + app_count=$(jq '[.components[] | select(.type == "application")] | length' "$sbom_file" 2>/dev/null || echo "0") + + TOTAL_LIBRARY=$((TOTAL_LIBRARY + lib_count)) + TOTAL_OS=$((TOTAL_OS + os_count)) + TOTAL_APP=$((TOTAL_APP + app_count)) + fi +done + +# Add summary section +cat >> "$OUTPUT_FILE" <&2 + From 16d82fd46437911912e174936a8940a465907ab5 Mon Sep 17 00:00:00 2001 From: Ore Olarewaju Date: Mon, 26 Jan 2026 19:49:30 +0100 Subject: [PATCH 2/5] pre-commit fix --- scanner/generate-sbom-report.sh | 45 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/scanner/generate-sbom-report.sh b/scanner/generate-sbom-report.sh index dc5591c..b8d1d22 100755 --- a/scanner/generate-sbom-report.sh +++ b/scanner/generate-sbom-report.sh @@ -31,8 +31,8 @@ TOTAL_IMAGES=$(echo "$SUCCESSFUL_SCANS" | wc -l | tr -d ' ') cat > "$OUTPUT_FILE" </dev/null || echo "0") TOTAL_COMPONENTS=$((TOTAL_COMPONENTS + components)) - + # Count OS packages os_packages=$(jq '[.components[] | select(.type == "library" or .type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") TOTAL_OS_PACKAGES=$((TOTAL_OS_PACKAGES + os_packages)) - + # Count Python packages python_packages=$(jq '[.components[] | select(.purl // "" | contains("pypi"))] | length' "$sbom_file" 2>/dev/null || echo "0") TOTAL_PYTHON_PACKAGES=$((TOTAL_PYTHON_PACKAGES + python_packages)) TOTAL_PYPI=$((TOTAL_PYPI + python_packages)) - + # Count APK packages apk_packages=$(jq '[.components[] | select(.purl // "" | contains("pkg:apk"))] | length' "$sbom_file" 2>/dev/null || echo "0") TOTAL_APK=$((TOTAL_APK + apk_packages)) - + # Count NPM packages npm_packages=$(jq '[.components[] | select(.purl // "" | contains("npm"))] | length' "$sbom_file" 2>/dev/null || echo "0") TOTAL_NPM=$((TOTAL_NPM + npm_packages)) - + # Count licenses licenses=$(jq '[.components[].licenses[]?] | length' "$sbom_file" 2>/dev/null || echo "0") if [[ $licenses -gt 0 ]]; then @@ -113,49 +113,49 @@ IMAGE_NUM=1 for image in $SUCCESSFUL_SCANS; do img_safe=$(echo "$image" | sed 's|[^A-Za-z0-9._-]|_|g') sbom_file="$SCAN_RESULTS_DIR/$img_safe/sbom.json" - + if [[ ! -f "$sbom_file" ]]; then continue fi - + echo "Processing image $IMAGE_NUM/$TOTAL_IMAGES: $image" >&2 - + # Extract image name (last part) image_name=$(echo "$image" | sed 's|.*/||') - + # Get component count component_count=$(jq '.components | length' "$sbom_file" 2>/dev/null || echo "0") - + # Get component types breakdown os_components=$(jq '[.components[] | select(.type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") library_components=$(jq '[.components[] | select(.type == "library")] | length' "$sbom_file" 2>/dev/null || echo "0") application_components=$(jq '[.components[] | select(.type == "application")] | length' "$sbom_file" 2>/dev/null || echo "0") - + # Get package type breakdown apk_packages=$(jq '[.components[] | select(.purl // "" | contains("pkg:apk"))] | length' "$sbom_file" 2>/dev/null || echo "0") pypi_packages=$(jq '[.components[] | select(.purl // "" | contains("pypi"))] | length' "$sbom_file" 2>/dev/null || echo "0") npm_packages=$(jq '[.components[] | select(.purl // "" | contains("npm"))] | length' "$sbom_file" 2>/dev/null || echo "0") - + # Get license information unique_licenses=$(jq '[.components[].licenses[]?.license.id // .components[].licenses[]?.license.name] | unique | length' "$sbom_file" 2>/dev/null || echo "0") license_list=$(jq -r '[.components[].licenses[]?.license.id // .components[].licenses[]?.license.name] | unique | .[]' "$sbom_file" 2>/dev/null | sort -u | head -10 | tr '\n' ',' | sed 's/,$//' || echo "N/A") - + # Get top 10 components by name (count duplicates) top_components=$(jq -r '.components[] | "\(.name)@\(.version // "unknown")"' "$sbom_file" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | awk '{print " - " $2 " (count: " $1 ")"}' || echo " - N/A") - + # Get metadata metadata_file="$SCAN_RESULTS_DIR/$img_safe/metadata.json" base_image="N/A" if [[ -f "$metadata_file" ]]; then base_image=$(jq -r '.base_image // "N/A"' "$metadata_file" 2>/dev/null || echo "N/A") fi - + # Write to report cat >> "$OUTPUT_FILE" </dev/null || echo "0") os_count=$(jq '[.components[] | select(.type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") app_count=$(jq '[.components[] | select(.type == "application")] | length' "$sbom_file" 2>/dev/null || echo "0") - + TOTAL_LIBRARY=$((TOTAL_LIBRARY + lib_count)) TOTAL_OS=$((TOTAL_OS + os_count)) TOTAL_APP=$((TOTAL_APP + app_count)) @@ -294,4 +294,3 @@ Each SBOM includes: EOF echo "✅ Detailed SBOM report generated: $OUTPUT_FILE" >&2 - From babd24629aa328b027bf11f75be7f32a222386f3 Mon Sep 17 00:00:00 2001 From: Ore Olarewaju Date: Mon, 26 Jan 2026 19:58:35 +0100 Subject: [PATCH 3/5] add python sbom generator --- scanner_py/cli/__init__.py | 7 + scanner_py/cli/generate_sbom_report.py | 414 +++++++++++++++++++++++++ 2 files changed, 421 insertions(+) create mode 100644 scanner_py/cli/generate_sbom_report.py diff --git a/scanner_py/cli/__init__.py b/scanner_py/cli/__init__.py index e563ea0..8e8880a 100644 --- a/scanner_py/cli/__init__.py +++ b/scanner_py/cli/__init__.py @@ -10,6 +10,7 @@ from .extract import create_extract_parser, run_extract from .verify_chainguard import create_chainguard_parser, run_chainguard from .generate_report import create_generate_report_parser, run_generate_report +from .generate_sbom_report import create_generate_sbom_report_parser, run_generate_sbom_report from .oras_scan import create_oras_scan_parser, run_oras_scan @@ -28,6 +29,7 @@ def create_main_parser() -> argparse.ArgumentParser: extract Extract attestations from an image verify-chainguard Check if image uses Chainguard base image generate-report Generate reports from existing scan results + generate-sbom-report Generate detailed SBOM analysis report from scan results Examples: # Simple triage scan (direct Trivy scan, triage.toml support) @@ -42,6 +44,9 @@ def create_main_parser() -> argparse.ArgumentParser: # Generate report from existing scan results scanner-py generate-report --input-dir ./scan-results -o report.md + # Generate detailed SBOM report + scanner-py generate-sbom-report --input-dir ./scan-results -o sbom-report.md + # Scan a single image scanner-py scan-image --image registry.io/app:v1.0 @@ -63,6 +68,7 @@ def create_main_parser() -> argparse.ArgumentParser: create_extract_parser(subparsers) create_chainguard_parser(subparsers) create_generate_report_parser(subparsers) + create_generate_sbom_report_parser(subparsers) return parser @@ -93,6 +99,7 @@ def main(argv: Optional[List[str]] = None) -> int: "extract": run_extract, "verify-chainguard": run_chainguard, "generate-report": run_generate_report, + "generate-sbom-report": run_generate_sbom_report, } handler = command_handlers.get(args.command) diff --git a/scanner_py/cli/generate_sbom_report.py b/scanner_py/cli/generate_sbom_report.py new file mode 100644 index 0000000..526a144 --- /dev/null +++ b/scanner_py/cli/generate_sbom_report.py @@ -0,0 +1,414 @@ +"""CLI for generating detailed SBOM reports from scan results. + +Equivalent to generate-sbom-report.sh +""" + +import argparse +import json +import re +from collections import Counter +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional + +from ..utils.logging import setup_logging, LogLevel, get_logger + +logger = get_logger(__name__) + + +def create_generate_sbom_report_parser(subparsers: Any) -> argparse.ArgumentParser: + """Create the generate-sbom-report subparser.""" + parser = subparsers.add_parser( + "generate-sbom-report", + help="Generate detailed SBOM report from scan results", + description=""" +Generate a detailed Software Bill of Materials (SBOM) analysis report from +previously completed scans. This report provides comprehensive component +inventory, license information, and package breakdowns for all successfully +scanned images. +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Generate SBOM report from scan results + scanner-py generate-sbom-report --input-dir ./scan-results --output sbom-report.md + + # Generate report with custom output file + scanner-py generate-sbom-report --input-dir ./scan-results -o detailed-sbom.md +""", + ) + + parser.add_argument( + "--input-dir", + default="./scan-results", + help="Directory containing scan results (default: ./scan-results)", + ) + parser.add_argument( + "--output", "-o", + default="sbom-detailed-report.md", + help="Output file path (default: sbom-detailed-report.md)", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose logging", + ) + + return parser + + +def sanitize_image_name(image: str) -> str: + """Sanitize image name for use in file paths.""" + return re.sub(r'[^A-Za-z0-9._-]', '_', image) + + +def load_scan_summary(scan_results_dir: Path) -> Optional[Dict[str, Any]]: + """Load scan summary JSON file.""" + summary_path = scan_results_dir / "scan-summary.json" + if not summary_path.exists(): + logger.error(f"scan-summary.json not found in {scan_results_dir}") + return None + + try: + with open(summary_path) as f: + return json.load(f) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse scan-summary.json: {e}") + return None + + +def load_sbom(sbom_path: Path) -> Optional[Dict[str, Any]]: + """Load SBOM JSON file.""" + if not sbom_path.exists(): + return None + + try: + with open(sbom_path) as f: + return json.load(f) + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse SBOM {sbom_path}: {e}") + return None + + +def analyze_sbom(sbom: Dict[str, Any]) -> Dict[str, Any]: + """Analyze SBOM and extract statistics.""" + components = sbom.get("components", []) + + stats = { + "total_components": len(components), + "os_components": 0, + "library_components": 0, + "application_components": 0, + "apk_packages": 0, + "pypi_packages": 0, + "npm_packages": 0, + "total_licenses": 0, + "unique_licenses": set(), + } + + for component in components: + comp_type = component.get("type", "") + if comp_type == "operating-system": + stats["os_components"] += 1 + elif comp_type == "library": + stats["library_components"] += 1 + elif comp_type == "application": + stats["application_components"] += 1 + + purl = component.get("purl", "") + if "pkg:apk" in purl: + stats["apk_packages"] += 1 + elif "pypi" in purl: + stats["pypi_packages"] += 1 + elif "npm" in purl: + stats["npm_packages"] += 1 + + licenses = component.get("licenses", []) + for license_obj in licenses: + stats["total_licenses"] += 1 + license_id = license_obj.get("license", {}).get("id") + license_name = license_obj.get("license", {}).get("name") + if license_id: + stats["unique_licenses"].add(license_id) + elif license_name: + stats["unique_licenses"].add(license_name) + + stats["unique_licenses"] = sorted(list(stats["unique_licenses"])) + return stats + + +def get_top_components(sbom: Dict[str, Any], limit: int = 10) -> List[str]: + """Get top components by name (counting duplicates).""" + components = sbom.get("components", []) + component_names = [] + + for component in components: + name = component.get("name", "N/A") + version = component.get("version", "unknown") + component_names.append(f"{name}@{version}") + + counter = Counter(component_names) + top = counter.most_common(limit) + return [f" - {name} (count: {count})" for name, count in top] + + +def generate_report( + scan_results_dir: Path, + output_file: Path, + successful_scans: List[str], + namespace: str = "pharia-ai", +) -> bool: + """Generate detailed SBOM report.""" + total_images = len(successful_scans) + + # Calculate overall statistics + total_components = 0 + total_os_packages = 0 + total_python_packages = 0 + total_licenses = 0 + images_with_licenses = 0 + total_apk = 0 + total_pypi = 0 + total_npm = 0 + + # First pass: calculate totals + for image in successful_scans: + img_safe = sanitize_image_name(image) + sbom_path = scan_results_dir / img_safe / "sbom.json" + sbom = load_sbom(sbom_path) + + if sbom: + stats = analyze_sbom(sbom) + total_components += stats["total_components"] + total_os_packages += stats["os_components"] + stats["library_components"] + total_python_packages += stats["pypi_packages"] + total_pypi += stats["pypi_packages"] + total_apk += stats["apk_packages"] + total_npm += stats["npm_packages"] + total_licenses += stats["total_licenses"] + if stats["total_licenses"] > 0: + images_with_licenses += 1 + + # Start writing report + with open(output_file, "w") as f: + # Header + f.write("# Detailed SBOM Analysis Report\n\n") + f.write(f"**Generated:** {datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')}\n") + f.write(f"**Source:** Successful SBOM scans from {namespace} namespace\n") + f.write(f"**Total Images Analyzed:** {total_images}\n\n") + f.write("---\n\n") + f.write("## Executive Summary\n\n") + f.write( + f"This report provides detailed analysis of Software Bill of Materials (SBOM) " + f"for all {total_images} successfully scanned container images. Each SBOM contains " + f"a complete inventory of all software components, including operating system " + f"packages, application dependencies, and their licenses.\n\n" + ) + + # Overall statistics + f.write("## Overall SBOM Statistics\n\n") + f.write("| Metric | Value |\n") + f.write("|--------|-------|\n") + f.write(f"| Total Images Analyzed | {total_images} |\n") + f.write(f"| Total Components | {total_components} |\n") + f.write(f"| Total OS Packages | {total_os_packages} |\n") + f.write(f"| Total Python Packages | {total_python_packages} |\n") + f.write(f"| Total Licenses | {total_licenses} |\n") + f.write(f"| Images with License Info | {images_with_licenses} / {total_images} |\n\n") + f.write("---\n\n") + f.write("## Detailed Image Analysis\n\n") + + # Detailed section for each image + image_num = 1 + total_library = 0 + total_os = 0 + total_app = 0 + + for image in successful_scans: + img_safe = sanitize_image_name(image) + sbom_path = scan_results_dir / img_safe / "sbom.json" + sbom = load_sbom(sbom_path) + + if not sbom: + continue + + logger.info(f"Processing image {image_num}/{total_images}: {image}") + + image_name = image.split("/")[-1] + stats = analyze_sbom(sbom) + + total_library += stats["library_components"] + total_os += stats["os_components"] + total_app += stats["application_components"] + + # Get license list (top 10) + license_list = ", ".join(stats["unique_licenses"][:10]) + if not license_list: + license_list = "N/A" + + # Get top components + top_components = get_top_components(sbom, limit=10) + top_components_str = "\n".join(top_components) if top_components else " - N/A" + + # Get metadata + metadata_path = scan_results_dir / img_safe / "metadata.json" + base_image = "N/A" + if metadata_path.exists(): + try: + with open(metadata_path) as mf: + metadata = json.load(mf) + base_image = metadata.get("base_image", "N/A") + except Exception: + pass + + # Write image section + f.write(f"### {image_num}. {image_name}\n\n") + f.write(f"**Image:** `{image}`\n") + f.write(f"**SBOM File:** `{img_safe}/sbom.json`\n\n") + f.write("#### Component Summary\n\n") + f.write("| Metric | Count |\n") + f.write("|--------|-------|\n") + f.write(f"| Total Components | {stats['total_components']} |\n") + f.write(f"| OS Components | {stats['os_components']} |\n") + f.write(f"| Library Components | {stats['library_components']} |\n") + f.write(f"| Application Components | {stats['application_components']} |\n\n") + f.write("#### Package Type Breakdown\n\n") + f.write("| Package Type | Count |\n") + f.write("|--------------|-------|\n") + f.write(f"| APK (Alpine/Chainguard) | {stats['apk_packages']} |\n") + f.write(f"| PyPI (Python) | {stats['pypi_packages']} |\n") + f.write(f"| NPM (Node.js) | {stats['npm_packages']} |\n\n") + f.write("#### License Information\n\n") + f.write(f"- **Unique Licenses:** {len(stats['unique_licenses'])}\n") + f.write(f"- **Top Licenses:** {license_list}\n\n") + f.write("#### Top Components\n\n") + f.write(f"{top_components_str}\n\n") + f.write("#### Component Details\n\n") + f.write("
\n") + f.write("View all components (click to expand)\n\n") + f.write("| Component Name | Version | Type | PURL | License |\n") + f.write("|----------------|---------|------|------|---------|\n") + + # Write all components + for component in sbom.get("components", []): + name = component.get("name", "N/A") + version = component.get("version", "N/A") + comp_type = component.get("type", "N/A") + purl = component.get("purl", "N/A") + + license_id = "N/A" + licenses = component.get("licenses", []) + if licenses: + license_obj = licenses[0].get("license", {}) + license_id = license_obj.get("id") or license_obj.get("name", "N/A") + + f.write(f"| {name} | {version} | {comp_type} | {purl} | {license_id} |\n") + + f.write(f"\n*Complete component list ({stats['total_components']} components).*\n\n") + f.write("
\n\n") + f.write("---\n\n") + + image_num += 1 + + # Component type distribution + f.write("---\n\n") + f.write("## Component Type Distribution\n\n") + f.write("### By Component Type\n\n") + f.write("| Type | Count | Percentage |\n") + f.write("|------|-------|------------|\n") + + if total_components > 0: + lib_pct = (total_library / total_components) * 100 + os_pct = (total_os / total_components) * 100 + app_pct = (total_app / total_components) * 100 + f.write(f"| Library | {total_library} | {lib_pct:.1f}% |\n") + f.write(f"| Operating System | {total_os} | {os_pct:.1f}% |\n") + f.write(f"| Application | {total_app} | {app_pct:.1f}% |\n\n") + else: + f.write("| Library | 0 | 0.0% |\n") + f.write("| Operating System | 0 | 0.0% |\n") + f.write("| Application | 0 | 0.0% |\n\n") + + f.write("### By Package Manager\n\n") + f.write("| Package Manager | Count | Percentage |\n") + f.write("|-----------------|-------|------------|\n") + + if total_components > 0: + apk_pct = (total_apk / total_components) * 100 + pypi_pct = (total_pypi / total_components) * 100 + npm_pct = (total_npm / total_components) * 100 + f.write(f"| APK (Chainguard/Alpine) | {total_apk} | {apk_pct:.1f}% |\n") + f.write(f"| PyPI (Python) | {total_pypi} | {pypi_pct:.1f}% |\n") + f.write(f"| NPM (Node.js) | {total_npm} | {npm_pct:.1f}% |\n\n") + else: + f.write("| APK (Chainguard/Alpine) | 0 | 0.0% |\n") + f.write("| PyPI (Python) | 0 | 0.0% |\n") + f.write("| NPM (Node.js) | 0 | 0.0% |\n\n") + + # License analysis + f.write("---\n\n") + f.write("## License Analysis\n\n") + f.write("### License Distribution\n\n") + f.write("The SBOMs contain license information for components. Common licenses found:\n\n") + f.write("- **MIT** - Most common permissive license\n") + f.write("- **Apache-2.0** - Apache License 2.0\n") + f.write("- **BSD-3-Clause** - BSD 3-Clause License\n") + f.write("- **GPL-2.0** - GNU General Public License v2\n") + f.write("- **MPL-2.0** - Mozilla Public License 2.0\n\n") + f.write("### License Compliance Notes\n\n") + f.write("- All images use Chainguard base images which have clear licensing\n") + f.write("- Python packages typically include license information in their metadata\n") + f.write("- OS packages from Chainguard follow Wolfi licensing standards\n\n") + f.write("---\n\n") + f.write("## SBOM Format Information\n\n") + f.write("All SBOMs are in **CycloneDX 1.6** format with the following characteristics:\n\n") + f.write("- **Format:** CycloneDX JSON\n") + f.write("- **Schema:** http://cyclonedx.org/schema/bom-1.6.schema.json\n") + f.write("- **Generated by:** Trivy\n") + f.write("- **Includes:** Components, licenses, hashes, PURLs (Package URLs)\n\n") + f.write("### SBOM Metadata\n\n") + f.write("Each SBOM includes:\n") + f.write("- Component inventory (complete list of all software)\n") + f.write("- Package URLs (PURLs) for component identification\n") + f.write("- License information where available\n") + f.write("- Component hashes for integrity verification\n") + f.write("- Component types and classifications\n\n") + f.write("---\n\n") + f.write("**Report End**\n") + + return True + + +def run_generate_sbom_report(args: argparse.Namespace) -> int: + """Run the generate-sbom-report command.""" + setup_logging(LogLevel.DEBUG if args.verbose else LogLevel.INFO) + + scan_results_dir = Path(args.input_dir) + output_file = Path(args.output) + + if not scan_results_dir.exists(): + logger.error(f"Scan results directory not found: {scan_results_dir}") + return 1 + + # Load scan summary + summary = load_scan_summary(scan_results_dir) + if not summary: + return 1 + + # Get successful scans + successful_scans = summary.get("successful_scans", []) + if not successful_scans: + logger.error("No successful scans found in scan summary") + return 1 + + # Get namespace + namespace = summary.get("scan_summary", {}).get("namespace", "pharia-ai") + + # Generate report + if generate_report(scan_results_dir, output_file, successful_scans, namespace): + logger.info(f"✅ Detailed SBOM report generated: {output_file}") + return 0 + else: + logger.error("Failed to generate SBOM report") + return 1 + From 0f6180506bec47ec5b12c4ab3b09da2a35580ea7 Mon Sep 17 00:00:00 2001 From: Ore Olarewaju Date: Mon, 26 Jan 2026 20:03:02 +0100 Subject: [PATCH 4/5] pre-commit fix --- scanner_py/cli/generate_sbom_report.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/scanner_py/cli/generate_sbom_report.py b/scanner_py/cli/generate_sbom_report.py index 526a144..9d48f61 100644 --- a/scanner_py/cli/generate_sbom_report.py +++ b/scanner_py/cli/generate_sbom_report.py @@ -93,7 +93,7 @@ def load_sbom(sbom_path: Path) -> Optional[Dict[str, Any]]: def analyze_sbom(sbom: Dict[str, Any]) -> Dict[str, Any]: """Analyze SBOM and extract statistics.""" components = sbom.get("components", []) - + stats = { "total_components": len(components), "os_components": 0, @@ -141,12 +141,12 @@ def get_top_components(sbom: Dict[str, Any], limit: int = 10) -> List[str]: """Get top components by name (counting duplicates).""" components = sbom.get("components", []) component_names = [] - + for component in components: name = component.get("name", "N/A") version = component.get("version", "unknown") component_names.append(f"{name}@{version}") - + counter = Counter(component_names) top = counter.most_common(limit) return [f" - {name} (count: {count})" for name, count in top] @@ -160,7 +160,7 @@ def generate_report( ) -> bool: """Generate detailed SBOM report.""" total_images = len(successful_scans) - + # Calculate overall statistics total_components = 0 total_os_packages = 0 @@ -176,7 +176,7 @@ def generate_report( img_safe = sanitize_image_name(image) sbom_path = scan_results_dir / img_safe / "sbom.json" sbom = load_sbom(sbom_path) - + if sbom: stats = analyze_sbom(sbom) total_components += stats["total_components"] @@ -236,7 +236,7 @@ def generate_report( image_name = image.split("/")[-1] stats = analyze_sbom(sbom) - + total_library += stats["library_components"] total_os += stats["os_components"] total_app += stats["application_components"] @@ -295,7 +295,7 @@ def generate_report( version = component.get("version", "N/A") comp_type = component.get("type", "N/A") purl = component.get("purl", "N/A") - + license_id = "N/A" licenses = component.get("licenses", []) if licenses: @@ -316,7 +316,7 @@ def generate_report( f.write("### By Component Type\n\n") f.write("| Type | Count | Percentage |\n") f.write("|------|-------|------------|\n") - + if total_components > 0: lib_pct = (total_library / total_components) * 100 os_pct = (total_os / total_components) * 100 @@ -332,7 +332,7 @@ def generate_report( f.write("### By Package Manager\n\n") f.write("| Package Manager | Count | Percentage |\n") f.write("|-----------------|-------|------------|\n") - + if total_components > 0: apk_pct = (total_apk / total_components) * 100 pypi_pct = (total_pypi / total_components) * 100 @@ -411,4 +411,3 @@ def run_generate_sbom_report(args: argparse.Namespace) -> int: else: logger.error("Failed to generate SBOM report") return 1 - From 5de231629633c9056821566edbfbefa07f3f93bd Mon Sep 17 00:00:00 2001 From: Ore Olarewaju Date: Mon, 26 Jan 2026 21:57:38 +0100 Subject: [PATCH 5/5] cleanup sbom --- scanner/generate-sbom-report.sh | 296 ------------------------- scanner_py/cli/generate_sbom_report.py | 4 +- 2 files changed, 3 insertions(+), 297 deletions(-) delete mode 100755 scanner/generate-sbom-report.sh diff --git a/scanner/generate-sbom-report.sh b/scanner/generate-sbom-report.sh deleted file mode 100755 index b8d1d22..0000000 --- a/scanner/generate-sbom-report.sh +++ /dev/null @@ -1,296 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Script to generate detailed SBOM report from scan results -# Usage: generate-sbom-report.sh - -SCAN_RESULTS_DIR="${1:-./scan-results}" -OUTPUT_FILE="${2:-sbom-detailed-report.md}" -SCAN_SUMMARY="$SCAN_RESULTS_DIR/scan-summary.json" - -if [[ ! -f "$SCAN_SUMMARY" ]]; then - echo "Error: scan-summary.json not found in $SCAN_RESULTS_DIR" >&2 - exit 1 -fi - -# Get successful scans -SUCCESSFUL_SCANS=$(jq -r '.successful_scans[]' "$SCAN_SUMMARY" 2>/dev/null || echo "") - -if [[ -z "$SUCCESSFUL_SCANS" ]]; then - echo "No successful scans found in scan summary" >&2 - exit 1 -fi - -# Get namespace from scan summary if available -NAMESPACE=$(jq -r '.scan_summary.namespace // "pharia-ai"' "$SCAN_SUMMARY" 2>/dev/null || echo "pharia-ai") - -# Count total images -TOTAL_IMAGES=$(echo "$SUCCESSFUL_SCANS" | wc -l | tr -d ' ') - -# Start report -cat > "$OUTPUT_FILE" </dev/null || echo "0") - TOTAL_COMPONENTS=$((TOTAL_COMPONENTS + components)) - - # Count OS packages - os_packages=$(jq '[.components[] | select(.type == "library" or .type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") - TOTAL_OS_PACKAGES=$((TOTAL_OS_PACKAGES + os_packages)) - - # Count Python packages - python_packages=$(jq '[.components[] | select(.purl // "" | contains("pypi"))] | length' "$sbom_file" 2>/dev/null || echo "0") - TOTAL_PYTHON_PACKAGES=$((TOTAL_PYTHON_PACKAGES + python_packages)) - TOTAL_PYPI=$((TOTAL_PYPI + python_packages)) - - # Count APK packages - apk_packages=$(jq '[.components[] | select(.purl // "" | contains("pkg:apk"))] | length' "$sbom_file" 2>/dev/null || echo "0") - TOTAL_APK=$((TOTAL_APK + apk_packages)) - - # Count NPM packages - npm_packages=$(jq '[.components[] | select(.purl // "" | contains("npm"))] | length' "$sbom_file" 2>/dev/null || echo "0") - TOTAL_NPM=$((TOTAL_NPM + npm_packages)) - - # Count licenses - licenses=$(jq '[.components[].licenses[]?] | length' "$sbom_file" 2>/dev/null || echo "0") - if [[ $licenses -gt 0 ]]; then - IMAGES_WITH_LICENSES=$((IMAGES_WITH_LICENSES + 1)) - fi - TOTAL_LICENSES=$((TOTAL_LICENSES + licenses)) - fi -done - -TOTAL_IMAGES=$(echo "$SUCCESSFUL_SCANS" | wc -l | tr -d ' ') - -cat >> "$OUTPUT_FILE" <&2 - - # Extract image name (last part) - image_name=$(echo "$image" | sed 's|.*/||') - - # Get component count - component_count=$(jq '.components | length' "$sbom_file" 2>/dev/null || echo "0") - - # Get component types breakdown - os_components=$(jq '[.components[] | select(.type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") - library_components=$(jq '[.components[] | select(.type == "library")] | length' "$sbom_file" 2>/dev/null || echo "0") - application_components=$(jq '[.components[] | select(.type == "application")] | length' "$sbom_file" 2>/dev/null || echo "0") - - # Get package type breakdown - apk_packages=$(jq '[.components[] | select(.purl // "" | contains("pkg:apk"))] | length' "$sbom_file" 2>/dev/null || echo "0") - pypi_packages=$(jq '[.components[] | select(.purl // "" | contains("pypi"))] | length' "$sbom_file" 2>/dev/null || echo "0") - npm_packages=$(jq '[.components[] | select(.purl // "" | contains("npm"))] | length' "$sbom_file" 2>/dev/null || echo "0") - - # Get license information - unique_licenses=$(jq '[.components[].licenses[]?.license.id // .components[].licenses[]?.license.name] | unique | length' "$sbom_file" 2>/dev/null || echo "0") - license_list=$(jq -r '[.components[].licenses[]?.license.id // .components[].licenses[]?.license.name] | unique | .[]' "$sbom_file" 2>/dev/null | sort -u | head -10 | tr '\n' ',' | sed 's/,$//' || echo "N/A") - - # Get top 10 components by name (count duplicates) - top_components=$(jq -r '.components[] | "\(.name)@\(.version // "unknown")"' "$sbom_file" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | awk '{print " - " $2 " (count: " $1 ")"}' || echo " - N/A") - - # Get metadata - metadata_file="$SCAN_RESULTS_DIR/$img_safe/metadata.json" - base_image="N/A" - if [[ -f "$metadata_file" ]]; then - base_image=$(jq -r '.base_image // "N/A"' "$metadata_file" 2>/dev/null || echo "N/A") - fi - - # Write to report - cat >> "$OUTPUT_FILE" < -View all components (click to expand) - -| Component Name | Version | Type | PURL | License | -|----------------|---------|------|------|---------| -$(jq -r '.components[] | "| \(.name // "N/A") | \(.version // "N/A") | \(.type // "N/A") | \(.purl // "N/A") | \(.licenses[0].license.id // .licenses[0].license.name // "N/A") |"' "$sbom_file" 2>/dev/null) - -*Complete component list ($component_count components).* - - - ---- - -EOF - - IMAGE_NUM=$((IMAGE_NUM + 1)) -done - -# Calculate component type totals -TOTAL_LIBRARY=0 -TOTAL_OS=0 -TOTAL_APP=0 - -for image in $SUCCESSFUL_SCANS; do - img_safe=$(echo "$image" | sed 's|[^A-Za-z0-9._-]|_|g') - sbom_file="$SCAN_RESULTS_DIR/$img_safe/sbom.json" - - if [[ -f "$sbom_file" ]]; then - lib_count=$(jq '[.components[] | select(.type == "library")] | length' "$sbom_file" 2>/dev/null || echo "0") - os_count=$(jq '[.components[] | select(.type == "operating-system")] | length' "$sbom_file" 2>/dev/null || echo "0") - app_count=$(jq '[.components[] | select(.type == "application")] | length' "$sbom_file" 2>/dev/null || echo "0") - - TOTAL_LIBRARY=$((TOTAL_LIBRARY + lib_count)) - TOTAL_OS=$((TOTAL_OS + os_count)) - TOTAL_APP=$((TOTAL_APP + app_count)) - fi -done - -# Add summary section -cat >> "$OUTPUT_FILE" <&2 diff --git a/scanner_py/cli/generate_sbom_report.py b/scanner_py/cli/generate_sbom_report.py index 9d48f61..88ea713 100644 --- a/scanner_py/cli/generate_sbom_report.py +++ b/scanner_py/cli/generate_sbom_report.py @@ -1,6 +1,8 @@ """CLI for generating detailed SBOM reports from scan results. -Equivalent to generate-sbom-report.sh +Generates comprehensive Software Bill of Materials (SBOM) analysis reports +from scan results, including component inventory, license information, and +package breakdowns. """ import argparse