diff --git a/.gitignore b/.gitignore
index a851fac..87d07d3 100755
--- a/.gitignore
+++ b/.gitignore
@@ -88,6 +88,9 @@ log-agent.txt
 reports/
 results/
 results.json
+vulns.json
+vulns-basic.sarif
+vulns-enhanced.sarif
 
 # Backup files
 *.bkp
@@ -99,4 +102,10 @@ da-analyzer-results
 
 # Optional: User-specific test scripts if not shared
 test-commands.txt
-workbench-cli-log.txt
\ No newline at end of file
+workbench-cli-log.txt
+*.sarif
+
+vuln-report-epss-kev.json
+vuln-report-epss.json
+vuln-report-nvd-epss-kev.json
+vuln-report.json
diff --git a/src/workbench_cli/api/components_api.py b/src/workbench_cli/api/components_api.py
new file mode 100644
index 0000000..468bce1
--- /dev/null
+++ b/src/workbench_cli/api/components_api.py
@@ -0,0 +1,54 @@
+from typing import Dict, Any
+
+import logging
+
+from ..exceptions import ApiError
+from .helpers.api_base import APIBase
+from .helpers.component_info_normalizer import normalize_component_response
+
+logger = logging.getLogger("workbench-cli")
+
+
+class ComponentsAPI(APIBase):
+    """Workbench API Component Operations."""
+
+    def get_component_information(self, component_name: str, component_version: str) -> Dict[str, Any]:
+        """Retrieve component metadata from Workbench.
+
+        Args:
+            component_name: The component or package name (e.g. "ansi-regex").
+            component_version: The component version (e.g. "1.1.1").
+
+        Returns:
+            Dictionary with the component information as returned by the API.
+
+        Raises:
+            ApiError: If the component does not exist or the API request fails.
+        """
+        logger.debug(
+            "Fetching information for component '%s' version '%s'...",
+            component_name,
+            component_version,
+        )
+
+        payload = {
+            "group": "components",
+            "action": "get_information",
+            "data": {
+                "component_name": component_name,
+                "component_version": component_version,
+            },
+        }
+
+        response = self._send_request(payload)
+
+        # Successful response
+        if response.get("status") == "1" and "data" in response:
+            return normalize_component_response(response["data"])
+
+        # Something went wrong – build a helpful error message
+        error_msg = response.get("error", f"Unexpected response: {response}")
+        raise ApiError(
+            f"Failed to fetch information for component '{component_name}' version '{component_version}': {error_msg}",
+            details=response,
+        )
diff --git a/src/workbench_cli/api/helpers/component_info_normalizer.py b/src/workbench_cli/api/helpers/component_info_normalizer.py
new file mode 100644
index 0000000..0b196f4
--- /dev/null
+++ b/src/workbench_cli/api/helpers/component_info_normalizer.py
@@ -0,0 +1,68 @@
+from typing import Any, Dict
+
+# Fields that callers actively use; expand if more become important.
+_EXPECTED_FIELDS = {
+    "id",
+    "cpe",
+    "name",
+    "version",
+    "purl",
+    "purl_type",
+    "purl_namespace",
+    "purl_name",
+    "purl_version",
+    "supplier_name",
+    "supplier_url",
+    "license_identifier",
+    "license_name",
+    "copyright",
+    "comment",
+}
+
+
+def normalize_component_response(raw: Any) -> Dict[str, Any]:
+    """Return a stable dict from the Workbench *components/get_information* response.
+
+    Workbench 25.x sometimes returns the *data* field as a single-element list, or
+    as a dict. Future versions may rename or add keys. This helper:
+    • Unwraps a list by taking its first element (an empty list becomes {}).
+    • Ignores unknown fields (passes through only those we care about).
+    • Returns an empty dict on unexpected structures.
+    """
+    # 1. Normalise list ↔ dict
+    if isinstance(raw, list):
+        raw = raw[0] if raw else {}
+    if not isinstance(raw, dict):
+        return {}
+
+    # 2. 
Map known field aliases from other API versions to our canonical keys
+    aliases = {
+        "license": "license_identifier",     # some API versions use generic 'license'
+        "licenseId": "license_identifier",   # camel-cased variant
+        "license_id": "license_identifier",  # snake_cased variant
+        "licenseName": "license_name",       # camel-cased variant
+    }
+    for old, new in aliases.items():
+        if old in raw and new not in raw:
+            raw[new] = raw.pop(old)
+
+    # When only identifier or name supplied, attempt to set the other for completeness
+    if "license_identifier" in raw and "license_name" not in raw:
+        raw["license_name"] = raw["license_identifier"]
+    if "license_name" in raw and "license_identifier" not in raw:
+        raw["license_identifier"] = raw["license_name"]
+
+    # Extract from *licenses* list (Workbench 25.x) when canonical keys still missing
+    if ("license_identifier" not in raw or not raw["license_identifier"]) and "licenses" in raw:
+        lic_data = raw.get("licenses") or []
+        if isinstance(lic_data, list) and lic_data:
+            first_lic = lic_data[0]
+            if isinstance(first_lic, dict):
+                raw["license_identifier"] = first_lic.get("identifier") or first_lic.get("id") or raw.get("license_identifier")
+                raw["license_name"] = first_lic.get("name") or raw.get("license_name")
+        elif isinstance(lic_data, dict):  # single object
+            raw["license_identifier"] = lic_data.get("identifier") or lic_data.get("id") or raw.get("license_identifier")
+            raw["license_name"] = lic_data.get("name") or raw.get("license_name")
+
+    # 3. Return only expected fields (others are ignored to shield callers)
+    return {field: raw.get(field) for field in _EXPECTED_FIELDS if field in raw}
\ No newline at end of file
diff --git a/src/workbench_cli/api/workbench_api.py b/src/workbench_cli/api/workbench_api.py
index d4d6c27..2428957 100644
--- a/src/workbench_cli/api/workbench_api.py
+++ b/src/workbench_cli/api/workbench_api.py
@@ -8,6 +8,7 @@
 from .projects_api import ProjectsAPI
 from .scans_api import ScansAPI
 from .vulnerabilities_api import VulnerabilitiesAPI
+from .components_api import ComponentsAPI
 from ..exceptions import (
     WorkbenchCLIError,
     ApiError,
@@ -35,7 +36,7 @@
 # Assume logger is configured in main.py
 logger = logging.getLogger("workbench-cli")
 
-class WorkbenchAPI(UploadAPI, ResolveWorkbenchProjectScan, ProjectsAPI, VulnerabilitiesAPI, ScansAPI):
+class WorkbenchAPI(UploadAPI, ResolveWorkbenchProjectScan, ProjectsAPI, VulnerabilitiesAPI, ScansAPI, ComponentsAPI):
     """
     Workbench API client class for interacting with the FossID Workbench API.
     This class composes all the individual API parts into a single client.
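+
+    Example (illustrative; the URL and credentials are placeholders, and the
+    constructor is assumed to follow the same url/user/token convention as the
+    individual API classes in this package):
+
+        wb = WorkbenchAPI("https://workbench.example.com/api.php", "user", "token")
+        wb.get_component_information("ansi-regex", "1.1.1")  # -> normalized component dict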
diff --git a/src/workbench_cli/cli.py b/src/workbench_cli/cli.py index 1da017d..438c466 100644 --- a/src/workbench_cli/cli.py +++ b/src/workbench_cli/cli.py @@ -52,7 +52,7 @@ def add_common_result_options(subparser): results_display_args.add_argument("--show-scan-metrics", help="Show metrics on file identifications (total files, pending id, identified, no matches).", action="store_true", default=False) results_display_args.add_argument("--show-policy-warnings", help="Shows Policy Warnings in identified components or dependencies.", action="store_true", default=False) results_display_args.add_argument("--show-vulnerabilities", help="Shows a summary of vulnerabilities found in the scan.", action="store_true", default=False) - results_display_args.add_argument("--path-result", help="Saves the requested results to this file/directory (JSON format).", metavar="PATH") + results_display_args.add_argument("--json-result-path", help="Saves the requested results to this file/directory (JSON format).", metavar="PATH") # --- Main Parsing Function --- def parse_cmdline_args(): @@ -114,6 +114,24 @@ def parse_cmdline_args(): # Download reports for a specific scan (globally) workbench-cli --api-url --api-user --api-token \\ download-reports --scan-name MYSCAN01 --report-scope scan --report-type html --report-save-path reports/ + + # Export vulnerability results in CycloneDX format + workbench-cli --api-url --api-user --api-token \\ + export-vulns --project-name MYPROJ --scan-name MYSCAN01 --format cyclonedx -o vulns.cdx.json + + # Export complete SBOM with all components (not just vulnerable ones) + workbench-cli --api-url --api-user --api-token \\ + export-vulns --project-name MYPROJ --scan-name MYSCAN01 --format cyclonedx -o complete-sbom.cdx.json --augment-full-bom + + # Export CycloneDX SBOM with external enrichment + workbench-cli --api-url --api-user --api-token \\ + export-vulns --project-name MYPROJ --scan-name MYSCAN01 --format cyclonedx -o vulns-enriched.cdx.json \\ + --enrich-nvd --enrich-epss --augment-full-bom + + # Export vulnerability results in SPDX 3.0 format with enrichment + workbench-cli --api-url --api-user --api-token \\ + export-vulns --project-name MYPROJ --scan-name MYSCAN01 --format spdx3 -o vulns.spdx.json \\ + --enrich-nvd --enrich-epss """ ) @@ -297,6 +315,45 @@ def parse_cmdline_args(): add_common_monitoring_options(scan_git_parser) add_common_result_options(scan_git_parser) + # --- 'export-vulns' Subcommand --- + export_vulns_parser = subparsers.add_parser( + 'export-vulns', + help='Export vulnerability results in multiple formats (SARIF, CycloneDX, SPDX 3.0).', + description='Export vulnerability results from an existing scan in various formats:\n' + '• SARIF (Static Analysis Results Interchange Format) v2.1.0 - compatible with GitHub Advanced Security\n' + '• CycloneDX - Software Bill of Materials with vulnerability information\n' + '• SPDX 3.0 - Security Profile for vulnerability reporting\n\n' + 'All formats share the same data enrichment pipeline and VEX assessment processing.', + formatter_class=RawTextHelpFormatter + ) + + # Required arguments + required_args = export_vulns_parser.add_argument_group("Required") + required_args.add_argument("--project-name", help="Project name containing the scan.", type=str, required=True, metavar="NAME") + required_args.add_argument("--scan-name", help="Scan name to export vulnerability results from.", type=str, required=True, metavar="NAME") + required_args.add_argument("--format", help="Output format for the vulnerability report.", 
choices=["sarif", "cyclonedx", "spdx3"], required=True, metavar="FORMAT")
+    required_args.add_argument("-o", "--output", help="Output file path for the vulnerability report.", type=str, required=True, metavar="PATH")
+
+    # External API enrichment
+    external_api_args = export_vulns_parser.add_argument_group("External API Enrichment (Network Calls)")
+    external_api_args.add_argument("--enrich-nvd", help="Fetch CVE descriptions from NVD API (Default: False - opt-in).", action=argparse.BooleanOptionalAction, default=False)
+    external_api_args.add_argument("--enrich-epss", help="Fetch EPSS scores from FIRST API (Default: False - opt-in).", action=argparse.BooleanOptionalAction, default=False)
+    external_api_args.add_argument("--enrich-cisa-kev", help="Fetch CISA Known Exploited Vulnerabilities (Default: False - opt-in).", action=argparse.BooleanOptionalAction, default=False)
+    external_api_args.add_argument("--external-timeout", help="Timeout for external API calls in seconds (Default: 30).", type=int, default=30, metavar="SECONDS")
+
+    # Output processing & suppression
+    processing_args = export_vulns_parser.add_argument_group("Output Processing & Suppression")
+    processing_args.add_argument("--severity-threshold", help="Only include vulnerabilities at or above this CVSS severity level.", choices=["critical", "high", "medium", "low"], metavar="LEVEL")
+    processing_args.add_argument("--disable-dynamic-risk-scoring", dest="disable_dynamic_risk_scoring", help="Disable Dynamic Risk Scoring (VEX suppression and EPSS / KEV escalation).", action="store_true")
+    processing_args.add_argument("--augment-full-bom", help="Augment the SBOM from the scan with vulnerability enrichment and dynamic scoring.", action="store_true")
+
+
+    # Output control
+    output_control_args = export_vulns_parser.add_argument_group("Output Control")
+    output_control_args.add_argument("--quiet", help="Suppress progress output.", action="store_true")
+
+    add_common_monitoring_options(export_vulns_parser)
 
     # --- Validate args after parsing ---
     args = parser.parse_args()
diff --git a/src/workbench_cli/handlers/__init__.py b/src/workbench_cli/handlers/__init__.py
index b3cd199..69b098a 100644
--- a/src/workbench_cli/handlers/__init__.py
+++ b/src/workbench_cli/handlers/__init__.py
@@ -13,6 +13,7 @@
 from .show_results import handle_show_results
 from .evaluate_gates import handle_evaluate_gates
 from .download_reports import handle_download_reports
+from .export_vulns import handle_export_vulns
 
 __all__ = [
     'handle_scan',
@@ -21,5 +22,6 @@
     'handle_import_sbom',
     'handle_show_results',
     'handle_evaluate_gates',
-    'handle_download_reports'
+    'handle_download_reports',
+    'handle_export_vulns'
 ]
diff --git a/src/workbench_cli/handlers/export_vulns.py b/src/workbench_cli/handlers/export_vulns.py
new file mode 100644
index 0000000..2690931
--- /dev/null
+++ b/src/workbench_cli/handlers/export_vulns.py
@@ -0,0 +1,469 @@
+# workbench_cli/handlers/export_vulns.py
+
+import logging
+import argparse
+import os
+from typing import TYPE_CHECKING, List, Dict, Any
+
+from ..utilities.error_handling import handler_error_wrapper
+from ..utilities.vuln_report.cve_data_gathering import enrich_vulnerabilities
+from ..exceptions import (
+    ApiError,
+    NetworkError,
+    ProcessTimeoutError,
+    ProcessError
+)
+
+if TYPE_CHECKING:
+    from ..api import 
WorkbenchAPI + +logger = logging.getLogger("workbench-cli") + + +@handler_error_wrapper +def handle_export_vulns(workbench: "WorkbenchAPI", params: argparse.Namespace) -> bool: + """ + Handler for the 'export-vulns' command. Exports vulnerability results in various formats. + + Args: + workbench: The Workbench API client + params: Command line parameters + + Returns: + bool: True if the operation was successful + """ + + print(f"\n--- Running {params.command.upper()} Command ---") + + # Validate format + supported_formats = ['sarif', 'cyclonedx', 'spdx3'] + if params.format not in supported_formats: + raise ProcessError(f"Unsupported format '{params.format}'. Supported formats: {', '.join(supported_formats)}") + + # Check format-specific dependencies + _check_format_dependencies(params.format) + + # Extract common parameters once + common_params = _extract_common_params(params) + + # Resolve project and scan (find only) + if not params.quiet: + print(f"\nResolving scan for {params.format.upper()} export...") + project_code = workbench.resolve_project(params.project_name, create_if_missing=False) + scan_code, scan_id = workbench.resolve_scan( + scan_name=params.scan_name, + project_name=params.project_name, + create_if_missing=False, + params=params + ) + + # Ensure scan processes are idle before fetching results + if not params.quiet: + print("\nEnsuring scan processes are idle before fetching vulnerability data...") + try: + workbench.ensure_scan_is_idle(scan_code, params, ["SCAN", "DEPENDENCY_ANALYSIS"]) + except (ProcessTimeoutError, ProcessError, ApiError, NetworkError) as e: + logger.warning(f"Could not verify scan completion for '{scan_code}': {e}. Proceeding anyway.") + if not params.quiet: + print("\nWarning: Could not verify scan completion status. Results may be incomplete.") + + # ------------------------------------------------------------------ + # 1. Fetch vulnerability list & built-in VEX + # ------------------------------------------------------------------ + vulnerabilities = _fetch_vulnerabilities_and_vex(workbench, scan_code, params) + + # ------------------------------------------------------------------ + # 2. Determine export flow and gather external enrichment data + # ------------------------------------------------------------------ + export_flow = _determine_export_flow(params) + + if not params.quiet: + print(f" • Using {export_flow} approach") + + external_data = _perform_external_enrichment( + vulnerabilities, + common_params['nvd_enrichment'], + common_params['epss_enrichment'], + common_params['cisa_kev_enrichment'], + common_params['external_timeout'] + ) + + # ------------------------------------------------------------------ + # 3. Create enriched vulnerability objects (format-agnostic) + # ------------------------------------------------------------------ + if not params.quiet: + print(f"\n🔬 Creating Enriched Vulnerability Objects") + + from ..utilities.vuln_report.cve_data_gathering import create_enriched_vulnerabilities + + enriched_vulnerabilities = create_enriched_vulnerabilities( + vulnerabilities=vulnerabilities, + external_data=external_data, + enable_dynamic_risk_scoring=common_params['enable_dynamic_risk_scoring'] + ) + + if not params.quiet: + print(f"✅ Created {len(enriched_vulnerabilities)} Enriched Vulnerability Objects") + + # ------------------------------------------------------------------ + # 4. 
Display dynamic scoring summary (VEX suppression, EPSS / KEV promotion)
+    # ------------------------------------------------------------------
+    _display_dynamic_scoring(vulnerabilities, common_params['enable_dynamic_risk_scoring'], external_data)
+
+    # ------------------------------------------------------------------
+    # Export Logic: Execute flow-specific export
+    # ------------------------------------------------------------------
+    if not params.quiet:
+        print(f"\n📤 Exporting {params.format.upper()} report...")
+
+    try:
+        if export_flow == 'augmentation':
+            _execute_augmentation_flow(
+                workbench=workbench,
+                scan_code=scan_code,
+                enriched_vulnerabilities=enriched_vulnerabilities,
+                external_data=external_data,
+                params=params,
+                common_params=common_params
+            )
+        else:  # generation flow
+            all_components = _bootstrap_bom_components(vulnerabilities, quiet=params.quiet)
+            _execute_generation_flow(
+                scan_code=scan_code,
+                enriched_vulnerabilities=enriched_vulnerabilities,
+                all_components=all_components,
+                external_data=external_data,
+                params=params,
+                common_params=common_params
+            )
+
+        if not params.quiet:
+            print(f"\n✅ {params.format.upper()} export completed successfully!")
+            print(f"📄 Report saved to: {params.output}")
+            print(f"🔧 Approach: {export_flow}")
+
+        return True
+
+    except Exception as e:
+        logger.error(f"Failed to export {params.format.upper()}: {e}")
+        if isinstance(e, (ApiError, NetworkError, ProcessTimeoutError, ProcessError)):
+            raise
+        else:
+            raise ProcessError(f"Failed to export vulnerability data to {params.format.upper()} format: {str(e)}")
+
+
+def _extract_common_params(params: argparse.Namespace) -> Dict[str, Any]:
+    """Extract commonly used parameters to avoid repetitive getattr calls."""
+    return {
+        'nvd_enrichment': getattr(params, 'enrich_nvd', False),
+        'epss_enrichment': getattr(params, 'enrich_epss', False),
+        'cisa_kev_enrichment': getattr(params, 'enrich_cisa_kev', False),
+        'external_timeout': getattr(params, 'external_timeout', 30),
+        'enable_dynamic_risk_scoring': not getattr(params, 'disable_dynamic_risk_scoring', False),
+        'quiet': getattr(params, 'quiet', False),
+        'augment_full_bom': getattr(params, 'augment_full_bom', False)
+    }
+
+
+def _check_format_dependencies(format_name: str) -> None:
+    """Check if required dependencies are available for the specified format."""
+    if format_name == 'cyclonedx':
+        try:
+            import cyclonedx
+        except ImportError:
+            raise ProcessError(
+                "CycloneDX format requires the 'cyclonedx-python-lib' package. "
+                "This should be installed automatically with workbench-cli. "
+                "Try reinstalling: pip install --force-reinstall workbench-cli"
+            )
+    elif format_name == 'spdx3':
+        try:
+            import spdx_tools
+        except ImportError:
+            raise ProcessError(
+                "SPDX 3.0 format requires the 'spdx-tools' package. "
+                "This should be installed automatically with workbench-cli. 
" + "Try reinstalling: pip install --force-reinstall workbench-cli" + ) + # SARIF format has no external dependencies + + +def _fetch_vulnerabilities_and_vex( + workbench: "WorkbenchAPI", + scan_code: str, + params: argparse.Namespace, +) -> List[Dict[str, Any]]: + """Retrieve vulnerabilities from Workbench (with severity filter) and print VEX summary.""" + if not params.quiet: + print("\n🔍 Fetching data from Workbench…") + + vulnerabilities = workbench.list_vulnerabilities(scan_code) + + # Severity filter + if getattr(params, "severity_threshold", None): + sev_order = {"critical": 4, "high": 3, "medium": 2, "low": 1} + min_level = sev_order.get(params.severity_threshold.lower(), 0) + vulnerabilities = [v for v in vulnerabilities if sev_order.get(v.get("severity", "").lower(), 0) >= min_level] + + if not params.quiet: + # Simple severity breakdown without external dependency + severity_counts = {} + for vuln in vulnerabilities: + severity = vuln.get("severity", "UNKNOWN").upper() + severity_counts[severity] = severity_counts.get(severity, 0) + 1 + + # Format compact breakdown + breakdown_parts = [] + for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']: + if severity_counts.get(severity, 0) > 0: + breakdown_parts.append(f"{severity[0]}: {severity_counts[severity]}") + breakdown_text = f"[{', '.join(breakdown_parts)}]" if breakdown_parts else "" + + print(f"📋 Retrieved {len(vulnerabilities)} Vulnerabilities {breakdown_text}") + _display_vex_summary(vulnerabilities, indent=" ") + + return vulnerabilities + + +def _determine_export_flow(params: argparse.Namespace) -> str: + """ + Determine which export flow to use based on parameters. + + Returns: + str: Either 'augmentation' or 'generation' + """ + # Check if augmentation flow is requested and supported + if (params.format in ['cyclonedx'] and # Will expand to include 'spdx3' later + getattr(params, 'augment_full_bom', False)): + return 'augmentation' + else: + return 'generation' + + +def _bootstrap_bom_components( + vulnerabilities: List[Dict[str, Any]], + quiet: bool = False +) -> List[Dict[str, Any]]: + """Bootstrap BOM with component metadata for the generation flow.""" + from ..utilities.vuln_report.bootstrap_bom import bootstrap_bom_from_vulnerabilities + + return bootstrap_bom_from_vulnerabilities( + vulnerabilities=vulnerabilities, + quiet=quiet + ) + + +def _execute_augmentation_flow( + workbench: "WorkbenchAPI", + scan_code: str, + enriched_vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]], + params: argparse.Namespace, + common_params: Dict[str, Any] +) -> None: + """Execute the SBOM augmentation flow with automatic resource management.""" + from ..utilities.vuln_report.sbom_utils import managed_sbom_download + + with managed_sbom_download( + workbench=workbench, + scan_code=scan_code, + sbom_format=params.format, + include_vex=True, + params=params, + quiet=common_params['quiet'] + ) as sbom_path: + + if not sbom_path: + # Fallback to generation approach if SBOM download failed + if not common_params['quiet']: + print(" ⚠️ SBOM download failed, falling back to generation approach") + + all_components = _bootstrap_bom_components(enriched_vulnerabilities, quiet=common_params['quiet']) + _execute_generation_flow( + scan_code=scan_code, + enriched_vulnerabilities=enriched_vulnerabilities, + all_components=all_components, + external_data=external_data, + params=params, + common_params=common_params + ) + return + + # Execute format-specific augmentation + if params.format == 'cyclonedx': + from 
..utilities.vuln_report.cyclonedx_enrichment import augment_cyclonedx_sbom_from_file
+
+        augment_cyclonedx_sbom_from_file(
+            sbom_path=sbom_path,
+            filepath=params.output,
+            scan_code=scan_code,
+            external_data=external_data,
+            nvd_enrichment=common_params['nvd_enrichment'],
+            epss_enrichment=common_params['epss_enrichment'],
+            cisa_kev_enrichment=common_params['cisa_kev_enrichment'],
+            enable_dynamic_risk_scoring=common_params['enable_dynamic_risk_scoring'],
+            quiet=common_params['quiet']
+        )
+    elif params.format == 'spdx3':
+        # Future SPDX augmentation implementation
+        raise ProcessError("SPDX augmentation flow not yet implemented")
+    else:
+        raise ProcessError(f"Augmentation flow not supported for format: {params.format}")
+
+
+def _execute_generation_flow(
+    scan_code: str,
+    enriched_vulnerabilities: List[Dict[str, Any]],
+    all_components: List[Dict[str, Any]],
+    external_data: Dict[str, Dict[str, Any]],
+    params: argparse.Namespace,
+    common_params: Dict[str, Any]
+) -> None:
+    """Execute the BOM generation flow from enriched vulnerability data."""
+    # Convert enriched vulnerabilities back to original format for backward compatibility
+    # TODO: Update format-specific generators to consume enriched vulnerability objects directly
+    vulnerabilities = []
+    for enriched_vuln in enriched_vulnerabilities:
+        # Extract original vulnerability data (excluding enrichment metadata)
+        vuln = {k: v for k, v in enriched_vuln.items()
+                if k not in ['external_enrichment', 'dynamic_risk', 'enriched_description',
+                             'epss_score', 'epss_percentile', 'cisa_known_exploited',
+                             'cwe_ids', 'external_references']}
+        vulnerabilities.append(vuln)
+
+    export_args = {
+        'filepath': params.output,
+        'vulnerabilities': vulnerabilities,
+        'scan_code': scan_code,
+        'external_data': external_data,
+        'nvd_enrichment': common_params['nvd_enrichment'],
+        'epss_enrichment': common_params['epss_enrichment'],
+        'cisa_kev_enrichment': common_params['cisa_kev_enrichment'],
+        'enable_dynamic_risk_scoring': common_params['enable_dynamic_risk_scoring'],
+        'quiet': common_params['quiet'],
+        'base_sbom_path': None  # Generation flow doesn't use base SBOM
+    }
+
+    if params.format == 'cyclonedx':
+        from ..utilities.vuln_report.cyclonedx_generator import save_vulns_to_cyclonedx
+        save_vulns_to_cyclonedx(**export_args)
+    elif params.format == 'sarif':
+        from ..utilities.vuln_report.sarif_generator import save_vulns_to_sarif
+        export_args['api_timeout'] = common_params['external_timeout']
+        save_vulns_to_sarif(**export_args)
+    elif params.format == 'spdx3':
+        from ..utilities.vuln_report.spdx_generator import save_vulns_to_spdx
+        export_args['api_timeout'] = common_params['external_timeout']
+        save_vulns_to_spdx(**export_args)
+    else:
+        raise ProcessError(f"Unsupported format for generation flow: {params.format}")
+
+
+def _perform_external_enrichment(
+    vulnerabilities: List[Dict[str, Any]],
+    nvd_enrichment: bool,
+    epss_enrichment: bool,
+    cisa_kev_enrichment: bool,
+    api_timeout: int
+) -> Dict[str, Dict[str, Any]]:
+    """Perform external enrichment and display status messages."""
+
+    # Show enrichment status
+    enrichment_sources = []
+    if nvd_enrichment:
+        enrichment_sources.append("NVD")
+    if epss_enrichment:
+        enrichment_sources.append("EPSS")
+    if cisa_kev_enrichment:
+        enrichment_sources.append("CISA KEV")
+
+    if enrichment_sources:
+        print(f"\n🔍 External Enrichment: {', '.join(enrichment_sources)}")
+
+        # Collect the unique CVEs to enrich
+        from ..utilities.vuln_report.risk_adjustments import extract_unique_cves
+        unique_cves = extract_unique_cves(vulnerabilities)
+
+        # Show custom NVD message if NVD enrichment is enabled
+        if nvd_enrichment and unique_cves:
+            print(f"   📋 Fetching additional details for {len(unique_cves)} CVEs from NVD")
+            if not os.environ.get('NVD_API_KEY'):
+                print("   💡 For faster performance, set the 'NVD_API_KEY' environment variable")
+
+        # Perform the actual enrichment with NVD INFO logging temporarily suppressed
+        nvd_logger = logging.getLogger('workbench_cli.utilities.vuln_report.cve_data_gathering')
+        original_level = nvd_logger.level
+        nvd_logger.setLevel(logging.WARNING)
+
+        try:
+            external_data = enrich_vulnerabilities(
+                unique_cves,
+                nvd_enrichment,
+                epss_enrichment,
+                cisa_kev_enrichment,
+                api_timeout
+            )
+        finally:
+            nvd_logger.setLevel(original_level)
+
+        # Show EPSS results if EPSS enrichment was enabled
+        if epss_enrichment and external_data:
+            epss_count = sum(1 for cve_data in external_data.values() if cve_data.get('epss_score') is not None)
+            if epss_count > 0:
+                print(f"   📊 EPSS scores retrieved for {epss_count} CVEs")
+
+        return external_data
+    else:
+        print("\n🔍 External Enrichment: DISABLED")
+        return {}
+
+
+def _display_vex_summary(vulnerabilities: List[Dict[str, Any]], indent: str = "") -> None:
+    """Display VEX assessment information in a concise format."""
+    # Simple VEX counting without external dependency
+    total_with_vex = 0
+    with_status = 0
+    with_response = 0
+
+    for vuln in vulnerabilities:
+        if vuln.get("vuln_exp_id") or vuln.get("vuln_exp_status") or vuln.get("vuln_exp_response"):
+            total_with_vex += 1
+            if vuln.get("vuln_exp_status"):
+                with_status += 1
+            if vuln.get("vuln_exp_response"):
+                with_response += 1
+
+    if total_with_vex > 0:
+        print(f"{indent}• Retrieved VEX for {total_with_vex}/{len(vulnerabilities)} CVEs [Status: {with_status}, Response: {with_response}]")
+
+
+def _count_high_risk_indicators_detailed(
+    vulnerabilities: List[Dict[str, Any]],
+    external_data: Dict[str, Dict[str, Any]]
+) -> Dict[str, int]:
+    """Count vulnerabilities by high risk indicator state."""
+    from ..utilities.vuln_report.risk_adjustments import count_high_risk_indicators_detailed
+
+    return count_high_risk_indicators_detailed(vulnerabilities, external_data)
+
+
+def _display_dynamic_scoring(
+    vulnerabilities: List[Dict[str, Any]],
+    enable_dynamic_risk_scoring: bool,
+    external_data: Dict[str, Dict[str, Any]]
+) -> None:
+    """Display dynamic scoring summary focusing on high/low/unknown risk levels."""
+
+    print("\n🔧 Dynamic Scoring:")
+
+    # Show High Risk Indicator summary
+    if enable_dynamic_risk_scoring:
+        high_risk_counts = _count_high_risk_indicators_detailed(vulnerabilities, external_data)
+        if high_risk_counts["yes"] > 0:
+            print(f"  • High Risk Vulnerabilities: {high_risk_counts['yes']}/{len(vulnerabilities)} require immediate triage")
+        if high_risk_counts["no"] > 0:
+            print(f"  • Suppressed Vulnerabilities: {high_risk_counts['no']}/{len(vulnerabilities)} assessed as low risk")
+        if high_risk_counts["unknown"] > 0:
+            print(f"  • Unknown Risk Context: {high_risk_counts['unknown']}/{len(vulnerabilities)} need additional intelligence")
+    else:
+        print("  • Dynamic Risk Scoring: Disabled")
\ No newline at end of file
diff --git a/src/workbench_cli/main.py b/src/workbench_cli/main.py
index 124bc3d..257fd9d 100644
--- a/src/workbench_cli/main.py
+++ b/src/workbench_cli/main.py
@@ -31,6 +31,7 @@
     handle_evaluate_gates,
     handle_download_reports,
     handle_scan_git,
+    handle_export_vulns,
 )
 
 
@@ -89,6
+90,7 @@ def main() -> int: "evaluate-gates": handle_evaluate_gates, "download-reports": handle_download_reports, "scan-git": handle_scan_git, + "export-vulns": handle_export_vulns, } handler = COMMAND_HANDLERS.get(params.command) diff --git a/src/workbench_cli/utilities/sbom_validator.py b/src/workbench_cli/utilities/sbom_validator.py index 9e340c2..4af1f20 100644 --- a/src/workbench_cli/utilities/sbom_validator.py +++ b/src/workbench_cli/utilities/sbom_validator.py @@ -160,7 +160,21 @@ def _detect_sbom_format(file_path: str) -> str: if (' Tuple[str, str, Dict[str, Any], Dict]: diff --git a/src/workbench_cli/utilities/scan_workflows.py b/src/workbench_cli/utilities/scan_workflows.py index 6737328..0361d4d 100644 --- a/src/workbench_cli/utilities/scan_workflows.py +++ b/src/workbench_cli/utilities/scan_workflows.py @@ -436,11 +436,21 @@ def fetch_display_save_results(workbench: 'WorkbenchAPI', params: argparse.Names if any_results_requested: display_results(collected_results, params) - save_path = getattr(params, 'path_result', None) - if save_path: + # Handle JSON output + json_path = getattr(params, 'json_result_path', None) + if json_path: if collected_results: - print(f"\nSaving collected results to '{save_path}'...") - save_results_to_file(save_path, collected_results, scan_code) + print(f"\nSaving collected results to '{json_path}'...") + save_results_to_file(json_path, collected_results, scan_code) + else: + print("\nNo results were successfully collected, skipping JSON save.") + + # Legacy support for --path-result (deprecated, use --json-result-path instead) + legacy_path = getattr(params, 'path_result', None) + if legacy_path: + if collected_results: + print(f"\nSaving collected results to '{legacy_path}'...") + save_results_to_file(legacy_path, collected_results, scan_code) else: print("\nNo results were successfully collected, skipping save.") diff --git a/src/workbench_cli/utilities/vuln_report/__init__.py b/src/workbench_cli/utilities/vuln_report/__init__.py new file mode 100644 index 0000000..5ef6be5 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/__init__.py @@ -0,0 +1,24 @@ +""" +Vulnerability report generation utilities. + +This package contains utilities for generating vulnerability reports in various formats: +- SARIF (Static Analysis Results Interchange Format) +- CycloneDX (Software Bill of Materials with vulnerability information) +- SPDX 3.0 (Security Profile) + +All formats share the same data enrichment pipeline but use different output serializers. +""" + +__all__ = [ + # Core enrichment utilities + "bootstrap_bom", + "cve_data_gathering", + + # Dynamic risk adjustments + "risk_adjustments", + + # Format generators + "sarif_generator", + "cyclonedx_generator", + "spdx_generator", +] \ No newline at end of file diff --git a/src/workbench_cli/utilities/vuln_report/bootstrap_bom.py b/src/workbench_cli/utilities/vuln_report/bootstrap_bom.py new file mode 100644 index 0000000..6939124 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/bootstrap_bom.py @@ -0,0 +1,269 @@ +""" +BOM bootstrapping for API-powered vulnerability report generation. + +This module provides component metadata gathering using the Workbench Components API. +Used exclusively by the generation flow to bootstrap BOMs with component information +from vulnerabilities. Format-specific generators can then read from this enriched data. + +The augmentation flow works entirely with existing SBOM data and does not use this module. 
+""" + +import logging +import os +from typing import Dict, Any, Optional, Tuple, List, Set +from concurrent.futures import ThreadPoolExecutor, as_completed + +from ...api.components_api import ComponentsAPI +from ...exceptions import ApiError, NetworkError + +logger = logging.getLogger(__name__) + +# Cache to avoid repeated API lookups per component-version +_COMPONENT_ECOSYSTEM_CACHE: Dict[Tuple[str, str], str] = {} +# Cache for full component records +_COMPONENT_INFO_CACHE: Dict[Tuple[str, str], Dict[str, Any]] = {} + + +# --------------------------------------------------------------------------- +# Helper Functions +# --------------------------------------------------------------------------- + +def _progress_message(message: str, quiet: bool = False) -> None: + """Print progress message if not in quiet mode.""" + if not quiet: + print(f" {message}") + + +def _extract_component_pairs(vulnerabilities: List[Dict[str, Any]]) -> Set[Tuple[str, str]]: + """Extract unique (name, version) pairs from vulnerabilities.""" + pairs = set() + for vuln in vulnerabilities: + name = vuln.get("component_name") + version = vuln.get("component_version") + if name and version: + pairs.add((name, version)) + return pairs + + +# --------------------------------------------------------------------------- +# Main BOM Bootstrapping Interface +# --------------------------------------------------------------------------- + +def bootstrap_bom_from_vulnerabilities( + vulnerabilities: List[Dict[str, Any]], + quiet: bool = False +) -> List[Dict[str, Any]]: + """ + Bootstrap BOM with component metadata from vulnerabilities using Workbench API. + + This is the main interface for the generation flow that builds BOMs from vulnerabilities. + Creates format-agnostic component data that format-specific generators can use. 
+ + Args: + vulnerabilities: List of vulnerability dictionaries + quiet: If True, suppress progress messages + + Returns: + List of component dictionaries with enriched metadata suitable for any format + """ + # Get component count first for initial message + component_list = _extract_components_from_vulnerabilities(vulnerabilities) + + if not quiet: + print(f"\n🔧 Bootstrapping BOM with {len(component_list)} Components") + + fetch_component_info(vulnerabilities, quiet=True) # Always suppress internal logging + + if not quiet: + print("✅ Component Metadata Retrieved") + return component_list + + +# --------------------------------------------------------------------------- +# Component Information Cache Management +# --------------------------------------------------------------------------- + +def fetch_component_info(vulnerabilities: List[Dict[str, Any]], quiet: bool = False) -> None: + """Fetch component information for all unique components in parallel.""" + if not vulnerabilities: + return + + # Extract unique component/version pairs + unique_components = _extract_component_pairs(vulnerabilities) + + # Filter components that need fetching + components_to_fetch = _filter_components_to_fetch(unique_components) + if not components_to_fetch: + return # All components already cached + + # Get credentials directly from environment + api_url = os.getenv("WORKBENCH_URL") + api_user = os.getenv("WORKBENCH_USER") + api_token = os.getenv("WORKBENCH_TOKEN") + credentials = (api_url, api_user, api_token) + + # Fetch component information in parallel + successful_fetches = _fetch_components_parallel(components_to_fetch, credentials) + + +def _filter_components_to_fetch(unique_components: Set[Tuple[str, str]]) -> List[Tuple[str, str]]: + """Filter components that need to be fetched (not cached or missing license data).""" + components_to_fetch = [] + for name, version in unique_components: + cache_key = (name, version) + cached = _COMPONENT_INFO_CACHE.get(cache_key) + if not cached or (not cached.get("license_identifier") and not cached.get("license_name")): + components_to_fetch.append((name, version)) + return components_to_fetch + + +def _fetch_components_parallel( + components_to_fetch: List[Tuple[str, str]], + credentials: Tuple[str, str, str] +) -> int: + """Fetch components in parallel and return number of successful fetches.""" + api_url, api_user, api_token = credentials + successful_fetches = 0 + + with ThreadPoolExecutor(max_workers=5) as executor: + # Submit all fetch tasks + future_to_component = { + executor.submit(_fetch_single_component_info, name, version, api_url, api_user, api_token): (name, version) + for name, version in components_to_fetch + } + + # Process results as they complete + for future in as_completed(future_to_component): + name, version = future_to_component[future] + try: + info = future.result() + _COMPONENT_INFO_CACHE[(name, version)] = info + if info: # Only count non-empty results as successful + successful_fetches += 1 + except Exception as e: + logger.debug(f"Failed to fetch component info for {name}@{version}: {e}") + # Store empty dict to avoid re-fetching + _COMPONENT_INFO_CACHE[(name, version)] = {} + + return successful_fetches + + +def _fetch_single_component_info(component_name: str, component_version: str, + api_url: str, api_user: str, api_token: str) -> Dict[str, Any]: + """Fetch component information for a single component.""" + try: + api_client = ComponentsAPI(api_url, api_user, api_token) + info = 
api_client.get_component_information(component_name, component_version) or {}
+        return info
+    except Exception:  # includes ApiError / NetworkError
+        logger.debug(f"Component information lookup failed for {component_name}@{component_version}", exc_info=True)
+        return {}
+
+
+def get_component_info(component_name: str, component_version: Optional[str]) -> Dict[str, Any]:
+    """
+    Return cached Workbench component record with fallback to API.
+
+    This is the public interface for getting component information that format-specific
+    generators can use to enrich their components with Workbench metadata.
+    """
+    if not component_name or not component_version:
+        return {}
+
+    cache_key = (component_name, component_version)
+    if cache_key in _COMPONENT_INFO_CACHE:
+        return _COMPONENT_INFO_CACHE[cache_key]
+
+    try:
+        # Get credentials directly from environment
+        api_url = os.getenv("WORKBENCH_URL")
+        api_user = os.getenv("WORKBENCH_USER")
+        api_token = os.getenv("WORKBENCH_TOKEN")
+
+        api_client = ComponentsAPI(api_url, api_user, api_token)
+        info = api_client.get_component_information(component_name, component_version) or {}
+        _COMPONENT_INFO_CACHE[cache_key] = info
+        return info
+    except Exception:  # includes ApiError / NetworkError
+        logger.debug("Component information lookup failed", exc_info=True)
+        return {}
+
+
+# ---------------------------------------------------------------------------
+# Component Extraction Utilities
+# ---------------------------------------------------------------------------
+
+def _extract_components_from_vulnerabilities(vulnerabilities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Extract unique components from vulnerability data."""
+    components = []
+    seen_components = set()
+
+    for vuln in vulnerabilities:
+        name, version = vuln.get("component_name"), vuln.get("component_version")
+        if name and version:
+            key = (name, version)
+            if key not in seen_components:
+                seen_components.add(key)
+                components.append({
+                    "name": name,
+                    "version": version,
+                    "source": "vulnerability_data"
+                })
+
+    return components
+
+
+# ---------------------------------------------------------------------------
+# Ecosystem Detection Utilities
+# ---------------------------------------------------------------------------
+
+def detect_package_ecosystem(
+    component_name: str,
+    component_version: Optional[str] = None,
+    purl: Optional[str] = None,
+) -> str:
+    """
+    Detect package ecosystem using purl_type from Components API.
+
+    This provides format-agnostic ecosystem detection that can be used by
+    any format-specific generator (CycloneDX, SPDX, etc.).
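+
+    Example (illustrative; assumes Workbench reports purl_type "npm" for this
+    component):
+
+        detect_package_ecosystem("ansi-regex", "1.1.1")  # -> "npm"
+        detect_package_ecosystem("mystery-lib")          # -> "generic" (no version, no lookup)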
+ """ + if component_version: + cache_key = (component_name, component_version) + if cache_key in _COMPONENT_ECOSYSTEM_CACHE: + return _COMPONENT_ECOSYSTEM_CACHE[cache_key] + + # Get component info from cache or API + info = _COMPONENT_INFO_CACHE.get(cache_key) or get_component_info(component_name, component_version) + + if info and info.get("purl_type"): + ecosystem = info["purl_type"] + _COMPONENT_ECOSYSTEM_CACHE[cache_key] = ecosystem + return ecosystem + + # Default fallback + return "generic" + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +__all__ = [ + # Main bootstrapping interface + "bootstrap_bom_from_vulnerabilities", + + # Component information access + "get_component_info", + "fetch_component_info", + + # Component extraction + "_extract_components_from_vulnerabilities", + + # Ecosystem detection + "detect_package_ecosystem", + + # Cache access for internal use + "_COMPONENT_ECOSYSTEM_CACHE", + "_COMPONENT_INFO_CACHE", +] \ No newline at end of file diff --git a/src/workbench_cli/utilities/vuln_report/cve_data_gathering.py b/src/workbench_cli/utilities/vuln_report/cve_data_gathering.py new file mode 100644 index 0000000..e4048b2 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/cve_data_gathering.py @@ -0,0 +1,590 @@ +""" +Vulnerability data enrichment utilities. + +This module provides functionality to enhance vulnerability data with external sources +including NVD, EPSS scores, and CISA KEV data. +""" + +import json +import requests +import time +import logging +import os +import threading +from typing import Dict, List, Any, Optional +from concurrent.futures import ThreadPoolExecutor, as_completed + +logger = logging.getLogger(__name__) + +# External API configurations +EPSS_API_URL = "https://api.first.org/data/v1/epss" +NVD_API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0" +CISA_KEV_URL = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" + +# Rate limiting settings +NVD_RATE_LIMIT_NO_KEY = 5 # requests per 30 seconds without API key +NVD_RATE_LIMIT_WITH_KEY = 50 # requests per 30 seconds with API key +EPSS_RATE_LIMIT = 100 # requests per minute +REQUEST_TIMEOUT = 30 # seconds + +# Module-level cache for NVD data to persist across function calls +_NVD_CACHE: Dict[str, Dict[str, Any]] = {} + + +class RateLimiter: + """Thread-safe rate limiter for API requests.""" + + def __init__(self, max_workers: int, delay: float): + self.max_workers = max_workers + self.delay = delay + self._last_request_time = 0 + self._lock = threading.Lock() + + def wait(self) -> None: + """Wait if necessary to respect rate limits.""" + with self._lock: + current_time = time.time() + elapsed = current_time - self._last_request_time + + if elapsed < self.delay: + sleep_time = self.delay - elapsed + time.sleep(sleep_time) + + self._last_request_time = time.time() + + +def enrich_vulnerabilities(cve_list: List[str], + nvd_enrichment: bool = True, + epss_enrichment: bool = True, + cisa_kev_enrichment: bool = True, + api_timeout: int = 30) -> Dict[str, Dict[str, Any]]: + """ + Enrich vulnerability data with external sources. 
+
+    Args:
+        cve_list: List of CVE IDs to enrich
+        nvd_enrichment: Whether to fetch CVE descriptions from NVD
+        epss_enrichment: Whether to fetch EPSS scores from FIRST
+        cisa_kev_enrichment: Whether to fetch known exploit information
+        api_timeout: Timeout for external API calls in seconds
+
+    Returns:
+        Dict mapping CVE IDs to their external data
+    """
+    if not cve_list:
+        return {}
+
+    return _fetch_external_vulnerability_data(
+        cve_list,
+        nvd_enrichment,
+        epss_enrichment,
+        cisa_kev_enrichment,
+        api_timeout
+    )
+
+
+def create_enriched_vulnerabilities(
+    vulnerabilities: List[Dict[str, Any]],
+    external_data: Optional[Dict[str, Dict[str, Any]]] = None,
+    enable_dynamic_risk_scoring: bool = True
+) -> List[Dict[str, Any]]:
+    """
+    Create format-agnostic enriched vulnerability objects from vulnerability data.
+
+    This function creates standardized vulnerability objects that include external
+    enrichment and dynamic risk scoring. Format-specific generators can then
+    convert these to their specific formats (CycloneDX, SARIF, SPDX, etc.).
+
+    Args:
+        vulnerabilities: List of vulnerability dictionaries from Workbench API
+        external_data: Pre-fetched external enrichment data (optional)
+        enable_dynamic_risk_scoring: Whether to apply dynamic risk scoring
+
+    Returns:
+        List of enriched vulnerability dictionaries with standardized format-agnostic metadata
+    """
+    # Default to empty external data when none was pre-fetched
+    external_data = external_data or {}
+
+    enriched_vulnerabilities = []
+
+    for vuln in vulnerabilities:
+        try:
+            cve = vuln.get("cve", "UNKNOWN")
+            ext_data = external_data.get(cve, {})
+
+            # Create enriched vulnerability object
+            enriched_vuln = _create_enriched_vulnerability(vuln, ext_data, enable_dynamic_risk_scoring)
+            enriched_vulnerabilities.append(enriched_vuln)
+
+        except Exception as e:
+            logger.error(f"Failed to enrich vulnerability {vuln.get('cve', 'UNKNOWN')}: {e}")
+            # Include original vulnerability as fallback
+            enriched_vulnerabilities.append(vuln.copy())
+            continue
+
+    return enriched_vulnerabilities
+
+
+def _create_enriched_vulnerability(
+    vuln: Dict[str, Any],
+    ext_data: Dict[str, Any],
+    enable_dynamic_risk_scoring: bool
+) -> Dict[str, Any]:
+    """
+    Create a format-agnostic enriched vulnerability object.
+
+    This creates a standardized vulnerability object that can be consumed by
+    any format-specific generator (CycloneDX, SARIF, SPDX, etc.).
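+
+    Illustrative shape of the result (enrichment keys appear only when the
+    corresponding data exists; values are made up):
+
+        {
+            "cve": "CVE-2021-44228",        # all original Workbench fields are preserved
+            "external_enrichment": {...},   # raw external record for this CVE
+            "epss_score": 0.97,             # convenience copies of key external fields
+            "cisa_known_exploited": True,
+            "dynamic_risk": {"original_level": "...", "adjusted_level": "...", "suppressed": False},
+        }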
+ """ + from .risk_adjustments import calculate_dynamic_risk + + # Start with original vulnerability data + enriched_vuln = vuln.copy() + + # Add external enrichment data + if ext_data: + # Add external data under a standardized key + enriched_vuln["external_enrichment"] = ext_data.copy() + + # Merge important external fields into top level for convenience + if ext_data.get("nvd_description"): + enriched_vuln["enriched_description"] = ext_data["nvd_description"] + if ext_data.get("epss_score") is not None: + enriched_vuln["epss_score"] = ext_data["epss_score"] + if ext_data.get("epss_percentile") is not None: + enriched_vuln["epss_percentile"] = ext_data["epss_percentile"] + if ext_data.get("cisa_kev"): + enriched_vuln["cisa_known_exploited"] = ext_data["cisa_kev"] + if ext_data.get("nvd_cwe"): + enriched_vuln["cwe_ids"] = ext_data["nvd_cwe"] + if ext_data.get("nvd_references"): + enriched_vuln["external_references"] = ext_data["nvd_references"] + + # Calculate dynamic risk if enabled + if enable_dynamic_risk_scoring: + try: + dynamic_risk_adjustment = calculate_dynamic_risk(vuln, ext_data) + enriched_vuln["dynamic_risk"] = { + "original_level": dynamic_risk_adjustment.original_level.value, + "adjusted_level": dynamic_risk_adjustment.adjusted_level.value, + "adjustment_reason": dynamic_risk_adjustment.adjustment_reason, + "priority_context": dynamic_risk_adjustment.priority_context, + "suppressed": dynamic_risk_adjustment.suppressed, + "high_risk_indicator": dynamic_risk_adjustment.high_risk_indicator, + "high_risk_evidence": dynamic_risk_adjustment.high_risk_evidence, + "was_promoted": dynamic_risk_adjustment.was_promoted, + "was_demoted": dynamic_risk_adjustment.was_demoted + } + except Exception as e: + logger.warning(f"Failed to calculate dynamic risk for {vuln.get('cve', 'UNKNOWN')}: {e}") + + return enriched_vuln + + +def _fetch_external_vulnerability_data(cve_list: List[str], + nvd_enrichment: bool = True, + epss_enrichment: bool = True, + cisa_kev_enrichment: bool = True, + timeout: int = 30) -> Dict[str, Dict[str, Any]]: + """ + Fetch external vulnerability data from multiple sources. 
+ + Returns: + Dict mapping CVE IDs to their external data + """ + external_data = {} + + # Initialize data structure + for cve in cve_list: + external_data[cve] = { + "epss_score": None, + "epss_percentile": None, + "cisa_kev": False, + "exploitdb_count": 0, + "nvd_description": None, + "nvd_cwe": None, + "nvd_references": [], + "full_cvss_vector": None, + "attack_vector_detail": None + } + + # Fetch data from different sources + try: + if epss_enrichment: + epss_data = _fetch_epss_scores(cve_list, timeout) + for cve, data in epss_data.items(): + if cve in external_data: + external_data[cve].update(data) + except Exception as e: + logger.warning(f"Failed to fetch EPSS data: {e}") + + try: + if cisa_kev_enrichment: + kev_data = _fetch_cisa_kev_data(cve_list, timeout) + for cve in kev_data: + if cve in external_data: + external_data[cve]["cisa_kev"] = True + except Exception as e: + logger.warning(f"Failed to fetch CISA KEV data: {e}") + + try: + if nvd_enrichment: + nvd_data = _fetch_nvd_data(cve_list, timeout) + for cve, data in nvd_data.items(): + if cve in external_data: + external_data[cve].update(data) + except Exception as e: + logger.warning(f"Failed to fetch NVD data: {e}") + + return external_data + + +def _fetch_epss_scores(cve_list: List[str], timeout: int = 30) -> Dict[str, Dict[str, Any]]: + """Fetch EPSS scores from FIRST API.""" + epss_data = {} + + # EPSS API supports batch queries + batch_size = 100 # API limit + for i in range(0, len(cve_list), batch_size): + batch = cve_list[i:i + batch_size] + cve_param = ",".join(batch) + + try: + response = requests.get( + f"{EPSS_API_URL}?cve={cve_param}", + timeout=timeout, + headers={"User-Agent": "Workbench-CLI/1.0"} + ) + response.raise_for_status() + + data = response.json() + if data.get("status") == "OK" and "data" in data: + for item in data["data"]: + cve = item.get("cve") + epss_val = item.get("epss") + percentile_val = item.get("percentile") + + if cve and epss_val is not None and percentile_val is not None: + try: + epss_score = float(epss_val) + epss_percentile = float(percentile_val) + # Only include if we have valid data (not just defaults) + if epss_score >= 0 and epss_percentile >= 0: + epss_data[cve] = { + "epss_score": epss_score, + "epss_percentile": epss_percentile + } + except (ValueError, TypeError): + # Skip invalid data + logger.debug(f"Invalid EPSS data for {cve}: epss={epss_val}, percentile={percentile_val}") + continue + + # Rate limiting + time.sleep(1) + + except Exception as e: + logger.warning(f"Failed to fetch EPSS data for batch {i//batch_size + 1}: {e}") + + return epss_data + + +def _fetch_cisa_kev_data(cve_list: List[str], timeout: int = 30) -> List[str]: + """Fetch CISA Known Exploited Vulnerabilities data.""" + try: + response = requests.get( + CISA_KEV_URL, + timeout=timeout, + headers={"User-Agent": "Workbench-CLI/1.0"} + ) + response.raise_for_status() + + kev_data = response.json() + known_exploited = set() + + if "vulnerabilities" in kev_data: + for vuln in kev_data["vulnerabilities"]: + cve = vuln.get("cveID") + if cve and cve in cve_list: + known_exploited.add(cve) + + return list(known_exploited) + + except Exception as e: + logger.warning(f"Failed to fetch CISA KEV data: {e}") + return [] + + +def _fetch_nvd_data(cve_list: List[str], timeout: int = 30) -> Dict[str, Dict[str, Any]]: + """ + Fetch detailed CVE information from NVD API 2.0 with enhanced performance and reliability. 
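+
+    Illustrative shape of the per-CVE payload (keys match _parse_nvd_vulnerability;
+    values are made up):
+
+        _fetch_nvd_data(["CVE-2021-44228"])
+        # -> {"CVE-2021-44228": {"nvd_description": "...", "nvd_cwe": ["CWE-502"],
+        #      "nvd_references": [...], "full_cvss_vector": "CVSS:3.1/AV:N/...",
+        #      "cvss_score": 10.0, "cvss3_metrics": {...}, ...}}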
+ + Improvements: + - Concurrent processing with rate limiting + - Exponential backoff retry logic + - Persistent module-level caching for duplicate requests + - API key support for higher rate limits + - Progress tracking for large CVE lists + """ + nvd_data = {} + + if not cve_list: + return nvd_data + + # Check for API key in environment variables + api_key = os.environ.get('NVD_API_KEY') + max_workers = 5 if api_key else 2 # Higher concurrency with API key + rate_limit_delay = 0.6 if api_key else 6 # 50 requests per 30s with key, 5 per 30s without + + # Initialize rate limiter + rate_limiter = RateLimiter(max_workers, rate_limit_delay) + + logger.info(f"Fetching NVD data for {len(cve_list)} CVEs using {'API key' if api_key else 'public rate limits'}") + + # Filter out already cached CVEs + cves_to_fetch = [cve for cve in cve_list if cve not in _NVD_CACHE] + + if not cves_to_fetch: + logger.info("All CVEs found in cache") + return {cve: _NVD_CACHE[cve] for cve in cve_list if cve in _NVD_CACHE} + + # Process CVEs concurrently + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_cve = { + executor.submit(_fetch_single_cve_nvd, cve, api_key, rate_limiter, timeout): cve + for cve in cves_to_fetch + } + + # Collect results with progress tracking + completed = 0 + for future in as_completed(future_to_cve): + cve = future_to_cve[future] + completed += 1 + + try: + result = future.result() + if result: + nvd_data[cve] = result + _NVD_CACHE[cve] = result # Cache successful results + + if completed % 10 == 0 or completed == len(cves_to_fetch): + logger.info(f"Processed {completed}/{len(cves_to_fetch)} CVEs") + + except Exception as e: + logger.warning(f"Failed to fetch NVD data for {cve}: {e}") + + # Include cached results in the return data + for cve in cve_list: + if cve in _NVD_CACHE and cve not in nvd_data: + nvd_data[cve] = _NVD_CACHE[cve] + + return nvd_data + + +def _fetch_single_cve_nvd(cve: str, api_key: Optional[str], rate_limiter: 'RateLimiter', + timeout: int) -> Optional[Dict[str, Any]]: + """Fetch a single CVE from NVD with retry logic and rate limiting.""" + headers = {"User-Agent": "FossID-Workbench-CLI/1.0"} + if api_key: + headers["apiKey"] = api_key + + max_retries = 3 + base_delay = 1.0 + + for attempt in range(max_retries): + try: + # Wait for rate limiter + rate_limiter.wait() + + response = requests.get( + f"{NVD_API_URL}?cveId={cve}", + timeout=timeout, + headers=headers + ) + + # Handle rate limiting + if response.status_code == 429: + retry_after = int(response.headers.get('Retry-After', 60)) + logger.warning(f"Rate limited for {cve}, waiting {retry_after}s") + time.sleep(retry_after) + continue + + response.raise_for_status() + + data = response.json() + if "vulnerabilities" in data and data["vulnerabilities"]: + return _parse_nvd_vulnerability(data["vulnerabilities"][0]["cve"]) + + return None + + except requests.exceptions.RequestException as e: + if attempt < max_retries - 1: + delay = base_delay * (2 ** attempt) # Exponential backoff + logger.warning(f"Request failed for {cve}, retrying in {delay}s: {e}") + time.sleep(delay) + else: + logger.error(f"Failed to fetch {cve} after {max_retries} attempts: {e}") + raise + + return None + + +def _parse_nvd_vulnerability(vuln_data: Dict[str, Any]) -> Dict[str, Any]: + """Parse NVD vulnerability data into standardized format.""" + # Extract description + description = "No description available" + if "descriptions" in vuln_data: + for desc in vuln_data["descriptions"]: + if 
desc.get("lang") == "en":
+                description = desc.get("value", description)
+                break
+
+    # Extract CWE information
+    cwe_ids = []
+    if "weaknesses" in vuln_data:
+        for weakness in vuln_data["weaknesses"]:
+            if weakness.get("type") == "Primary":
+                for desc in weakness.get("description", []):
+                    if desc.get("lang") == "en":
+                        cwe_ids.append(desc.get("value", ""))
+
+    # Extract references
+    references = []
+    if "references" in vuln_data:
+        for ref in vuln_data["references"][:10]:  # Cap at 10 references to keep reports compact
+            references.append({
+                "url": ref.get("url", ""),
+                "source": ref.get("source", ""),
+                "tags": ref.get("tags", [])
+            })
+
+    # Extract full CVSS vector
+    full_cvss_vector = None
+    cvss_score = None
+    exploitability_score = None
+    impact_score = None
+    cvss3_metrics = {}
+
+    if "metrics" in vuln_data:
+        for metric_type in ["cvssMetricV31", "cvssMetricV30", "cvssMetricV2"]:
+            if metric_type in vuln_data["metrics"]:
+                metrics = vuln_data["metrics"][metric_type]
+                if metrics:
+                    metric_entry = metrics[0]
+                    cvss_data = metric_entry.get("cvssData", {})
+                    full_cvss_vector = cvss_data.get("vectorString")
+                    cvss_score = cvss_data.get("baseScore")
+
+                    # Collect detailed metrics for v3 / v3.1
+                    if metric_type.startswith("cvssMetricV3"):
+                        keys_map = {
+                            "attackVector": "attack_vector",
+                            "attackComplexity": "attack_complexity",
+                            "privilegesRequired": "privileges_required",
+                            "userInteraction": "user_interaction",
+                            "scope": "scope",
+                            "confidentialityImpact": "confidentiality",
+                            "integrityImpact": "integrity",
+                            "availabilityImpact": "availability",
+                        }
+                        for k_src, k_dst in keys_map.items():
+                            if cvss_data.get(k_src):
+                                cvss3_metrics[k_dst] = cvss_data[k_src]
+
+                    exploitability_score = metric_entry.get("exploitabilityScore") or exploitability_score
+                    impact_score = metric_entry.get("impactScore") or impact_score
+                    break
+
+    return {
+        "nvd_description": description,
+        "nvd_cwe": cwe_ids,
+        "nvd_references": references,
+        "full_cvss_vector": full_cvss_vector,
+        "cvss_score": cvss_score,
+        "nvd_published": vuln_data.get("published"),
+        "nvd_last_modified": vuln_data.get("lastModified"),
+        "exploitability_score": exploitability_score,
+        "impact_score": impact_score,
+        "cvss3_metrics": cvss3_metrics
+    }
+
+
+# Security metadata processing functions
+
+def build_cvss_vector(vuln: Dict[str, Any]) -> str:
+    """
+    Build a CVSS vector string from available vulnerability data.
+
+    This function consolidates CVSS vector building logic that was previously
+    duplicated across different format generators.
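+
+    Example (illustrative field values):
+
+        build_cvss_vector({"cvss_version": "3.1", "attack_vector": "NETWORK",
+                           "attack_complexity": "LOW", "availability_impact": "HIGH"})
+        # -> "CVSS:3.1/AV:N/AC:L/A:H"  (metrics missing from the data are omitted)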
+ """ + version = vuln.get("cvss_version", "3.1") + + vector_parts = [f"CVSS:{version}"] + + # Attack Vector + av = vuln.get("attack_vector", "") + if av: + av_map = {"NETWORK": "N", "ADJACENT_NETWORK": "A", "LOCAL": "L", "PHYSICAL": "P"} + vector_parts.append(f"AV:{av_map.get(av, av[0] if av else 'N')}") + + # Attack Complexity + ac = vuln.get("attack_complexity", "") + if ac: + ac_map = {"LOW": "L", "HIGH": "H"} + vector_parts.append(f"AC:{ac_map.get(ac, ac[0] if ac else 'L')}") + + # Availability Impact + a = vuln.get("availability_impact", "") + if a: + a_map = {"NONE": "N", "LOW": "L", "HIGH": "H"} + vector_parts.append(f"A:{a_map.get(a, a[0] if a else 'N')}") + + return "/".join(vector_parts) if len(vector_parts) > 1 else "CVSS vector not available" + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def extract_version_ranges(references: List[Dict[str, Any]]) -> str: + """Extract version information from NVD references where possible.""" + version_patterns = [] + + for ref in references: + url = ref.get("url", "").lower() + tags = [tag.lower() for tag in ref.get("tags", [])] + + # Look for vendor advisory URLs that often contain version info + if any(tag in ["vendor advisory", "patch", "mitigation"] for tag in tags): + # Common patterns in vendor URLs + if "github.com" in url and "/releases/" in url: + # GitHub release pages often have version info + version_patterns.append("See GitHub releases for affected versions") + elif any(vendor in url for vendor in ["apache.org", "nodejs.org", "golang.org", "python.org"]): + version_patterns.append("Check vendor advisory for version details") + + if version_patterns: + return "; ".join(set(version_patterns)) # Remove duplicates + + return "" + + +__all__ = [ + # Main enrichment functions + "enrich_vulnerabilities", + "create_enriched_vulnerabilities", + + # Security metadata processing + "build_cvss_vector", + "extract_version_ranges", + + # Rate limiting + "RateLimiter", + + # Cache access for internal use + "_NVD_CACHE", +] \ No newline at end of file diff --git a/src/workbench_cli/utilities/vuln_report/cyclonedx_enrichment.py b/src/workbench_cli/utilities/vuln_report/cyclonedx_enrichment.py new file mode 100644 index 0000000..190e861 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/cyclonedx_enrichment.py @@ -0,0 +1,542 @@ +""" +CycloneDX vulnerability enrichment module. + +This module provides shared enrichment functionality for CycloneDX SBOMs, +including external data enrichment (NVD, EPSS, CISA KEV) and dynamic risk scoring. +Can be used by both generation and augmentation flows. 
+""" + +import json +import logging +import os +from typing import Dict, List, Any, Optional +from datetime import datetime, timezone + +logger = logging.getLogger(__name__) + +# CycloneDX imports (optional dependency) +try: + from cyclonedx.model.component import Component, ComponentType + from cyclonedx.model.vulnerability import ( + Vulnerability, + VulnerabilityRating, + VulnerabilityReference, + BomTarget, + VulnerabilitySource, + VulnerabilityScoreSource, + VulnerabilitySeverity, + ) + from cyclonedx.model.bom import Bom + from cyclonedx.model import ExternalReference, ExternalReferenceType, Property + from packageurl import PackageURL + CYCLONEDX_AVAILABLE = True +except ImportError: + # Fallback types when CycloneDX is not available + Bom = Any + Component = Any + Vulnerability = Any + VulnerabilityRating = Any + VulnerabilityReference = Any + VulnerabilitySource = Any + VulnerabilityScoreSource = Any + ComponentType = Any + ExternalReference = Any + ExternalReferenceType = Any + PackageURL = Any + BomTarget = Any + Property = Any + CYCLONEDX_AVAILABLE = False + +from .risk_adjustments import calculate_dynamic_risk, RiskAdjustment +from .cve_data_gathering import build_cvss_vector + + +# Removed functions moved to cyclonedx_generator.py for generation flow: +# - enrich_cyclonedx_sbom (creates new vulnerabilities) +# - _enrich_vulnerabilities_batch (batch processing for new vulnerabilities) +# - _create_cyclonedx_vulnerability (creates new CycloneDX Vulnerability objects) +# - _map_severity_to_cyclonedx (severity mapping utility) + + +def enrich_cyclonedx_vulnerability_in_place(vuln: Dict[str, Any], ext_data: Dict[str, Any]) -> bool: + """ + Enrich a CycloneDX vulnerability object in place with external data. + + This function is used for augmenting existing SBOMs where vulnerabilities + are already present in the SBOM as JSON objects. 
+ + Returns: + bool: True if any enrichment was applied, False otherwise + """ + enriched = False + + try: + # Update description from NVD if available + if ext_data.get("nvd_description") and not vuln.get("description"): + vuln["description"] = ext_data["nvd_description"] + enriched = True + + # Add or update EPSS rating + epss_score = ext_data.get("epss_score") + if epss_score is not None and epss_score > 0.0: + ratings = vuln.get("ratings", []) + + # Check if EPSS rating already exists + epss_rating_exists = any( + rating.get("source", {}).get("name") == "EPSS" + for rating in ratings + ) + + if not epss_rating_exists: + epss_rating = { + "source": {"name": "EPSS", "url": "https://www.first.org/epss"}, + "score": epss_score, + "method": "other" + } + ratings.append(epss_rating) + vuln["ratings"] = ratings + enriched = True + + # Add or update external references + if ext_data.get("nvd_references"): + external_refs = vuln.get("externalReferences", []) + existing_urls = {ref.get("url") for ref in external_refs} + + added_refs = 0 + for ref in ext_data["nvd_references"]: + url = ref.get("url") + if url and url not in existing_urls: + external_refs.append({ + "type": "advisories", + "url": url, + "comment": ref.get("source", "") + }) + added_refs += 1 + + if added_refs > 0: + vuln["externalReferences"] = external_refs + enriched = True + + # Add CWE information + if ext_data.get("nvd_cwe") and not vuln.get("cwes"): + cwe_list = [] + for cwe in ext_data["nvd_cwe"]: + if cwe.startswith("CWE-"): + try: + cwe_list.append(int(cwe.replace("CWE-", ""))) + except ValueError: + continue + if cwe_list: + vuln["cwes"] = cwe_list + enriched = True + + # Add EPSS properties for reference + epss_score = ext_data.get("epss_score") + epss_percentile = ext_data.get("epss_percentile") + + if epss_score is not None or epss_percentile is not None: + if not vuln.get("properties"): + vuln["properties"] = [] + + existing_props = {prop.get("name"): prop for prop in vuln["properties"]} + + if epss_score is not None and "epss_score" not in existing_props: + vuln["properties"].append({"name": "epss_score", "value": str(epss_score)}) + enriched = True + + if epss_percentile is not None and "epss_percentile" not in existing_props: + vuln["properties"].append({"name": "epss_percentile", "value": str(epss_percentile)}) + enriched = True + + # Add CISA KEV flag + if ext_data.get("cisa_kev"): + if not vuln.get("properties"): + vuln["properties"] = [] + + existing_props = {prop.get("name"): prop for prop in vuln["properties"]} + + if "cisa_known_exploited" not in existing_props: + vuln["properties"].append({"name": "cisa_known_exploited", "value": "true"}) + enriched = True + + # Ensure vulnerability is marked exploitable if not already + if vuln.get("analysis"): + if vuln["analysis"].get("state") != "exploitable": + vuln["analysis"]["state"] = "exploitable" + enriched = True + else: + vuln["analysis"] = {"state": "exploitable"} + enriched = True + + return enriched + + except Exception as e: + logger.warning(f"Failed to enrich vulnerability {vuln.get('id', 'UNKNOWN')}: {e}") + return False + + +def apply_dynamic_risk_to_cyclonedx_vuln(vuln: Dict[str, Any], risk_adjustment: "RiskAdjustment") -> None: + """Apply dynamic risk adjustment to a CycloneDX vulnerability.""" + if not vuln.get("properties"): + vuln["properties"] = [] + + # Add or update high risk indicator + existing_props = {prop.get("name"): prop for prop in vuln["properties"]} + + # High risk indicator + if "high_risk_indicator" in existing_props: + 
existing_props["high_risk_indicator"]["value"] = risk_adjustment.high_risk_indicator + else: + vuln["properties"].append({ + "name": "high_risk_indicator", + "value": risk_adjustment.high_risk_indicator + }) + + # High risk evidence + if risk_adjustment.high_risk_evidence: + if "high_risk_evidence" in existing_props: + existing_props["high_risk_evidence"]["value"] = risk_adjustment.high_risk_evidence + else: + vuln["properties"].append({ + "name": "high_risk_evidence", + "value": risk_adjustment.high_risk_evidence + }) + + +def convert_sbom_vuln_to_internal_format(vuln: Dict[str, Any], component_lookup: Dict[str, Any]) -> Dict[str, Any]: + """Convert a CycloneDX SBOM vulnerability to internal format for risk calculation.""" + # Extract basic vulnerability info + vuln_data = { + "cve": vuln.get("id", "UNKNOWN"), + "vulnerability_id": vuln.get("bom-ref", ""), + "severity": _extract_severity_from_cyclonedx_vuln(vuln), + "base_score": _extract_base_score_from_cyclonedx_vuln(vuln), + "component_name": "Unknown", + "component_version": "Unknown", + } + + # Extract VEX information if present + if vuln.get("analysis"): + analysis = vuln["analysis"] + vuln_data["vuln_exp_status"] = analysis.get("state", "") + vuln_data["vuln_exp_justification"] = analysis.get("justification", "") + vuln_data["vuln_exp_response"] = analysis.get("responses", []) + vuln_data["vuln_exp_detail"] = analysis.get("detail", "") + + # Resolve component information from affects + if vuln.get("affects"): + for affect in vuln["affects"]: + ref = affect.get("ref") + if ref and ref in component_lookup: + component = component_lookup[ref] + vuln_data["component_name"] = component.get("name", "Unknown") + vuln_data["component_version"] = component.get("version", "Unknown") + break + + return vuln_data + + +def _extract_severity_from_cyclonedx_vuln(vuln: Dict[str, Any]) -> str: + """Extract severity from a CycloneDX vulnerability.""" + ratings = vuln.get("ratings", []) + for rating in ratings: + if rating.get("severity"): + return rating["severity"].upper() + return "UNKNOWN" + + +def _extract_base_score_from_cyclonedx_vuln(vuln: Dict[str, Any]) -> str: + """Extract base score from a CycloneDX vulnerability.""" + ratings = vuln.get("ratings", []) + for rating in ratings: + if rating.get("score") is not None: + return str(rating["score"]) + return "N/A" + + +def _validate_external_data(external_data: Optional[Dict[str, Dict[str, Any]]]) -> Dict[str, Dict[str, Any]]: + """Validate and sanitize external enrichment data.""" + if external_data is None: + return {} + + if not isinstance(external_data, dict): + logger.warning("External data is not a dictionary, ignoring") + return {} + + # Validate structure + validated_data = {} + for cve, cve_data in external_data.items(): + if not isinstance(cve_data, dict): + logger.warning(f"Invalid data structure for CVE {cve}, skipping") + continue + + # Validate and sanitize fields + sanitized_data = {} + + # NVD fields + if "nvd_description" in cve_data and isinstance(cve_data["nvd_description"], str): + sanitized_data["nvd_description"] = cve_data["nvd_description"] + + if "nvd_references" in cve_data and isinstance(cve_data["nvd_references"], list): + sanitized_data["nvd_references"] = cve_data["nvd_references"] + + if "nvd_cwe" in cve_data and isinstance(cve_data["nvd_cwe"], list): + sanitized_data["nvd_cwe"] = cve_data["nvd_cwe"] + + # EPSS fields + if "epss_score" in cve_data: + try: + sanitized_data["epss_score"] = float(cve_data["epss_score"]) + except (ValueError, TypeError): + 
logger.warning(f"Invalid EPSS score for CVE {cve}: {cve_data['epss_score']}") + + if "epss_percentile" in cve_data: + try: + sanitized_data["epss_percentile"] = float(cve_data["epss_percentile"]) + except (ValueError, TypeError): + logger.warning(f"Invalid EPSS percentile for CVE {cve}: {cve_data['epss_percentile']}") + + # CISA KEV + if "cisa_kev" in cve_data: + sanitized_data["cisa_kev"] = bool(cve_data["cisa_kev"]) + + # CVSS fields + if "full_cvss_vector" in cve_data and isinstance(cve_data["full_cvss_vector"], str): + sanitized_data["full_cvss_vector"] = cve_data["full_cvss_vector"] + + validated_data[cve] = sanitized_data + + return validated_data + + +def augment_cyclonedx_sbom_from_file( + sbom_path: str, + filepath: str, + scan_code: str, + external_data: Optional[Dict[str, Dict[str, Any]]] = None, + nvd_enrichment: bool = False, + epss_enrichment: bool = False, + cisa_kev_enrichment: bool = False, + enable_dynamic_risk_scoring: bool = True, + quiet: bool = False +) -> None: + """ + Augment an existing CycloneDX SBOM file with external enrichment and dynamic risk scoring. + + This function handles file I/O for SBOM augmentation: + 1. Loads the existing CycloneDX SBOM from file + 2. Applies external enrichment (NVD, EPSS, CISA KEV) and dynamic risk scoring + 3. Saves the augmented SBOM back to file + + Args: + sbom_path: Path to the existing CycloneDX SBOM file + filepath: Path where the augmented SBOM should be saved + scan_code: The scan code for reference (metadata only) + external_data: Pre-fetched external enrichment data (optional) + nvd_enrichment: Whether NVD enrichment was enabled + epss_enrichment: Whether EPSS enrichment was enabled + cisa_kev_enrichment: Whether CISA KEV enrichment was enabled + enable_dynamic_risk_scoring: Whether dynamic risk scoring is enabled + quiet: Whether to suppress output messages + + Raises: + ImportError: If cyclonedx-python-lib is not installed + IOError: If the file cannot be written + OSError: If the directory cannot be created + FileNotFoundError: If sbom_path doesn't exist + ValueError: If the SBOM cannot be parsed + """ + if not CYCLONEDX_AVAILABLE: + raise ImportError( + "CycloneDX support requires the 'cyclonedx-python-lib' package. " + "This should be installed automatically with workbench-cli. " + "Try reinstalling: pip install --force-reinstall workbench-cli" + ) + + # Validate and normalize inputs + external_data = _validate_external_data(external_data) + + if not quiet: + print(f" • Augmenting CycloneDX SBOM from {os.path.basename(sbom_path)}") + + # Validate input file exists + if not os.path.exists(sbom_path): + raise FileNotFoundError(f"SBOM file not found: {sbom_path}") + + # Check file size for performance warnings + file_size = os.path.getsize(sbom_path) + if file_size > 50_000_000: # > 50MB + logger.warning(f"Large SBOM file detected ({file_size:,} bytes). 
Processing may take longer.") + + # Load the existing SBOM as JSON to preserve the original format + try: + with open(sbom_path, "r", encoding="utf-8") as f: + sbom_json = json.load(f) + except (FileNotFoundError, json.JSONDecodeError) as e: + raise ValueError(f"Failed to load SBOM from {sbom_path}: {e}") + + # Validate SBOM structure using existing validator + from ..sbom_validator import SBOMValidator + try: + # Use existing comprehensive validation + sbom_format, version, metadata, _ = SBOMValidator.validate_sbom_file(sbom_path) + if sbom_format != "cyclonedx": + raise ValueError(f"Expected CycloneDX SBOM, got {sbom_format}") + except Exception as e: + raise ValueError(f"SBOM validation failed: {e}") + + # Extract existing vulnerabilities from the SBOM + existing_vulnerabilities = sbom_json.get("vulnerabilities", []) + + if not quiet: + print(f" • Found {len(existing_vulnerabilities)} vulnerabilities in SBOM") + + if not existing_vulnerabilities: + logger.info("No vulnerabilities found in SBOM - adding enrichment metadata only") + + # Create component lookup for resolving component names from bom-refs + component_lookup = {} + components = sbom_json.get("components", []) + + for comp in components: + if not isinstance(comp, dict): + continue + bom_ref = comp.get("bom-ref") + if bom_ref: + component_lookup[bom_ref] = comp + + if not quiet and components: + print(f" • Indexed {len(component_lookup)} components for vulnerability resolution") + + # Track processing statistics + stats = { + "vulnerabilities_processed": 0, + "vulnerabilities_enriched": 0, + "vulnerabilities_risk_scored": 0, + "processing_errors": 0 + } + + # Process each vulnerability in the SBOM + for vuln in existing_vulnerabilities: + try: + if not isinstance(vuln, dict): + stats["processing_errors"] += 1 + continue + + cve = vuln.get("id", "UNKNOWN") + ext_data = external_data.get(cve, {}) + + # Convert SBOM vulnerability to internal format for dynamic risk calculation + vuln_data = convert_sbom_vuln_to_internal_format(vuln, component_lookup) + + # Apply external enrichment to vulnerability object + enriched = enrich_cyclonedx_vulnerability_in_place(vuln, ext_data) + if enriched: + stats["vulnerabilities_enriched"] += 1 + + # Apply dynamic risk scoring if enabled + if enable_dynamic_risk_scoring: + try: + dynamic_risk_adjustment = calculate_dynamic_risk(vuln_data, ext_data) + apply_dynamic_risk_to_cyclonedx_vuln(vuln, dynamic_risk_adjustment) + stats["vulnerabilities_risk_scored"] += 1 + except Exception as e: + logger.warning(f"Failed to calculate dynamic risk for CVE {cve}: {e}") + stats["processing_errors"] += 1 + + stats["vulnerabilities_processed"] += 1 + + except Exception as e: + logger.warning(f"Failed to process vulnerability {vuln.get('id', 'UNKNOWN')}: {e}") + stats["processing_errors"] += 1 + continue + + # Add/update enrichment metadata properties + try: + if not sbom_json.get("metadata"): + sbom_json["metadata"] = {} + + if not sbom_json["metadata"].get("properties"): + sbom_json["metadata"]["properties"] = [] + + # Prepare standardized enrichment properties with validation status + enrichment_props = { + "workbench_scan_code": scan_code, + "nvd_enriched": str(nvd_enrichment).lower(), + "epss_enriched": str(epss_enrichment).lower(), + "cisa_kev_enriched": str(cisa_kev_enrichment).lower(), + "bom_type": "augmented_bom", + "vulnerability_count": str(len(existing_vulnerabilities)), + "augmented_vulnerabilities": str(stats["vulnerabilities_processed"]), + "enriched_vulnerabilities": 
str(stats["vulnerabilities_enriched"]), + "risk_scored_vulnerabilities": str(stats["vulnerabilities_risk_scored"]), + "processing_errors": str(stats["processing_errors"]), + "validation_status": "passed" if stats["processing_errors"] == 0 else "warnings" + } + + # Update existing properties or add new ones + existing_props = {prop.get("name"): prop for prop in sbom_json["metadata"]["properties"]} + for name, value in enrichment_props.items(): + if name in existing_props: + existing_props[name]["value"] = value + else: + sbom_json["metadata"]["properties"].append({"name": name, "value": value}) + + except Exception as e: + logger.warning(f"Failed to update SBOM metadata: {e}") + + # Save the augmented SBOM maintaining the original format + output_dir = os.path.dirname(filepath) or "." + try: + os.makedirs(output_dir, exist_ok=True) + + # Create backup of original file if overwriting + if os.path.exists(filepath) and os.path.samefile(sbom_path, filepath): + backup_path = f"{filepath}.backup" + logger.info(f"Creating backup: {backup_path}") + import shutil + shutil.copy2(filepath, backup_path) + + # Write the augmented SBOM as JSON + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(sbom_json, f, ensure_ascii=False, indent=2) + + if not quiet: + print(f" • Augmented CycloneDX SBOM saved to: {filepath}") + if stats["processing_errors"] > 0: + print(f" • Warning: {stats['processing_errors']} processing errors (see logs)") + + except (IOError, OSError) as e: + logger.error(f"Failed to save augmented SBOM: {e}") + if not quiet: + print(f"\nError: Failed to save augmented CycloneDX results to {filepath}: {e}") + raise + + # Log processing summary + logger.info(f"SBOM augmentation completed: {stats['vulnerabilities_processed']} vulnerabilities processed, " + f"{stats['vulnerabilities_enriched']} enriched, {stats['vulnerabilities_risk_scored']} risk-scored") + + if stats["processing_errors"] > 0: + logger.warning(f"SBOM augmentation had {stats['processing_errors']} processing errors") + + + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +__all__ = [ + # Main enrichment functions + "enrich_cyclonedx_vulnerability_in_place", + "apply_dynamic_risk_to_cyclonedx_vuln", + + # File-based augmentation + "augment_cyclonedx_sbom_from_file", + + # Conversion utilities + "convert_sbom_vuln_to_internal_format", + + # Validation functions + "_validate_external_data", +] \ No newline at end of file diff --git a/src/workbench_cli/utilities/vuln_report/cyclonedx_generator.py b/src/workbench_cli/utilities/vuln_report/cyclonedx_generator.py new file mode 100644 index 0000000..296f2c4 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/cyclonedx_generator.py @@ -0,0 +1,559 @@ +""" +CycloneDX vulnerability report generation. + +This module provides functionality to convert vulnerability data from the Workbench API +into CycloneDX format, which is a software bill of materials (SBOM) format that includes +vulnerability information. + +The module supports two approaches: +1. Building a new SBOM from vulnerability data (generation approach) +2. 
Augmenting an existing CycloneDX SBOM with vulnerability data (augmentation approach) + +Both approaches use the unified enrichment flow from cyclonedx_enrichment.py +""" + +import json +import logging +import os +from typing import Dict, List, Any, Optional +from datetime import datetime + +logger = logging.getLogger(__name__) + +# CycloneDX imports (optional dependency) +try: + from cyclonedx.model.component import Component, ComponentType + from cyclonedx.model.vulnerability import ( + Vulnerability, + VulnerabilityRating, + VulnerabilityReference, + BomTarget, + VulnerabilitySource, + VulnerabilityScoreSource, + VulnerabilitySeverity, + ) + from cyclonedx.model.bom import Bom + from cyclonedx.output.json import JsonV1Dot6 + from cyclonedx.model import ExternalReference, ExternalReferenceType, Property + from cyclonedx.model.license import DisjunctiveLicense, LicenseRepository + from packageurl import PackageURL + CYCLONEDX_AVAILABLE = True +except ImportError: + # Fallback types when CycloneDX is not available + Bom = Any + Component = Any + Vulnerability = Any + VulnerabilityRating = Any + VulnerabilityReference = Any + VulnerabilitySource = Any + VulnerabilityScoreSource = Any + ComponentType = Any + JsonV1Dot6 = Any + ExternalReference = Any + ExternalReferenceType = Any + DisjunctiveLicense = Any + LicenseRepository = Any + PackageURL = Any + BomTarget = Any + Property = Any + CYCLONEDX_AVAILABLE = False + +# Import enrichment pipeline for unified flow +from .cyclonedx_enrichment import augment_cyclonedx_sbom_from_file + +# Added get_component_info for richer component metadata +from .bootstrap_bom import get_component_info + + +def save_vulns_to_cyclonedx( + filepath: str, + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + external_data: Optional[Dict[str, Dict[str, Any]]] = None, + nvd_enrichment: bool = False, + epss_enrichment: bool = False, + cisa_kev_enrichment: bool = False, + enable_dynamic_risk_scoring: bool = True, + quiet: bool = False, + base_sbom_path: Optional[str] = None, + all_components: Optional[List[Dict[str, Any]]] = None +) -> None: + """ + Save vulnerability results in CycloneDX format using unified enrichment pipeline. + + This function creates a basic SBOM (generation) or loads an existing SBOM (augmentation), + then hands off to the unified enrichment pipeline for external data enrichment and + dynamic risk scoring. + + Args: + filepath: Path where the CycloneDX file should be saved + vulnerabilities: List of vulnerability dictionaries from the API + scan_code: The scan code for reference + external_data: Pre-fetched external enrichment data (optional) + nvd_enrichment: Whether NVD enrichment was enabled + epss_enrichment: Whether EPSS enrichment was enabled + cisa_kev_enrichment: Whether CISA KEV enrichment was enabled + enable_dynamic_risk_scoring: Whether dynamic risk scoring is enabled + quiet: Whether to suppress output messages + base_sbom_path: Path to existing CycloneDX SBOM to augment (optional) + all_components: List of all components from scan when --augment-full-bom is used (optional) + + Raises: + ImportError: If cyclonedx-python-lib is not installed + IOError: If the file cannot be written + OSError: If the directory cannot be created + FileNotFoundError: If base_sbom_path is provided but file doesn't exist + """ + if not CYCLONEDX_AVAILABLE: + raise ImportError( + "CycloneDX support requires the 'cyclonedx-python-lib' package. " + "This should be installed automatically with workbench-cli. 
" + "Try reinstalling: pip install --force-reinstall workbench-cli" + ) + + output_dir = os.path.dirname(filepath) or "." + + try: + os.makedirs(output_dir, exist_ok=True) + + # Choose between augmentation and generation approach + if base_sbom_path and os.path.exists(base_sbom_path): + # **AUGMENTATION FLOW**: Load existing SBOM and enrich it + if not quiet: + print(f" • Augmenting existing SBOM from {os.path.basename(base_sbom_path)}") + + augment_cyclonedx_sbom_from_file( + sbom_path=base_sbom_path, + filepath=filepath, + scan_code=scan_code, + external_data=external_data, + nvd_enrichment=nvd_enrichment, + epss_enrichment=epss_enrichment, + cisa_kev_enrichment=cisa_kev_enrichment, + enable_dynamic_risk_scoring=enable_dynamic_risk_scoring, + quiet=quiet + ) + return # Augmentation is complete + + else: + # **GENERATION FLOW**: Create basic BOM, then enrich it + if base_sbom_path and not quiet: + print(f" • Warning: Base SBOM not found at {base_sbom_path}, building from vulnerabilities only") + + if not quiet: + print(f" • Creating basic SBOM structure") + + # Build basic BOM structure (no enrichment) + start_time = datetime.utcnow() + + cyclonedx_bom = _build_basic_cyclonedx_bom( + vulnerabilities, + scan_code, + all_components + ) + + # Add basic vulnerabilities (no enrichment) + _add_vulnerabilities_to_bom( + bom=cyclonedx_bom, + vulnerabilities=vulnerabilities, + quiet=quiet + ) + + # Add basic metadata + _add_basic_metadata( + cyclonedx_bom, + scan_code, + nvd_enrichment, + epss_enrichment, + cisa_kev_enrichment + ) + + # Serialize basic BOM to JSON + json_serializer = JsonV1Dot6(cyclonedx_bom) + basic_bom_json = json.loads(json_serializer.output_as_string()) + + # Create a temporary file for the basic BOM + import tempfile + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file: + json.dump(basic_bom_json, temp_file, indent=2) + temp_sbom_path = temp_file.name + + try: + if not quiet: + print(f" • Handing off to enrichment pipeline") + + # Hand off to enrichment pipeline + from .cyclonedx_enrichment import augment_cyclonedx_sbom_from_file + augment_cyclonedx_sbom_from_file( + sbom_path=temp_sbom_path, + filepath=filepath, + scan_code=scan_code, + external_data=external_data, + nvd_enrichment=nvd_enrichment, + epss_enrichment=epss_enrichment, + cisa_kev_enrichment=cisa_kev_enrichment, + enable_dynamic_risk_scoring=enable_dynamic_risk_scoring, + quiet=quiet + ) + + build_time = (datetime.utcnow() - start_time).total_seconds() + logger.debug(f"BOM generation and enrichment completed in {build_time:.2f} seconds") + + finally: + # Clean up temporary file + try: + os.unlink(temp_sbom_path) + except OSError: + pass + + if not quiet: + print(f" • CycloneDX SBOM saved to: {filepath}") + + except (IOError, OSError) as e: + if not quiet: + print(f"\nWarning: Failed to save CycloneDX results to {filepath}: {e}") + raise + + +def _build_basic_cyclonedx_bom( + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + all_components: Optional[List[Dict[str, Any]]] = None +) -> Bom: + """ + Build a basic CycloneDX BOM with components from the bootstrapped BOM. + + This function now uses the all_components that were already bootstrapped by + the export_vulns handler, eliminating duplicate component creation logic. 
+ """ + # Create the BOM + bom = Bom() + + # Use bootstrapped components if provided + if all_components: + component_lookup: Dict[str, Component] = {} + + for comp_data in all_components: + comp_name = comp_data.get("name", "Unknown") + comp_version = comp_data.get("version", "") + comp_key = f"{comp_name}:{comp_version}" + + if comp_key not in component_lookup: + component = _create_cyclonedx_component_from_bootstrap_data(comp_data) + component_lookup[comp_key] = component + bom.components.add(component) + else: + # Fallback: create minimal components from vulnerability data + # This should rarely be used since all_components should always be provided + component_lookup: Dict[str, Component] = {} + + for vuln in vulnerabilities: + comp_name = vuln.get("component_name", "Unknown") + comp_version = vuln.get("component_version", "") + comp_key = f"{comp_name}:{comp_version}" + + if comp_key not in component_lookup: + # Create minimal component without enrichment + component = Component( + name=comp_name, + version=comp_version, + type=ComponentType.LIBRARY, + bom_ref=f"pkg:{comp_name}@{comp_version}" + ) + component_lookup[comp_key] = component + bom.components.add(component) + + return bom + + +def _create_cyclonedx_component_from_bootstrap_data(comp_data: Dict[str, Any]) -> Component: + """Create a CycloneDX Component from bootstrapped component data with all enrichment.""" + name = comp_data.get("name", "Unknown") + version = comp_data.get("version", "") + + # Create basic component + component = Component( + name=name, + version=version, + type=ComponentType.LIBRARY, + bom_ref=f"pkg:{name}@{version}" + ) + + # Get enrichment data from cache (already fetched by bootstrap_bom.py) + component_info = get_component_info(name, version) + + if component_info: + # Add CPE if available + if component_info.get("cpe"): + component.cpe = component_info["cpe"] + + # Add PURL from API response + if component_info.get("purl"): + try: + component.purl = PackageURL.from_string(component_info["purl"]) + except Exception: + pass + + # Add external references + if component_info.get("url"): + try: + component.external_references.add( + ExternalReference( + type=ExternalReferenceType.WEBSITE, + url=component_info["url"] + ) + ) + except Exception: + pass + + if component_info.get("download_url"): + try: + component.external_references.add( + ExternalReference( + type=ExternalReferenceType.DISTRIBUTION, + url=component_info["download_url"] + ) + ) + except Exception: + pass + + if component_info.get("supplier_url"): + try: + component.external_references.add( + ExternalReference( + type=ExternalReferenceType.WEBSITE, + url=component_info["supplier_url"] + ) + ) + except Exception: + pass + + if component_info.get("community_url"): + try: + component.external_references.add( + ExternalReference( + type=ExternalReferenceType.WEBSITE, + url=component_info["community_url"] + ) + ) + except Exception: + pass + + # Add description if available + if component_info.get("description"): + component.description = component_info["description"] + + # Add license information + if component_info.get("license_identifier") or component_info.get("license_name"): + license_repo = LicenseRepository() + + # Create DisjunctiveLicense with appropriate fields + license_obj = DisjunctiveLicense( + id=component_info.get("license_identifier"), + name=component_info.get("license_name") + ) + license_repo.add(license_obj) + component.licenses = license_repo + + return component + + +def _add_vulnerabilities_to_bom( + bom: Bom, + 
vulnerabilities: List[Dict[str, Any]], + quiet: bool = False +) -> None: + """ + Add basic vulnerabilities to a CycloneDX BOM without enrichment. + + This function creates the basic vulnerability structure only. + Enrichment (external data, dynamic risk scoring) is handled separately. + + Args: + bom: The CycloneDX BOM to add vulnerabilities to + vulnerabilities: List of vulnerabilities in internal format + quiet: Whether to suppress output messages + """ + if not vulnerabilities: + if not quiet: + print(" • No vulnerabilities to add") + return + + # Process vulnerabilities in batches for better performance + batch_size = 500 + total_vulnerabilities = len(vulnerabilities) + + if not quiet and total_vulnerabilities > batch_size: + print(f" • Processing {total_vulnerabilities} vulnerabilities in batches of {batch_size}") + + added_count = 0 + for i in range(0, total_vulnerabilities, batch_size): + batch = vulnerabilities[i:i + batch_size] + + for vuln in batch: + try: + # Create basic vulnerability (no enrichment) + basic_vuln = _create_basic_cyclonedx_vulnerability(vuln) + + # Add to BOM + bom.vulnerabilities.add(basic_vuln) + added_count += 1 + + except Exception as e: + logger.error(f"Failed to create vulnerability {vuln.get('cve', 'UNKNOWN')}: {e}") + continue + + if not quiet and total_vulnerabilities > batch_size: + progress = min(i + batch_size, total_vulnerabilities) + print(f" • Processed {progress}/{total_vulnerabilities} vulnerabilities") + + if not quiet: + print(f" • Added {added_count} basic vulnerabilities") + + +def _create_basic_cyclonedx_vulnerability(vuln: Dict[str, Any]) -> Vulnerability: + """ + Create a basic CycloneDX Vulnerability object without enrichment. + + This creates the minimal vulnerability structure required for a valid SBOM. + External enrichment (NVD, EPSS, CISA KEV) and dynamic risk scoring + are handled separately by the enrichment pipeline. 
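+
+    Illustrative example (input keys mirror the internal vulnerability format
+    consumed below; the CVE and scores are made up):
+
+        v = _create_basic_cyclonedx_vulnerability({
+            "cve": "CVE-2024-0001",
+            "component_name": "lodash",
+            "component_version": "4.17.20",
+            "base_score": "9.8",
+            "severity": "CRITICAL",
+        })
+        # v.id == "CVE-2024-0001"; v.affects targets "pkg:lodash@4.17.20"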
+ """ + cve = vuln.get("cve", "UNKNOWN") + component_name = vuln.get("component_name", "Unknown") + component_version = vuln.get("component_version", "Unknown") + + # Create vulnerability with basic information only + vulnerability = Vulnerability( + bom_ref=f"vuln-{cve}-{component_name}-{component_version}", + id=cve if cve != "UNKNOWN" else f"UNKNOWN-{vuln.get('id', 'unknown')}" + ) + + # Add affects relationship to link vulnerability to component + component_bom_ref = f"pkg:{component_name}@{component_version}" + vulnerability.affects = [BomTarget(ref=component_bom_ref)] + + # Add basic description (will be enhanced by enrichment if NVD data available) + vulnerability.description = f"Security vulnerability affecting {component_name} version {component_version}" + + # Add basic CVSS rating if available + base_score = vuln.get("base_score") + if base_score and base_score != "N/A": + try: + score_value = float(base_score) + rating = VulnerabilityRating( + source=None, + score=score_value, + severity=_map_severity_to_cyclonedx(vuln.get("severity", "UNKNOWN")), + method=VulnerabilityScoreSource.CVSS_V3, + ) + vulnerability.ratings = [rating] + except (ValueError, TypeError): + pass + + # Add basic VEX analysis if available + _add_vex_analysis(vulnerability, vuln) + + return vulnerability + + +def _map_severity_to_cyclonedx(severity: str) -> "VulnerabilitySeverity": + """Map string severity to CycloneDX VulnerabilitySeverity enum.""" + severity_map = { + "critical": VulnerabilitySeverity.CRITICAL, + "high": VulnerabilitySeverity.HIGH, + "medium": VulnerabilitySeverity.MEDIUM, + "low": VulnerabilitySeverity.LOW, + "info": VulnerabilitySeverity.INFO, + "informational": VulnerabilitySeverity.INFO, + "none": VulnerabilitySeverity.NONE, + } + return severity_map.get(severity.lower(), VulnerabilitySeverity.UNKNOWN) + + +def _add_vex_analysis(vulnerability: Vulnerability, vuln: Dict[str, Any]) -> None: + """Add VEX analysis to vulnerability if available.""" + try: + from cyclonedx.model.impact_analysis import ( + ImpactAnalysisState, + ImpactAnalysisJustification, + ImpactAnalysisResponse, + ) + from cyclonedx.model.vulnerability import VulnerabilityAnalysis + + vex_status = (vuln.get("vuln_exp_status") or "").lower() + vex_justification = (vuln.get("vuln_exp_justification") or "").lower() + vex_response = vuln.get("vuln_exp_response") or [] + if isinstance(vex_response, str): + vex_response = [vex_response] + + analysis_kwargs = {} + + # Map status → ImpactAnalysisState + state_enum = next((s for s in ImpactAnalysisState if s.value == vex_status), None) + if state_enum: + analysis_kwargs["state"] = state_enum + + # Map justification + just_enum = next((j for j in ImpactAnalysisJustification if j.value == vex_justification), None) + if just_enum: + analysis_kwargs["justification"] = just_enum + + # Map responses + mapped_responses = [] + if vex_response: + response_items = [] + if isinstance(vex_response, str): + response_items = [r.strip() for r in vex_response.split(',')] + elif isinstance(vex_response, list): + response_items = vex_response + + for item in response_items: + item_lower = str(item).lower().strip() + enum_match = next((r for r in ImpactAnalysisResponse if r.value == item_lower), None) + if enum_match: + mapped_responses.append(enum_match) + + if mapped_responses: + analysis_kwargs["responses"] = mapped_responses + + # Detail (if present) + vex_details = vuln.get("vuln_exp_details") or vuln.get("vuln_exp_detail") + if vex_details: + analysis_kwargs["detail"] = vex_details + + if 
analysis_kwargs: + vulnerability.analysis = VulnerabilityAnalysis(**analysis_kwargs) + + except Exception: + # Best-effort; don't fail report generation if mapping fails + pass + + +def _add_basic_metadata( + bom: Bom, + scan_code: str, + nvd_enrichment: bool, + epss_enrichment: bool, + cisa_kev_enrichment: bool + ) -> None: + """Add basic metadata to the BOM (enrichment metadata added later).""" + properties = [] + + # Scan code + properties.append(Property(name="workbench_scan_code", value=scan_code)) + + # Enrichment flags (for enrichment pipeline reference) + properties.append(Property(name="nvd_enrichment", value=str(nvd_enrichment))) + properties.append(Property(name="epss_enrichment", value=str(epss_enrichment))) + properties.append(Property(name="cisa_kev_enrichment", value=str(cisa_kev_enrichment))) + + # Generation metadata + properties.append(Property(name="generation_timestamp", value=datetime.utcnow().isoformat() + "Z")) + properties.append(Property(name="bom_type", value="vulnerable_only")) + + bom.properties = properties + + + + diff --git a/src/workbench_cli/utilities/vuln_report/risk_adjustments.py b/src/workbench_cli/utilities/vuln_report/risk_adjustments.py new file mode 100644 index 0000000..9828262 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/risk_adjustments.py @@ -0,0 +1,453 @@ +""" +Dynamic risk adjustment utilities for vulnerability reports. + +This module provides format-agnostic logic for calculating dynamic risk levels +based on VEX assessments, EPSS scores, CISA KEV status, and other intelligence. +The risk calculations can be applied consistently across SARIF, CycloneDX, and SPDX formats. +""" + +from typing import Dict, List, Any, Optional, Tuple +from enum import Enum +import logging + +logger = logging.getLogger(__name__) + + +class RiskLevel(Enum): + """Standardized risk levels for vulnerability prioritization.""" + CRITICAL = "critical" # CISA KEV, high EPSS + exploitable VEX + HIGH = "high" # High EPSS (>0.1), exploitable VEX status + MEDIUM = "medium" # Default CVSS-based severity + LOW = "low" # Low severity or mitigated VEX + INFO = "info" # Suppressed by VEX (resolved/false positive) + + +class RiskAdjustment: + """Container for risk adjustment information.""" + + def __init__( + self, + original_level: RiskLevel, + adjusted_level: RiskLevel, + adjustment_reason: str, + priority_context: str = "", + suppressed: bool = False, + high_risk_indicator: str = "Unknown", + high_risk_evidence: str = "" + ): + self.original_level = original_level + self.adjusted_level = adjusted_level + self.adjustment_reason = adjustment_reason + self.priority_context = priority_context + self.suppressed = suppressed + self.high_risk_indicator = high_risk_indicator # "Yes", "No", or "Unknown" + self.high_risk_evidence = high_risk_evidence + + @property + def was_promoted(self) -> bool: + """Check if risk level was promoted (increased).""" + level_order = [RiskLevel.INFO, RiskLevel.LOW, RiskLevel.MEDIUM, RiskLevel.HIGH, RiskLevel.CRITICAL] + return level_order.index(self.adjusted_level) > level_order.index(self.original_level) + + @property + def was_demoted(self) -> bool: + """Check if risk level was demoted (decreased).""" + level_order = [RiskLevel.INFO, RiskLevel.LOW, RiskLevel.MEDIUM, RiskLevel.HIGH, RiskLevel.CRITICAL] + return level_order.index(self.adjusted_level) < level_order.index(self.original_level) + + +def calculate_dynamic_risk( + vuln: Dict[str, Any], + external_data: Dict[str, Any] +) -> RiskAdjustment: + """ + Calculate dynamic risk level 
based on vulnerability attributes and external data.
+
+    This function always applies dynamic risk adjustments when called;
+    whether dynamic risk scoring is enabled at all is decided at the call site.
+
+    Args:
+        vuln: Vulnerability dictionary from Workbench API
+        external_data: External enrichment data for this CVE
+
+    Returns:
+        RiskAdjustment with original and adjusted risk levels
+    """
+    original_level = _map_cvss_severity_to_risk_level(vuln.get("severity", "UNKNOWN"))
+
+    # Start with original level
+    adjusted_level = original_level
+    adjustments = []
+
+    # High Risk Indicator logic (3-state: Yes/No/Unknown)
+    high_risk_indicator = "Unknown"  # Default state
+    high_risk_evidence = []
+
+    # Get VEX status and response for use throughout the function
+    vex_status = (vuln.get("vuln_exp_status") or "").lower()
+
+    # Handle VEX response - it can be a string or a list
+    vex_response_raw = vuln.get("vuln_exp_response") or ""
+    if isinstance(vex_response_raw, list):
+        # If it's a list, join with commas and lowercase
+        vex_response = ",".join(str(r).lower() for r in vex_response_raw)
+    else:
+        vex_response = str(vex_response_raw).lower()
+
+    # Check for VEX suppression FIRST - VEX should override other risk factors
+    # VEX status-based suppression (demote to INFO) - High Risk = "No"
+    if vex_status in ["not_affected", "fixed", "mitigated", "resolved", "resolved_with_pedigree", "false_positive"]:
+        adjusted_level = RiskLevel.INFO
+        adjustments.append(f"VEX assessment: {vex_status}")
+        high_risk_indicator = "No"
+        high_risk_evidence.append(f"VEX suppressed: {vex_status}")
+
+    # VEX response-based suppression - High Risk = "No"
+    elif vex_response in ["will_not_fix", "update"]:
+        adjusted_level = RiskLevel.INFO
+        adjustments.append(f"VEX response: {vex_response}")
+        high_risk_indicator = "No"
+        high_risk_evidence.append(f"VEX response: {vex_response}")
+
+    # If not suppressed by VEX, check for high risk escalation factors - High Risk = "Yes"
+    else:
+        # CISA KEV promotion to CRITICAL level (the strongest escalation factor)
+        if external_data.get("cisa_kev"):
+            adjusted_level = RiskLevel.CRITICAL
+            adjustments.append("CISA Known Exploited Vulnerability")
+            high_risk_indicator = "Yes"
+            high_risk_evidence.append("CISA Known Exploited Vulnerability")
+
+        # EPSS-based promotion to HIGH level; never demote a CRITICAL finding
+        epss_score = external_data.get("epss_score", 0)
+        if epss_score and epss_score > 0.1:
+            if adjusted_level is not RiskLevel.CRITICAL:
+                adjusted_level = RiskLevel.HIGH
+            adjustments.append(f"High EPSS exploitation probability: {epss_score:.3f}")
+            high_risk_indicator = "Yes"
+            high_risk_evidence.append(f"High EPSS score: {epss_score:.3f}")
+
+        # VEX status-based promotion to HIGH; never demote a CRITICAL finding
+        if vex_status in ["exploitable", "affected"]:
+            if adjusted_level is not RiskLevel.CRITICAL:
+                adjusted_level = RiskLevel.HIGH
+            adjustments.append(f"VEX assessment indicates {vex_status} status")
+            high_risk_indicator = "Yes"
+            high_risk_evidence.append(f"VEX assessment: {vex_status}")
+
+        # VEX response-based promotion for unfixable vulnerabilities
+        if "can_not_fix" in vex_response:
+            if adjusted_level is not RiskLevel.CRITICAL:
+                adjusted_level = RiskLevel.HIGH
+            adjustments.append("VEX response indicates unfixable vulnerability")
+            high_risk_evidence.append("VEX response: unfixable (can_not_fix)")
+            high_risk_indicator = "Yes"
+
+        # Critical severity as high risk indicator
+        if (vuln.get("severity") or "").upper() == "CRITICAL":
+            # Only set to "Yes" if not already set by other factors
+            if high_risk_indicator == "Unknown":
+                high_risk_indicator = "Yes"
+            high_risk_evidence.append("Critical CVSS severity")
+
+    # Prepare evidence string
+    if high_risk_indicator in ("Yes", "No"):
+        evidence_string = "; ".join(high_risk_evidence)
+    else:  # Unknown
+        evidence_string = "No additional risk context available"
+
+    # No adjustment needed
+    if not adjustments:
+        return RiskAdjustment(
+            original_level=original_level,
+            adjusted_level=original_level,
+            adjustment_reason="No risk adjustment applied",
+            high_risk_indicator=high_risk_indicator,
+            high_risk_evidence=evidence_string
+        )
+
+    # Construct the final adjustment reason
+    adjustment_reason = "Dynamic risk adjustment: " + " -> ".join(adjustments)
+
+    return RiskAdjustment(
+        original_level=original_level,
+        adjusted_level=adjusted_level,
+        adjustment_reason=adjustment_reason,
+        high_risk_indicator=high_risk_indicator,
+        high_risk_evidence=evidence_string
+    )
+
+
+def calculate_batch_risk_adjustments(
+    vulnerabilities: List[Dict[str, Any]],
+    external_data: Dict[str, Dict[str, Any]]
+) -> Dict[str, RiskAdjustment]:
+    """
+    Calculate risk adjustments for a batch of vulnerabilities.
+
+    This function always applies dynamic risk scoring. Only call it when
+    dynamic risk scoring is enabled.
+
+    Args:
+        vulnerabilities: List of vulnerability dictionaries
+        external_data: External enrichment data keyed by CVE
+
+    Returns:
+        Dictionary mapping vulnerability IDs to RiskAdjustment objects
+    """
+    adjustments = {}
+
+    for vuln in vulnerabilities:
+        vuln_id = str(vuln.get("id", "unknown"))
+        cve = vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN")
+        ext_data = external_data.get(cve, {})
+
+        adjustment = calculate_dynamic_risk(vuln, ext_data)
+        adjustments[vuln_id] = adjustment
+
+    return adjustments
+
+
+def get_risk_summary(adjustments: Dict[str, RiskAdjustment]) -> Dict[str, int]:
+    """
+    Generate a summary of risk adjustments.
+ + Args: + adjustments: Dictionary of risk adjustments + + Returns: + Summary statistics about risk adjustments + """ + summary = { + "total_vulnerabilities": len(adjustments), + "promoted": 0, + "demoted": 0, + "suppressed": 0, + "unchanged": 0, + "by_adjusted_level": {level.value: 0 for level in RiskLevel}, + "promotion_reasons": {}, + "suppression_reasons": {} + } + + for adjustment in adjustments.values(): + # Count by adjusted level + summary["by_adjusted_level"][adjustment.adjusted_level.value] += 1 + + # Count adjustments + if adjustment.was_promoted: + summary["promoted"] += 1 + reason = adjustment.adjustment_reason + summary["promotion_reasons"][reason] = summary["promotion_reasons"].get(reason, 0) + 1 + elif adjustment.was_demoted: + summary["demoted"] += 1 + if adjustment.suppressed: + summary["suppressed"] += 1 + reason = adjustment.adjustment_reason + summary["suppression_reasons"][reason] = summary["suppression_reasons"].get(reason, 0) + 1 + else: + summary["unchanged"] += 1 + + return summary + + +# Format-specific mapping functions + +def risk_level_to_sarif_level(risk_level: RiskLevel) -> str: + """Map RiskLevel to SARIF level.""" + mapping = { + RiskLevel.CRITICAL: "error", + RiskLevel.HIGH: "error", + RiskLevel.MEDIUM: "warning", + RiskLevel.LOW: "warning", + RiskLevel.INFO: "note" + } + return mapping[risk_level] + + +def risk_level_to_cyclonedx_severity(risk_level: RiskLevel) -> str: + """Map RiskLevel to CycloneDX severity.""" + mapping = { + RiskLevel.CRITICAL: "critical", + RiskLevel.HIGH: "high", + RiskLevel.MEDIUM: "medium", + RiskLevel.LOW: "low", + RiskLevel.INFO: "info" + } + return mapping[risk_level] + + +def risk_level_to_spdx_severity(risk_level: RiskLevel) -> str: + """Map RiskLevel to SPDX severity.""" + mapping = { + RiskLevel.CRITICAL: "CRITICAL", + RiskLevel.HIGH: "HIGH", + RiskLevel.MEDIUM: "MEDIUM", + RiskLevel.LOW: "LOW", + RiskLevel.INFO: "LOW" # SPDX doesn't have INFO, use LOW + } + return mapping[risk_level] + + +# Helper functions + +def _map_cvss_severity_to_risk_level(severity: str) -> RiskLevel: + """Map CVSS severity string to RiskLevel.""" + mapping = { + "CRITICAL": RiskLevel.CRITICAL, + "HIGH": RiskLevel.HIGH, + "MEDIUM": RiskLevel.MEDIUM, + "LOW": RiskLevel.LOW, + "UNKNOWN": RiskLevel.MEDIUM # Default to medium for unknown + } + return mapping.get((severity or "").upper(), RiskLevel.MEDIUM) + + +def apply_vex_suppression_filter( + vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]] +) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + """ + Filter vulnerabilities based on VEX suppression rules. + + This function always applies dynamic risk scoring. Only call when dynamic risk scoring is enabled. + + Args: + vulnerabilities: List of vulnerability dictionaries + external_data: External enrichment data + + Returns: + Tuple of (non_suppressed_vulnerabilities, suppressed_vulnerabilities) + """ + non_suppressed = [] + suppressed = [] + + for vuln in vulnerabilities: + cve = vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN") + ext_data = external_data.get(cve, {}) + + adjustment = calculate_dynamic_risk(vuln, ext_data) + + if adjustment.suppressed: + suppressed.append(vuln) + else: + non_suppressed.append(vuln) + + return non_suppressed, suppressed + + +# Legacy compatibility functions (for existing SARIF code) + +def map_vex_status_to_sarif_level( + vex_status: str, + original_level: str, + external_data: Dict[str, Any] = None +) -> str: + """ + Legacy compatibility function for existing SARIF code. 
+ Maps to the new risk calculation system. + """ + if external_data is None: + external_data = {} + + # Create a mock vulnerability for the calculation + mock_vuln = { + "vuln_exp_status": vex_status, + "severity": "MEDIUM" # Default, will be overridden by external data logic + } + + adjustment = calculate_dynamic_risk(mock_vuln, external_data) + return risk_level_to_sarif_level(adjustment.adjusted_level) + + +# Business logic functions - moved from sarif_generator.py and export_vulns.py + +def extract_unique_cves(vulnerabilities: List[Dict[str, Any]]) -> List[str]: + """Extract unique CVEs from vulnerability data, excluding UNKNOWN values.""" + return list(set( + vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN") + for vuln in vulnerabilities + if (vuln.get("vuln_id") or vuln.get("cve")) != "UNKNOWN" + )) + + +def count_high_risk_vulnerabilities(vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]]) -> Dict[str, int]: + """Count high-risk vulnerabilities based on external data.""" + counts = { + "cisa_kev": 0, + "high_epss": 0, + "critical_severity": 0, + "total_high_risk": 0 + } + + high_risk_cves = set() + + for vuln in vulnerabilities: + cve = vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN") + ext_data = external_data.get(cve, {}) + + is_high_risk = False + + if ext_data.get("cisa_kev"): + counts["cisa_kev"] += 1 + is_high_risk = True + + epss_score = ext_data.get("epss_score") + if epss_score is not None and epss_score > 0.1: + counts["high_epss"] += 1 + is_high_risk = True + + if (vuln.get("severity") or "").upper() == "CRITICAL": + counts["critical_severity"] += 1 + is_high_risk = True + + if is_high_risk: + high_risk_cves.add(cve) + + counts["total_high_risk"] = len(high_risk_cves) + return counts + + +def count_high_risk_indicators_detailed( + vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]] +) -> Dict[str, int]: + """Count vulnerabilities by high risk indicator state.""" + counts = {"yes": 0, "no": 0, "unknown": 0} + + for vuln in vulnerabilities: + cve = vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN") + ext_data = external_data.get(cve, {}) + + adjustment = calculate_dynamic_risk(vuln, ext_data) + state = adjustment.high_risk_indicator.lower() + if state in counts: + counts[state] += 1 + + return counts + + +# Public API +__all__ = [ + # Core types + "RiskLevel", + "RiskAdjustment", + + # Main functions + "calculate_dynamic_risk", + "calculate_batch_risk_adjustments", + + # Format mappings + "risk_level_to_sarif_level", + "risk_level_to_cyclonedx_severity", + "risk_level_to_spdx_severity", + + # Filtering functions + "apply_vex_suppression_filter", + + # Business logic functions + "extract_unique_cves", + "count_high_risk_vulnerabilities", + "count_high_risk_indicators_detailed", + + # Legacy compatibility + "map_vex_status_to_sarif_level", +] \ No newline at end of file diff --git a/src/workbench_cli/utilities/vuln_report/sarif_generator.py b/src/workbench_cli/utilities/vuln_report/sarif_generator.py new file mode 100644 index 0000000..548f4c3 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/sarif_generator.py @@ -0,0 +1,808 @@ +""" +SARIF 2.1.0 vulnerability report generation. + +This module provides functionality to convert vulnerability data from the Workbench API +into SARIF (Static Analysis Results Interchange Format) v2.1.0 format, optimized for +GitHub Advanced Security and other security tools. + +The module supports generation-only workflow - building SARIF from vulnerability data. 
+SARIF does not support augmentation workflows like SBOM formats. + +Enhanced with comprehensive VEX support, risk notifications, and dynamic prioritization +for maximum utility in security operations workflows. +""" + +import json +import logging +import os +from typing import Dict, List, Any, Optional +from datetime import datetime + +logger = logging.getLogger(__name__) + +# Import shared utilities for enrichment pipeline +from .bootstrap_bom import detect_package_ecosystem +from .cve_data_gathering import enrich_vulnerabilities, build_cvss_vector, extract_version_ranges +from .risk_adjustments import ( + calculate_dynamic_risk, + RiskAdjustment, + extract_unique_cves, + risk_level_to_sarif_level, + count_high_risk_vulnerabilities +) + + +def save_vulns_to_sarif( + filepath: str, + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + external_data: Optional[Dict[str, Dict[str, Any]]] = None, + nvd_enrichment: bool = False, + epss_enrichment: bool = False, + cisa_kev_enrichment: bool = False, + enable_dynamic_risk_scoring: bool = True, + api_timeout: int = 30, + quiet: bool = False +) -> None: + """ + Save vulnerability results in SARIF 2.1.0 format, optimized for GitHub Advanced Security. + + Args: + filepath: Path where the SARIF file should be saved + vulnerabilities: List of vulnerability dictionaries from the API + scan_code: The scan code for reference + external_data: Pre-fetched external enrichment data (optional) + nvd_enrichment: Whether NVD enrichment was applied + epss_enrichment: Whether EPSS enrichment was applied + cisa_kev_enrichment: Whether CISA KEV enrichment was applied + enable_dynamic_risk_scoring: Whether dynamic risk scoring is enabled (includes VEX assessments) + api_timeout: API timeout used for enrichment + quiet: Whether to suppress output messages + + + Raises: + IOError: If the file cannot be written + OSError: If the directory cannot be created + """ + output_dir = os.path.dirname(filepath) or "." + + try: + os.makedirs(output_dir, exist_ok=True) + + # Fetch external enrichment data if not provided + if external_data is None: + external_data = _fetch_external_enrichment_data( + vulnerabilities, + nvd_enrichment, + epss_enrichment, + cisa_kev_enrichment, + api_timeout + ) + + # Generate SARIF document + sarif_document = convert_vulns_to_sarif( + vulnerabilities, + scan_code, + external_data, + nvd_enrichment, + epss_enrichment, + cisa_kev_enrichment, + enable_dynamic_risk_scoring + ) + + # Write SARIF file + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(sarif_document, f, indent=2, ensure_ascii=False) + + if not quiet: + print(f" • SARIF report saved to: {filepath}") + + except (IOError, OSError) as e: + if not quiet: + print(f"\nWarning: Failed to save SARIF results to {filepath}: {e}") + raise + + +def convert_vulns_to_sarif( + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + external_data: Optional[Dict[str, Dict[str, Any]]] = None, + nvd_enrichment: bool = False, + epss_enrichment: bool = False, + cisa_kev_enrichment: bool = False, + enable_dynamic_risk_scoring: bool = True +) -> Dict[str, Any]: + """ + Convert vulnerability results to SARIF 2.1.0 format, optimized for GitHub Advanced Security. 
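+
+    Minimal usage sketch (the vulnerability keys mirror the internal format
+    consumed below; the CVE, component, and scores are made up):
+
+        doc = convert_vulns_to_sarif(
+            vulnerabilities=[{
+                "vuln_id": "CVE-2024-0001",
+                "component_name": "lodash",
+                "component_version": "4.17.20",
+                "severity": "HIGH",
+                "base_score": "8.1",
+            }],
+            scan_code="SCAN-123",
+        )
+        # doc["version"] == "2.1.0"; doc["runs"] is a single-run list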
+ + Args: + vulnerabilities: List of vulnerability dictionaries from the API + scan_code: The scan code for reference + external_data: External enrichment data (optional) + nvd_enrichment: Whether NVD enrichment was applied + epss_enrichment: Whether EPSS enrichment was applied + cisa_kev_enrichment: Whether CISA KEV enrichment was applied + enable_dynamic_risk_scoring: Whether dynamic risk scoring is enabled (includes VEX assessments) + + Returns: + SARIF document as dictionary + """ + if external_data is None: + external_data = {} + + # Create SARIF document structure + sarif_doc = { + "version": "2.1.0", + "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0.json", + "runs": [ + _create_sarif_run( + vulnerabilities, + scan_code, + external_data, + nvd_enrichment, + epss_enrichment, + cisa_kev_enrichment, + enable_dynamic_risk_scoring ) + ] + } + + return sarif_doc + + +def _fetch_external_enrichment_data( + vulnerabilities: List[Dict[str, Any]], + nvd_enrichment: bool, + epss_enrichment: bool, + cisa_kev_enrichment: bool, + api_timeout: int +) -> Dict[str, Dict[str, Any]]: + """Fetch external enrichment data for vulnerabilities using existing utilities.""" + if not any([nvd_enrichment, epss_enrichment, cisa_kev_enrichment]): + return {} + + # Use existing CVE extraction logic + unique_cves = extract_unique_cves(vulnerabilities) + + external_data = {} + if unique_cves: + try: + external_data = enrich_vulnerabilities( + unique_cves, + nvd_enrichment, + epss_enrichment, + cisa_kev_enrichment, + api_timeout + ) + except Exception as e: + logger.warning(f"Failed to fetch external vulnerability data: {e}") + + return external_data + + + +def _create_sarif_run( + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + external_data: Dict[str, Dict[str, Any]], + nvd_enrichment: bool, + epss_enrichment: bool, + cisa_kev_enrichment: bool, + enable_dynamic_risk_scoring: bool +) -> Dict[str, Any]: + """Create a SARIF run object optimized for GitHub Advanced Security.""" + + # Generate notifications for high-risk findings (includes VEX suppression summary) + notifications = _generate_risk_notifications(vulnerabilities, external_data) + + # Generate rules and results + rules = _generate_enhanced_sarif_rules(vulnerabilities, external_data, enable_dynamic_risk_scoring) + results = _generate_enhanced_sarif_results(vulnerabilities, external_data, enable_dynamic_risk_scoring) + + # Create run object with GitHub Advanced Security optimizations + run = { + "tool": { + "driver": { + "name": "FossID Workbench", + "version": "1.0.0", + "informationUri": "https://fossid.com/products/workbench/", + "rules": rules, + "notifications": notifications + } + }, + "results": results, + "properties": _create_run_properties( + scan_code, + vulnerabilities, + external_data, + nvd_enrichment, + epss_enrichment, + cisa_kev_enrichment + ) + } + + return run + + +def _generate_risk_notifications( + vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]] +) -> List[Dict[str, Any]]: + """Generate notifications for high-risk findings.""" + notifications = [] + + cisa_kev_count = sum(1 for vuln in vulnerabilities + if external_data.get(vuln.get("vuln_id") or vuln.get("cve", ""), {}).get("cisa_kev")) + high_epss_count = sum(1 for vuln in vulnerabilities + if (external_data.get(vuln.get("vuln_id") or vuln.get("cve", ""), {}).get("epss_score") or 0) > 0.1) + + # Count VEX suppressed vulnerabilities using dynamic risk scoring + vex_suppressed_count = 0 + for vuln in vulnerabilities: 
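+        # "No" is the suppressed state of the 3-state indicator assigned by
+        # calculate_dynamic_risk (VEX status- or response-based suppression).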
+ cve = vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN") + ext_data = external_data.get(cve, {}) + try: + risk_adjustment = calculate_dynamic_risk(vuln, ext_data) + if risk_adjustment.high_risk_indicator == "No": + vex_suppressed_count += 1 + except Exception: + continue + + if cisa_kev_count > 0: + notifications.append({ + "level": "error", + "message": { + "text": f"⚠️ URGENT: {cisa_kev_count} vulnerabilities are on CISA's Known Exploited Vulnerabilities catalog and require immediate attention" + }, + "properties": { + "cisa_kev_count": cisa_kev_count, + "category": "security", + "priority": "critical" + } + }) + + if high_epss_count > 0: + notifications.append({ + "level": "warning", + "message": { + "text": f"🔍 HIGH RISK: {high_epss_count} vulnerabilities have elevated EPSS exploitation probability scores (>0.1)" + }, + "properties": { + "high_epss_count": high_epss_count, + "category": "security", + "priority": "high" + } + }) + + if vex_suppressed_count > 0: + notifications.append({ + "level": "note", + "message": { + "text": f"✅ VEX ASSESSMENTS: {vex_suppressed_count} vulnerabilities have been assessed and suppressed based on organizational VEX statements" + }, + "properties": { + "vex_suppressed_count": vex_suppressed_count, + "category": "assessment", + "priority": "info" + } + }) + + return notifications + + + + + +def generate_vex_properties(vuln: Dict[str, Any]) -> Dict[str, Any]: + """Generate VEX-related properties for SARIF output.""" + properties = {} + + if vuln.get("vuln_exp_status"): + properties["vex_status"] = vuln["vuln_exp_status"] + + if vuln.get("vuln_exp_justification"): + properties["vex_justification"] = vuln["vuln_exp_justification"] + + if vuln.get("vuln_exp_response"): + properties["vex_response"] = vuln["vuln_exp_response"] + + if vuln.get("vuln_exp_details"): + properties["vex_details"] = vuln["vuln_exp_details"] + + if vuln.get("vuln_exp_created"): + properties["vex_created"] = vuln["vuln_exp_created"] + + if vuln.get("vuln_exp_updated"): + properties["vex_updated"] = vuln["vuln_exp_updated"] + + if vuln.get("vuln_exp_created_by_username"): + properties["vex_created_by"] = vuln["vuln_exp_created_by_username"] + + if vuln.get("vuln_exp_updated_by_username"): + properties["vex_updated_by"] = vuln["vuln_exp_updated_by_username"] + + return properties + + + + + +def _generate_enhanced_sarif_rules( + vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]], + enable_dynamic_risk_scoring: bool +) -> List[Dict[str, Any]]: + """Generate enhanced SARIF rules with rich NVD data and VEX information.""" + rules = {} + + for vuln in vulnerabilities: + cve_id = vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN") + component_name = vuln.get("component_name", "Unknown") + component_version = vuln.get("component_version", "Unknown") + + # Skip only if we have absolutely no identifying information + if not cve_id and not component_name: + continue + + # Create component-specific rule ID for better tracking + rule_id = f"{cve_id}:{component_name}@{component_version}" + + if rule_id in rules: + continue + + # Get external enrichment data + ext_data = external_data.get(cve_id, {}) + + # Calculate dynamic risk if enabled + dynamic_risk_adjustment = None + if enable_dynamic_risk_scoring: + dynamic_risk_adjustment = calculate_dynamic_risk(vuln, ext_data) + + # Create enhanced descriptions using NVD data + short_desc = f"{cve_id} in {component_name}@{component_version} (CVSS {vuln.get('base_score', 'N/A')})" + if ext_data.get("nvd_cwe"): + cwe_list = 
ext_data["nvd_cwe"][:2] # Show first 2 CWEs to keep it concise + cwe_text = ", ".join(cwe_list) + short_desc += f" - {cwe_text}" + + # Use NVD description if available, otherwise fall back to generic description + nvd_desc = ext_data.get("nvd_description") + if nvd_desc and nvd_desc.strip() and nvd_desc != "No description available": + full_desc = nvd_desc + else: + full_desc = f"Security vulnerability {cve_id} affecting {component_name} with CVSS score {vuln.get('base_score', 'N/A')}" + + # Add component context to NVD description + if ext_data.get("nvd_description") and ext_data["nvd_description"] != "No description available": + full_desc += f"\n\nAffected Component: {component_name} version {component_version}" + + # Add affected version ranges if we can extract them from references + version_info = extract_version_ranges(ext_data.get("nvd_references", [])) + if version_info: + full_desc += f"\nKnown Affected Versions: {version_info}" + + rule = { + "id": rule_id, + "name": f"{cve_id} in {component_name}@{component_version}", + "shortDescription": { + "text": short_desc + }, + "fullDescription": { + "text": full_desc + }, + "defaultConfiguration": { + "level": _determine_rule_level(vuln, ext_data, dynamic_risk_adjustment) + }, + "properties": { + "security-severity": str(vuln.get("base_score", "0.0")), + "cvss_version": vuln.get("cvss_version", "N/A"), + "cvss_vector": ext_data.get("full_cvss_vector") or build_cvss_vector(vuln), + "base_score": ext_data.get("cvss_score") or vuln.get("base_score", "N/A"), + "attack_vector": vuln.get("attack_vector", "N/A"), + "attack_complexity": vuln.get("attack_complexity", "N/A"), + "availability_impact": vuln.get("availability_impact", "N/A"), + "severity": vuln.get("severity", "UNKNOWN"), + "component_name": component_name, + "cve": cve_id, + "component": f"{component_name}@{component_version}", + "ecosystem": detect_package_ecosystem(component_name, component_version), + "tags": ["security", "vulnerability"], + "nvd_enriched": bool(ext_data.get("nvd_description")) + }, + "helpUri": f"https://nvd.nist.gov/vuln/detail/{cve_id}" if cve_id != "UNKNOWN" else None + } + + # Add external data properties + if ext_data.get("epss_score") is not None: + rule["properties"]["epss_score"] = ext_data["epss_score"] + rule["properties"]["epss_percentile"] = ext_data["epss_percentile"] + + if ext_data.get("cisa_kev"): + rule["properties"]["cisa_known_exploited"] = True + + if ext_data.get("nvd_cwe"): + rule["properties"]["cwe_ids"] = ext_data["nvd_cwe"] + + # Add NVD references for additional context + if ext_data.get("nvd_references"): + # Include up to 5 most relevant references + relevant_refs = [] + for ref in ext_data["nvd_references"][:5]: + ref_info = { + "url": ref.get("url", ""), + "source": ref.get("source", "Unknown") + } + if ref.get("tags"): + ref_info["tags"] = ref["tags"] + relevant_refs.append(ref_info) + rule["properties"]["nvd_references"] = relevant_refs + + # Add VEX properties if available + if any(field in vuln for field in ["vuln_exp_status", "vuln_exp_justification", "vuln_exp_response", "vuln_exp_details"]): + vex_properties = generate_vex_properties(vuln) + rule["properties"].update(vex_properties) + + rules[rule_id] = rule + + return list(rules.values()) + + +def _determine_rule_level( + vuln: Dict[str, Any], + ext_data: Dict[str, Any], + dynamic_risk_adjustment: Optional[RiskAdjustment] +) -> str: + """Determine the SARIF level for a rule using dynamic risk scoring.""" + if dynamic_risk_adjustment: + return 
risk_level_to_sarif_level(dynamic_risk_adjustment.adjusted_level) + else: + return _map_severity_to_sarif_level(vuln.get("severity", "medium"), vuln, ext_data) + + +def _generate_enhanced_sarif_results( + vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]], + enable_dynamic_risk_scoring: bool +) -> List[Dict[str, Any]]: + """Generate enhanced SARIF results with priority context and comprehensive metadata.""" + results = [] + + for vuln in vulnerabilities: + cve_id = vuln.get("vuln_id") or vuln.get("cve", "UNKNOWN") + component_name = vuln.get("component_name", "Unknown") + component_version = vuln.get("component_version", "Unknown") + severity = vuln.get("severity", "UNKNOWN") + base_score = vuln.get("base_score", "N/A") + + # Skip only if we have absolutely no identifying information + if not cve_id and not component_name: + continue + + # Get external data + ext_data = external_data.get(cve_id, {}) + + # Calculate dynamic risk if enabled + dynamic_risk_adjustment = None + if enable_dynamic_risk_scoring: + dynamic_risk_adjustment = calculate_dynamic_risk(vuln, ext_data) + + # Determine prioritization context based on dynamic risk adjustment + priority_context = _create_priority_context(vuln, ext_data, dynamic_risk_adjustment) + + # Create enhanced package URL with ecosystem detection + ecosystem = detect_package_ecosystem(component_name, component_version) + artifact_uri = f"pkg:{ecosystem}/{component_name}@{component_version}" + + # Create message with priority context + message_text = f"{priority_context}[CVSS: {base_score}] {cve_id}" + + result = { + "ruleId": f"{cve_id}:{component_name}@{component_version}", + "message": { + "text": message_text + }, + "level": _determine_result_level(vuln, ext_data, dynamic_risk_adjustment), + "locations": [{ + "physicalLocation": { + "artifactLocation": { + "uri": artifact_uri, + "description": { + "text": f"Vulnerable component: {component_name} version {component_version}" + } + }, + "region": { + "startLine": 1, + "startColumn": 1, + "snippet": { + "text": f"{component_name}@{component_version}" + } + } + }, + "logicalLocations": [{ + "name": component_name, + "fullyQualifiedName": artifact_uri, + "kind": "package" + }] + }], + "partialFingerprints": _create_enhanced_partial_fingerprints(vuln, component_name, component_version), + "baselineState": _determine_baseline_state(vuln, dynamic_risk_adjustment), + "properties": { + "vulnerability_id": vuln.get("id"), + "cvss_version": vuln.get("cvss_version"), + "security-severity": str(base_score), # SARIF standard property for security findings + "attack_vector": vuln.get("attack_vector"), + "attack_complexity": vuln.get("attack_complexity"), + "availability_impact": vuln.get("availability_impact"), + "component_id": vuln.get("component_id"), + "component_name": component_name, + "component_version": component_version, + "ecosystem": ecosystem, + "package_url": artifact_uri, + "cve": cve_id, + "component": f"{component_name}@{component_version}", + "severity": severity, + "baselineState": "unchanged", + "tags": { + "vulnerability": [cve_id], + "component": [f"{component_name}@{component_version}"], + "severity": [severity.lower() if severity != "UNKNOWN" else "unknown"] + } + } + } + + # Add external data properties + if ext_data.get("epss_score") is not None: + result["properties"]["epss_score"] = ext_data["epss_score"] + result["properties"]["epss_percentile"] = ext_data["epss_percentile"] + + if ext_data.get("cisa_kev"): + result["properties"]["cisa_known_exploited"] = 
True
+
+        if ext_data.get("nvd_cwe"):
+            result["properties"]["cwe_ids"] = ext_data["nvd_cwe"]
+
+        if ext_data.get("nvd_description"):
+            result["properties"]["nvd_description"] = ext_data["nvd_description"]
+
+        if ext_data.get("full_cvss_vector"):
+            result["properties"]["full_cvss_vector"] = ext_data["full_cvss_vector"]
+
+        if ext_data.get("nvd_references"):
+            # Store key references for analysis tools
+            result["properties"]["nvd_reference_count"] = len(ext_data["nvd_references"])
+            result["properties"]["nvd_vendor_advisories"] = len([
+                ref for ref in ext_data["nvd_references"]
+                if "vendor advisory" in [tag.lower() for tag in ref.get("tags", [])]
+            ])
+
+        # Add VEX properties if available
+        if any(field in vuln for field in ["vuln_exp_status", "vuln_exp_justification", "vuln_exp_response", "vuln_exp_details"]):
+            vex_properties = generate_vex_properties(vuln)
+            result["properties"].update(vex_properties)
+
+        # Add comprehensive fingerprints for deduplication
+        wid = str(vuln.get("id", "unknown"))
+        result["fingerprints"] = {
+            "workbench/component": f"{component_name}@{component_version}",
+            "workbench/vulnerability": f"{cve_id}#{wid}",
+            "workbench/id": wid,
+            "primary": wid,
+            "stable": cve_id
+        }
+
+        # Add remediation guidance if available
+        if vuln.get("fix_version"):
+            result["fixes"] = [{
+                "description": {
+                    "text": f"Update {component_name} to version {vuln['fix_version']} or later"
+                },
+                "artifactChanges": [{
+                    "artifactLocation": {
+                        "uri": f"{component_name}:{component_version}"
+                    },
+                    "replacements": [{
+                        "deletedRegion": {
+                            "startLine": 1,
+                            "startColumn": 1,
+                            "endLine": 1,
+                            "endColumn": 1
+                        },
+                        "insertedContent": {
+                            "text": f"{component_name}:{vuln['fix_version']}"
+                        }
+                    }]
+                }]
+            }]
+
+        # Add suppression information based on dynamic risk adjustment
+        if dynamic_risk_adjustment and dynamic_risk_adjustment.high_risk_indicator == "No":
+            # This vulnerability has been assessed as low risk - add suppression info.
+            # SARIF 2.1.0 only allows "inSource" or "external" as suppression kinds.
+            result["suppressions"] = [{
+                "kind": "external",
+                "status": "accepted",
+                "justification": dynamic_risk_adjustment.high_risk_evidence or "Assessed as low risk through dynamic risk scoring"
+            }]
+
+        results.append(result)
+
+    return results
+
+
+def _create_priority_context(
+    vuln: Dict[str, Any],
+    ext_data: Dict[str, Any],
+    dynamic_risk_adjustment: Optional[RiskAdjustment]
+) -> str:
+    """Determine prioritization context based on dynamic risk adjustment."""
+    priority_context = ""
+
+    if dynamic_risk_adjustment and dynamic_risk_adjustment.high_risk_indicator == "Yes":
+        # Check promotion reasons in order of priority
+        if ext_data.get("cisa_kev"):
+            priority_context = "[CISA KEV] "
+        elif (ext_data.get("epss_score") or 0) > 0.1:
+            priority_context = f"[EPSS: {ext_data['epss_score']:.3f}] "
+        elif vuln.get("vuln_exp_status") and vuln.get("vuln_exp_status").lower() in ["exploitable", "affected"]:
+            priority_context = f"[VEX: {vuln['vuln_exp_status'].upper()}] "
+    elif dynamic_risk_adjustment and dynamic_risk_adjustment.high_risk_indicator == "No":
+        # Check demotion reasons
+        vex_status = vuln.get("vuln_exp_status")
+        if vex_status:
+            priority_context = f"[VEX: {vex_status.upper()}] "
+
+    return priority_context
+
+
+def _determine_result_level(
+    vuln: Dict[str, Any],
+    ext_data: Dict[str, Any],
+    dynamic_risk_adjustment: Optional[RiskAdjustment]
+) -> str:
+    """Determine the SARIF level for a result using dynamic risk scoring."""
+    if dynamic_risk_adjustment:
+        return risk_level_to_sarif_level(dynamic_risk_adjustment.adjusted_level)
+    else:
+        return 
_map_severity_to_sarif_level(vuln.get("severity", "medium"), vuln, ext_data) + + +def _create_enhanced_partial_fingerprints(vuln: Dict[str, Any], component_name: str, component_version: str) -> Dict[str, str]: + """Create comprehensive partial fingerprints for deduplication.""" + cve_id = vuln.get("vuln_id") or vuln.get("cve", "") + + return { + "workbenchScan": f"{cve_id}:{component_name}@{component_version}", + "primaryLocationHash": f"{component_name}:{component_version}", + "cveComponent": f"{cve_id}:{component_name}", + "vulnerability": cve_id, + "component": f"{component_name}@{component_version}" + } + + +def _determine_baseline_state(vuln: Dict[str, Any], dynamic_risk_adjustment: Optional[RiskAdjustment]) -> str: + """Determine the baseline state for GitHub Advanced Security using dynamic risk assessment.""" + # Use dynamic risk assessment to determine baseline state + if dynamic_risk_adjustment and dynamic_risk_adjustment.high_risk_indicator == "No": + return "reviewed" # Assessed as low risk + elif vuln.get("vuln_exp_status") == "not_affected": + return "absent" + elif vuln.get("vuln_exp_status") == "fixed": + return "absent" + elif vuln.get("vuln_exp_response"): + return "reviewed" + else: + return "new" + + + + + +def _create_run_properties( + scan_code: str, + vulnerabilities: List[Dict[str, Any]], + external_data: Dict[str, Dict[str, Any]], + nvd_enrichment: bool, + epss_enrichment: bool, + cisa_kev_enrichment: bool +) -> Dict[str, Any]: + """Create run properties optimized for GitHub Advanced Security.""" + + # Basic scan metadata + properties = { + "scanCode": scan_code, + "timestamp": datetime.utcnow().isoformat() + "Z", + "vulnerabilityCount": len(vulnerabilities), + "componentCount": len(set( + f"{v.get('component_name')}@{v.get('component_version')}" + for v in vulnerabilities + if v.get('component_name') and v.get('component_version') + )) + } + + # Severity distribution + severity_counts = {} + for vuln in vulnerabilities: + severity = (vuln.get("severity") or "medium").lower() + severity_counts[severity] = severity_counts.get(severity, 0) + 1 + + properties["severityDistribution"] = severity_counts + + # Enrichment sources + enrichment_sources = [] + if nvd_enrichment: + enrichment_sources.append("NVD") + if epss_enrichment: + enrichment_sources.append("EPSS") + if cisa_kev_enrichment: + enrichment_sources.append("CISA KEV") + + if enrichment_sources: + properties["enrichmentSources"] = enrichment_sources + + # High risk indicators + high_risk_counts = count_high_risk_vulnerabilities(vulnerabilities, external_data) + if high_risk_counts.get("total_high_risk", 0) > 0: + properties["highRiskVulnerabilities"] = high_risk_counts["total_high_risk"] + + # VEX statistics + vex_stats = _calculate_vex_statistics(vulnerabilities) + if any(vex_stats.values()): + properties["vexStatistics"] = vex_stats + + return properties + + +def _calculate_vex_statistics(vulnerabilities: List[Dict[str, Any]]) -> Dict[str, int]: + """Calculate VEX statistics for the run properties.""" + vex_stats = { + "total_with_vex": 0, + "not_affected": 0, + "affected": 0, + "fixed": 0, + "under_investigation": 0, + "with_response": 0 + } + + for vuln in vulnerabilities: + if vuln.get("vuln_exp_id") or vuln.get("vuln_exp_status") or vuln.get("vuln_exp_response"): + vex_stats["total_with_vex"] += 1 + + status = vuln.get("vuln_exp_status") or "" + if status and status.lower() in vex_stats: + vex_stats[status.lower()] += 1 + + if vuln.get("vuln_exp_response"): + vex_stats["with_response"] += 1 + + return 
vex_stats + + +def _map_severity_to_sarif_level(severity: str, vuln: Dict[str, Any] = None, ext_data: Dict[str, Any] = None) -> str: + """Map vulnerability severity to SARIF level optimized for GitHub Advanced Security.""" + if not severity: + return "warning" + + severity_lower = severity.lower() + + # Check for high-risk indicators that should escalate the level + if ext_data and ext_data.get("cisa_kev"): + return "error" + + if ext_data and ext_data.get("epss_score") and float(ext_data["epss_score"]) > 0.7: + return "error" + + # Standard severity mapping + if severity_lower in ["critical", "high"]: + return "error" + elif severity_lower in ["medium", "moderate"]: + return "warning" + elif severity_lower in ["low", "informational", "info"]: + return "note" + else: + return "warning" + + +# Export public API +__all__ = [ + "save_vulns_to_sarif", + "convert_vulns_to_sarif", + "generate_vex_properties" +] \ No newline at end of file diff --git a/src/workbench_cli/utilities/vuln_report/sbom_utils.py b/src/workbench_cli/utilities/vuln_report/sbom_utils.py new file mode 100644 index 0000000..f5246b0 --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/sbom_utils.py @@ -0,0 +1,147 @@ +""" +SBOM utilities for vulnerability report augmentation workflows. + +This module provides SBOM download and resource management functionality +used by augmentation flows to work with existing SBOMs across multiple formats. +""" + +import logging +import tempfile +import argparse +import os +from contextlib import contextmanager +from typing import Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from ...api import WorkbenchAPI + +logger = logging.getLogger(__name__) + + +def download_sbom( + workbench: "WorkbenchAPI", + scan_code: str, + *, + sbom_format: str = "cyclonedx", + include_vex: bool = True, + params: Optional[argparse.Namespace] = None, + quiet: bool = False, +) -> Optional[str]: + """Download a scan-level SBOM from Workbench for augmentation.""" + fmt_normalised = sbom_format.lower() + + if fmt_normalised not in {"cyclonedx", "cyclone_dx", "cdx"}: + if not quiet: + print(f" ℹ️ SBOM format '{sbom_format}' not yet supported") + return None + + report_type = "cyclone_dx" + + try: + is_async = report_type in workbench.ASYNC_REPORT_TYPES + if not quiet: + print(f" 📡 Generating SBOM {'asynchronously' if is_async else 'synchronously'}...") + + if is_async: + process_id = workbench.generate_scan_report(scan_code, report_type=report_type, include_vex=include_vex) + workbench._wait_for_process( + process_description=f"SBOM generation (Process ID: {process_id})", + check_function=workbench.check_scan_report_status, + check_args={"process_id": process_id, "scan_code": scan_code}, + status_accessor=lambda d: d.get("progress_state", "UNKNOWN"), + success_values={"FINISHED"}, + failure_values={"FAILED", "CANCELLED", "ERROR"}, + max_tries=getattr(params, "scan_number_of_tries", 60) if params else 60, + wait_interval=3, + progress_indicator=not quiet, + ) + response = workbench.download_scan_report(process_id) + else: + response = workbench.generate_scan_report(scan_code, report_type=report_type, include_vex=include_vex) + + # Save to temporary file + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as tmp: + content = response.content.decode("utf-8") if hasattr(response, "content") else str(response) + tmp.write(content) + if not quiet: + print(f" ✅ SBOM downloaded → {tmp.name}") + return tmp.name + + except Exception as exc: + logger.debug(f"SBOM download failed: 
{exc}") + return None + + +@contextmanager +def managed_sbom_download( + workbench: "WorkbenchAPI", + scan_code: str, + *, + sbom_format: str = "cyclonedx", + include_vex: bool = True, + params: Optional[argparse.Namespace] = None, + quiet: bool = False +): + """ + Context manager for SBOM download with automatic cleanup. + + Provides automatic resource management for downloaded SBOM files, + ensuring cleanup even if exceptions occur during processing. + + Args: + workbench: The Workbench API client + scan_code: The scan identifier + sbom_format: SBOM format to download ("cyclonedx", "spdx", etc.) + include_vex: Whether to include VEX data in the SBOM + params: Optional command line parameters + quiet: If True, suppress progress messages + + Yields: + Optional[str]: Path to the downloaded SBOM file, or None if download failed + + Example: + with managed_sbom_download(workbench, scan_code, quiet=params.quiet) as sbom_path: + if sbom_path: + # Process the SBOM file + process_sbom(sbom_path) + # File is automatically cleaned up here + """ + sbom_path = None + try: + # Download the SBOM + sbom_path = download_sbom( + workbench=workbench, + scan_code=scan_code, + sbom_format=sbom_format, + include_vex=include_vex, + params=params, + quiet=quiet + ) + + if sbom_path and not quiet: + print(f" 📥 Downloaded SBOM: {os.path.basename(sbom_path)}") + + # Yield the path to the caller + yield sbom_path + + finally: + # Cleanup: Remove the temporary file if it exists + if sbom_path and os.path.exists(sbom_path): + try: + os.unlink(sbom_path) + logger.debug(f"Cleaned up temporary SBOM file: {sbom_path}") + if not quiet and sbom_path: + print(f" 🧹 Cleaned up temporary SBOM file") + except Exception as e: + logger.warning(f"Failed to clean up temporary SBOM file {sbom_path}: {e}") + # Don't raise exception during cleanup - just log the warning + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +__all__ = [ + "download_sbom", + "managed_sbom_download", +] \ No newline at end of file diff --git a/src/workbench_cli/utilities/vuln_report/spdx_enrichment.py b/src/workbench_cli/utilities/vuln_report/spdx_enrichment.py new file mode 100644 index 0000000..e69de29 diff --git a/src/workbench_cli/utilities/vuln_report/spdx_generator.py b/src/workbench_cli/utilities/vuln_report/spdx_generator.py new file mode 100644 index 0000000..00f333c --- /dev/null +++ b/src/workbench_cli/utilities/vuln_report/spdx_generator.py @@ -0,0 +1,450 @@ +""" +SPDX 3.0 vulnerability report generation. + +This module provides functionality to convert vulnerability data from the Workbench API +into SPDX 3.0 format with Security Profile, which provides standardized vulnerability +information within software bill of materials. 
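+
+Usage sketch (argument values are illustrative):
+
+    save_vulns_to_spdx("vuln-report.spdx.json", vulnerabilities, scan_code,
+                       nvd_enrichment=True, quiet=True)
+
+A simplified JSON builder (_create_spdx_json_fallback) is also provided for
+environments where the optional 'spdx-tools' dependency is unavailable.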
+""" + +import json +import logging +import os +from typing import Dict, List, Any, Optional +from datetime import datetime + +logger = logging.getLogger(__name__) + +# SPDX imports (optional dependency) +try: + from spdx_tools.spdx.model import Document, CreationInfo, Actor, ActorType + from spdx_tools.spdx.model.package import Package + from spdx_tools.spdx.model.vulnerability import Vulnerability, VulnerabilityReference + from spdx_tools.spdx.writer.json import write_document_to_file + SPDX_AVAILABLE = True +except ImportError: + # Fallback types when SPDX is not available + Document = Any + CreationInfo = Any + Actor = Any + ActorType = Any + Package = Any + Vulnerability = Any + VulnerabilityReference = Any + SPDX_AVAILABLE = False + +from .bootstrap_bom import detect_package_ecosystem +from .risk_adjustments import calculate_dynamic_risk, RiskAdjustment + + +def save_vulns_to_spdx( + filepath: str, + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + external_data: Optional[Dict[str, Dict[str, Any]]] = None, + nvd_enrichment: bool = False, + epss_enrichment: bool = False, + cisa_kev_enrichment: bool = False, + enable_dynamic_risk_scoring: bool = True, + api_timeout: int = 30, + quiet: bool = False, + all_components: Optional[List[Dict[str, Any]]] = None, + base_sbom_path: Optional[str] = None +) -> None: + """ + Save vulnerability results in SPDX 3.0 format. + + Args: + filepath: Path where the SPDX file should be saved + vulnerabilities: List of vulnerability dictionaries from the API + scan_code: The scan code for reference + external_data: Pre-fetched external enrichment data (optional) + nvd_enrichment: Whether NVD enrichment was applied + epss_enrichment: Whether EPSS enrichment was applied + cisa_kev_enrichment: Whether CISA KEV enrichment was applied + enable_dynamic_risk_scoring: Whether dynamic risk scoring is enabled + api_timeout: API timeout used for enrichment + quiet: Whether to suppress output messages + all_components: List of all components from scan when --augment-full-bom is used (optional) + base_sbom_path: Path to base SBOM (for consistency, not used in SPDX) + + Raises: + IOError: If the file cannot be written + OSError: If the directory cannot be created + """ + if not SPDX_AVAILABLE: + raise ImportError( + "SPDX support requires the 'spdx-tools' package. " + "This should be installed automatically with workbench-cli. " + "Try reinstalling: pip install --force-reinstall workbench-cli" + ) + + output_dir = os.path.dirname(filepath) or "." + + try: + os.makedirs(output_dir, exist_ok=True) + + spdx_document = convert_vulns_to_spdx( + vulnerabilities, + scan_code, + external_data, + nvd_enrichment, + epss_enrichment, + cisa_kev_enrichment, + enable_dynamic_risk_scoring, + all_components + ) + + # Use SPDX JSON writer + write_document_to_file(spdx_document, filepath) + + if not quiet: + print(f"Saved SPDX 3.0 document to: {filepath}") + + except (IOError, OSError) as e: + if not quiet: + print(f"\nWarning: Failed to save SPDX results to {filepath}: {e}") + raise + + +def convert_vulns_to_spdx( + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + external_data: Optional[Dict[str, Dict[str, Any]]] = None, + nvd_enrichment: bool = False, + epss_enrichment: bool = False, + cisa_kev_enrichment: bool = False, + enable_dynamic_risk_scoring: bool = True, + all_components: Optional[List[Dict[str, Any]]] = None +) -> Document: + """ + Create an SPDX 3.0 document with vulnerability information. 
+ + Args: + vulnerabilities: List of vulnerability dictionaries from the API + scan_code: The scan code for reference + external_data: External enrichment data (optional) + nvd_enrichment: Whether NVD enrichment was applied + epss_enrichment: Whether EPSS enrichment was applied + cisa_kev_enrichment: Whether CISA KEV enrichment was applied + enable_dynamic_risk_scoring: Whether dynamic risk scoring is enabled + all_components: List of all components from scan when --augment-full-bom is used (optional) + + Returns: + SPDX Document object + """ + if not SPDX_AVAILABLE: + raise ImportError("SPDX support requires the 'spdx-tools' package which should be installed automatically") + + if external_data is None: + external_data = {} + + # Create SPDX document + creation_info = CreationInfo( + spdx_version="SPDX-3.0", + spdx_id=f"SPDXRef-DOCUMENT-{scan_code}", + name=f"Vulnerability Report - {scan_code}", + document_namespace=f"https://workbench.fossid.com/spdx/{scan_code}", + creators=[Actor(ActorType.TOOL, "FossID Workbench CLI")], + created=datetime.utcnow() + ) + + document = Document(creation_info) + + # Add enrichment metadata as document annotations + enrichment_annotations = [ + f"nvd_enriched: {str(nvd_enrichment).lower()}", + f"epss_enriched: {str(epss_enrichment).lower()}", + f"cisa_kev_enriched: {str(cisa_kev_enrichment).lower()}", + f"workbench_scan_code: {scan_code}", + f"generated_at: {datetime.utcnow().isoformat()}Z" + ] + + # Add annotations to document if the model supports it + if hasattr(document, 'annotations'): + document.annotations = enrichment_annotations + elif hasattr(document, 'comment'): + document.comment = "; ".join(enrichment_annotations) + + # Create packages and vulnerabilities + packages = {} + + # If all_components is provided, create packages for all components first + if all_components: + for comp in all_components: + component_name = comp.get("name", "Unknown") + component_version = comp.get("version", "") + package_key = f"{component_name}@{component_version}" + + if package_key not in packages: + ecosystem = detect_package_ecosystem(component_name, component_version) + + package = Package( + spdx_id=f"SPDXRef-Package-{component_name}-{component_version}", + name=component_name, + version=component_version, + download_location="NOASSERTION" # Required field + ) + + # Add package URL if possible + if ecosystem != "generic": + package.external_package_refs = [ + f"pkg:{ecosystem}/{component_name}@{component_version}" + ] + + packages[package_key] = package + document.packages.append(package) + + # Process vulnerabilities and create/update packages as needed + for vuln in vulnerabilities: + component_name = vuln.get("component_name", "Unknown") + component_version = vuln.get("component_version", "Unknown") + cve = vuln.get("cve", "UNKNOWN") + + # Create package if not exists (when all_components not provided or component not in all_components) + package_key = f"{component_name}@{component_version}" + if package_key not in packages: + ecosystem = detect_package_ecosystem(component_name, component_version) + + package = Package( + spdx_id=f"SPDXRef-Package-{component_name}-{component_version}", + name=component_name, + version=component_version, + download_location="NOASSERTION" # Required field + ) + + # Add package URL if possible + if ecosystem != "generic": + package.external_package_refs = [ + f"pkg:{ecosystem}/{component_name}@{component_version}" + ] + + packages[package_key] = package + document.packages.append(package) + + # Calculate dynamic risk 
adjustment (if enabled) + dynamic_risk_adjustment = None + if enable_dynamic_risk_scoring: + dynamic_risk_adjustment = calculate_dynamic_risk(vuln, external_data.get(cve, {})) + + # Create vulnerability + vulnerability = _create_spdx_vulnerability(vuln, external_data.get(cve, {}), dynamic_risk_adjustment) + document.vulnerabilities.append(vulnerability) + + return document + + +def _create_spdx_vulnerability( + vuln: Dict[str, Any], + ext_data: Dict[str, Any], + dynamic_risk_adjustment: Optional[RiskAdjustment] = None +) -> Vulnerability: + """Create an SPDX Vulnerability object from vulnerability data.""" + cve = vuln.get("cve", "UNKNOWN") + component_name = vuln.get("component_name", "Unknown") + component_version = vuln.get("component_version", "Unknown") + + # Create vulnerability + vulnerability_id = cve if cve != "UNKNOWN" else f"UNKNOWN-{vuln.get('id', 'unknown')}" + + vulnerability = Vulnerability( + spdx_id=f"SPDXRef-Vulnerability-{vulnerability_id}-{component_name}-{component_version}", + name=vulnerability_id + ) + + # Add description from NVD if available + if ext_data.get("nvd_description"): + vulnerability.summary = ext_data["nvd_description"] + else: + vulnerability.summary = f"Security vulnerability affecting {component_name} version {component_version}" + + # Add CVSS information and dynamic risk assessment + base_score = vuln.get("base_score") + if base_score and base_score != "N/A": + try: + score_value = float(base_score) + # Note: SPDX 3.0 vulnerability model is still evolving + # This is a simplified representation + vulnerability.cvss_score = score_value + + # Use original CVSS-based severity + vulnerability.severity = _map_severity_to_spdx(vuln.get("severity", "UNKNOWN")) + except (ValueError, TypeError): + pass + + # Add external references + references = [] + + # NVD reference + if cve != "UNKNOWN": + nvd_ref = VulnerabilityReference( + locator=f"https://nvd.nist.gov/vuln/detail/{cve}", + reference_type="advisory" + ) + references.append(nvd_ref) + + # Additional NVD references + if ext_data.get("nvd_references"): + for ref in ext_data["nvd_references"][:5]: # Limit to 5 references + ref_obj = VulnerabilityReference( + locator=ref.get("url", ""), + reference_type="other" + ) + references.append(ref_obj) + + vulnerability.external_references = references + + # Add VEX information and dynamic risk as annotations + annotations = [] + + # High Risk Indicator annotations (NEW) + if dynamic_risk_adjustment: + annotations.append(f"High Risk Indicator: {dynamic_risk_adjustment.high_risk_indicator}") + annotations.append(f"High Risk Evidence: {dynamic_risk_adjustment.high_risk_evidence}") + + vex_status = vuln.get("vuln_exp_status") + if vex_status: + annotations.append(f"VEX Status: {vex_status}") + + vex_response = vuln.get("vuln_exp_response") + if vex_response: + annotations.append(f"VEX Response: {vex_response}") + + vex_justification = vuln.get("vuln_exp_justification") + if vex_justification: + annotations.append(f"VEX Justification: {vex_justification}") + + # External enrichment annotations + if ext_data.get("epss_score") is not None: + annotations.append(f"EPSS Score: {ext_data['epss_score']:.3f}") + annotations.append(f"EPSS Percentile: {ext_data.get('epss_percentile', 'N/A')}") + + if ext_data.get("cisa_kev"): + annotations.append("CISA Known Exploited Vulnerability") + + if annotations: + vulnerability.comment = "; ".join(annotations) + + return vulnerability + + +def _map_severity_to_spdx(severity: str) -> str: + """Map Workbench severity to SPDX severity.""" 
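+    # Currently an identity mapping over the known labels; kept as a dict so
+    # SPDX-facing names can diverge from Workbench's without touching callers.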
+ severity_map = { + "CRITICAL": "CRITICAL", + "HIGH": "HIGH", + "MEDIUM": "MEDIUM", + "LOW": "LOW", + "UNKNOWN": "UNKNOWN" + } + return severity_map.get(severity.upper(), "UNKNOWN") + + +# Fallback implementation for when spdx-tools is not available +def _create_spdx_json_fallback( + vulnerabilities: List[Dict[str, Any]], + scan_code: str, + external_data: Optional[Dict[str, Dict[str, Any]]] = None, + nvd_enrichment: bool = False, + epss_enrichment: bool = False, + cisa_kev_enrichment: bool = False, +) -> Dict[str, Any]: + """ + Create a simplified SPDX 3.0-like JSON structure when spdx-tools is not available. + This is a fallback implementation that creates a basic structure. + """ + if external_data is None: + external_data = {} + + # Create fallback SPDX JSON structure + spdx_doc = { + "spdxVersion": "SPDX-3.0", + "dataLicense": "CC0-1.0", + "SPDXID": f"SPDXRef-DOCUMENT-{scan_code}", + "name": f"Vulnerability Report - {scan_code}", + "documentNamespace": f"https://workbench.fossid.com/spdx/{scan_code}", + "creationInfo": { + "created": datetime.utcnow().isoformat() + "Z", + "creators": ["Tool: FossID Workbench CLI"], + "licenseListVersion": "3.24" + }, + "comment": f"nvd_enriched: {str(nvd_enrichment).lower()}; epss_enriched: {str(epss_enrichment).lower()}; cisa_kev_enriched: {str(cisa_kev_enrichment).lower()}; workbench_scan_code: {scan_code}; generated_at: {datetime.utcnow().isoformat()}Z", + "packages": [], + "vulnerabilities": [] + } + + # Track unique packages + packages = {} + + for vuln in vulnerabilities: + component_name = vuln.get("component_name", "Unknown") + component_version = vuln.get("component_version", "Unknown") + cve = vuln.get("cve", "UNKNOWN") + + # Create package if not exists + package_key = f"{component_name}@{component_version}" + if package_key not in packages: + ecosystem = detect_package_ecosystem(component_name, component_version) + + package = { + "SPDXID": f"SPDXRef-Package-{component_name}-{component_version}", + "name": component_name, + "version": component_version, + "downloadLocation": "NOASSERTION" + } + + if ecosystem != "generic": + package["externalRefs"] = [{ + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": f"pkg:{ecosystem}/{component_name}@{component_version}" + }] + + packages[package_key] = package + spdx_doc["packages"].append(package) + + # Create vulnerability + vulnerability_id = cve if cve != "UNKNOWN" else f"UNKNOWN-{vuln.get('id', 'unknown')}" + + vulnerability = { + "SPDXID": f"SPDXRef-Vulnerability-{vulnerability_id}-{component_name}-{component_version}", + "name": vulnerability_id + } + + # Add description + if external_data.get(cve, {}).get("nvd_description"): + vulnerability["summary"] = external_data[cve]["nvd_description"] + else: + vulnerability["summary"] = f"Security vulnerability affecting {component_name} version {component_version}" + + # Add CVSS information + base_score = vuln.get("base_score") + if base_score and base_score != "N/A": + try: + vulnerability["cvssScore"] = float(base_score) + vulnerability["severity"] = vuln.get("severity", "UNKNOWN") + except (ValueError, TypeError): + pass + + # Add external references + references = [] + if cve != "UNKNOWN": + references.append({ + "referenceCategory": "SECURITY", + "referenceType": "advisory", + "referenceLocator": f"https://nvd.nist.gov/vuln/detail/{cve}" + }) + + if external_data.get(cve, {}).get("nvd_references"): + for ref in external_data[cve]["nvd_references"][:5]: + references.append({ + "referenceCategory": "OTHER", 
+ "referenceType": "other", + "referenceLocator": ref.get("url", "") + }) + + if references: + vulnerability["externalRefs"] = references + + spdx_doc["vulnerabilities"].append(vulnerability) + + return spdx_doc \ No newline at end of file diff --git a/tests/fixtures/test-sbom.json b/tests/fixtures/test-sbom.json new file mode 100644 index 0000000..a62e9b2 --- /dev/null +++ b/tests/fixtures/test-sbom.json @@ -0,0 +1,51 @@ +{ + "spdxVersion": "SPDX-2.2", + "dataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "LPA Firmware", + "documentNamespace": "http://volvocars.com/volvocars-oss-spdx-index/spa2/lpa-firmware-v2.5.0-b4dea91f-f4de-40f2-851f-8664c2356aa7", + "creationInfo": { + "creators": [ + "Person: Team Trust ()", + "Organization: Volvo Cars Corporation ()", + "Tool: lpa-sbom-generator-0.4.38 ()" + ], + "created": "2025-06-18T13:05:27Z" + }, + "packages": [ + { + "name": "lpa-firmware", + "SPDXID": "SPDXRef-1", + "versionInfo": "2.5.0", + "supplier": "Organization: Volvo Cars Corporation ()", + "downloadLocation": "NOASSERTION", + "filesAnalyzed": false, + "copyrightText": "NOASSERTION", + "licenseConcluded": "NOASSERTION", + "licenseDeclared": "NOASSERTION" + }, + { + "name": "aes", + "SPDXID": "SPDXRef-8", + "versionInfo": "0.7.5", + "downloadLocation": "https://github.com/RustCrypto/block-ciphers", + "filesAnalyzed": false, + "copyrightText": "NOASSERTION", + "licenseConcluded": "MIT OR Apache-2.0", + "licenseDeclared": "MIT OR Apache-2.0", + "summary": "Pure Rust implementation of the Advanced Encryption Standard (a.k.a. Rijndael)\nincluding support for AES in counter mode (a.k.a. AES-CTR)\n" + } + ], + "hasExtractedLicensingInfos": [], + "files": [], + "snippets": [], + "relationships": [ + { + "spdxElementId": "SPDXRef-1", + "relatedSpdxElement": "SPDXRef-8", + "relationshipType": "DEPENDS_ON" + } + ], + "annotations": [], + "documentDescribes": ["SPDXRef-1"] +} \ No newline at end of file diff --git a/tests/unit/handlers/test_show_results.py b/tests/unit/handlers/test_show_results.py index 9ec0a64..16e7adf 100644 --- a/tests/unit/handlers/test_show_results.py +++ b/tests/unit/handlers/test_show_results.py @@ -228,4 +228,6 @@ def test_handle_show_results_multiple_show_flags(self, mock_fetch, mock_workbenc # Verify assert result is True - mock_fetch.assert_called_once() \ No newline at end of file + mock_fetch.assert_called_once() + + \ No newline at end of file diff --git a/tests/unit/utilities/test_sarif_converter.py b/tests/unit/utilities/test_sarif_converter.py new file mode 100644 index 0000000..789ae71 --- /dev/null +++ b/tests/unit/utilities/test_sarif_converter.py @@ -0,0 +1,644 @@ +""" +Test suite for SARIF conversion utilities. + +This module contains comprehensive tests for the SARIF converter functionality +including conversion of vulnerability data to SARIF v2.1.0 format. 
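+External enrichment (EPSS, CISA KEV, NVD parsing) and rate limiting are
+exercised by the TestVulnerabilityEnricher cases at the end of this module.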
+""" + +import pytest +import json +import tempfile +import os +import time +from unittest.mock import patch, mock_open +from typing import Dict, List, Any + +from workbench_cli.utilities.vuln_report.sarif_generator import ( + convert_vulns_to_sarif, + save_vulns_to_sarif, + _map_severity_to_sarif_level, + _generate_enhanced_sarif_rules, + _generate_enhanced_sarif_results, + _create_run_properties +) + +from workbench_cli.utilities.vuln_report.cve_data_gathering import ( + enrich_vulnerabilities, + _fetch_epss_scores, + _fetch_cisa_kev_data, + _fetch_nvd_data, + _fetch_single_cve_nvd, + _parse_nvd_vulnerability, + RateLimiter +) + + +class TestSarifConverter: + """Test cases for SARIF conversion functionality.""" + + def test_convert_vulns_to_sarif_with_data(self): + """Test conversion of vulnerability data to SARIF format.""" + sample_vulns = [ + { + "id": 1, + "cve": "CVE-2022-12345", + "cvss_version": "3.1", + "base_score": "9.8", + "severity": "CRITICAL", + "attack_vector": "NETWORK", + "attack_complexity": "LOW", + "availability_impact": "HIGH", + "component_id": 123, + "component_name": "test-package", + "component_version": "1.0.0", + "scan_id": 456, + "rejected": 0 + }, + { + "id": 2, + "cve": "CVE-2022-67890", + "cvss_version": "3.1", + "base_score": "5.5", + "severity": "MEDIUM", + "attack_vector": "LOCAL", + "attack_complexity": "LOW", + "availability_impact": "NONE", + "component_id": 124, + "component_name": "another-package", + "component_version": "2.1.0", + "scan_id": 456, + "rejected": 0 + } + ] + + sarif_data = convert_vulns_to_sarif(sample_vulns, "TEST_SCAN_123") + + # Validate SARIF structure + assert sarif_data["version"] == "2.1.0" + assert "$schema" in sarif_data + assert len(sarif_data["runs"]) == 1 + + run = sarif_data["runs"][0] + assert run["tool"]["driver"]["name"] == "Workbench Vulnerability Scanner" + assert run["properties"]["scanCode"] == "TEST_SCAN_123" + assert "timestamp" in run["properties"] + + # Validate rules (one per unique CVE-component combination) + assert len(run["tool"]["driver"]["rules"]) == 2 + rule_ids = [rule["id"] for rule in run["tool"]["driver"]["rules"]] + assert "CVE-2022-12345:test-package@1.0.0" in rule_ids + assert "CVE-2022-67890:another-package@2.1.0" in rule_ids + + # Validate results + assert len(run["results"]) == 2 + result_rule_ids = [result["ruleId"] for result in run["results"]] + assert "CVE-2022-12345:test-package@1.0.0" in result_rule_ids + assert "CVE-2022-67890:another-package@2.1.0" in result_rule_ids + + # Validate severity mapping + critical_result = next(r for r in run["results"] if r["ruleId"] == "CVE-2022-12345:test-package@1.0.0") + medium_result = next(r for r in run["results"] if r["ruleId"] == "CVE-2022-67890:another-package@2.1.0") + assert critical_result["level"] == "error" # Critical maps to error + assert medium_result["level"] == "warning" # Medium maps to warning + + # Validate lean properties + assert "vulnerabilityCount" in run["properties"] + assert "severityDistribution" in run["properties"] + + def test_convert_vulns_to_sarif_empty_data(self): + """Test conversion with empty vulnerability data.""" + sarif_data = convert_vulns_to_sarif([], "TEST_SCAN_EMPTY") + + assert sarif_data["version"] == "2.1.0" + assert len(sarif_data["runs"]) == 1 + + run = sarif_data["runs"][0] + assert run["tool"]["driver"]["name"] == "Workbench Vulnerability Scanner" + assert run["properties"]["scanCode"] == "TEST_SCAN_EMPTY" + assert len(run["tool"]["driver"]["rules"]) == 0 + assert len(run["results"]) == 0 + + def 
test_convert_vulns_to_sarif_with_external_data(self):
+        """Test conversion with external vulnerability data."""
+        sample_vulns = [
+            {
+                "id": 1,
+                "cve": "CVE-2022-12345",
+                "cvss_version": "3.1",
+                "base_score": "9.8",
+                "severity": "CRITICAL",
+                "component_name": "test-package",
+                "component_version": "1.0.0"
+            }
+        ]
+
+        # Mock external data
+        mock_external_data = {
+            "CVE-2022-12345": {
+                "epss_score": 0.85,
+                "epss_percentile": 0.95,
+                "cisa_kev": True,
+                "nvd_description": "Test vulnerability description",
+                "nvd_cwe": ["CWE-79"]
+            }
+        }
+
+        sarif_data = convert_vulns_to_sarif(
+            sample_vulns,
+            "TEST_SCAN_ENHANCED",
+            external_data=mock_external_data
+        )
+
+        # Validate external data integration
+        run = sarif_data["runs"][0]
+        rule = run["tool"]["driver"]["rules"][0]
+
+        assert rule["properties"]["epss_score"] == 0.85
+        assert rule["properties"]["cisa_known_exploited"] == True
+
+        # The fullDescription should contain the NVD description when available
+        assert "Test vulnerability description" in rule["fullDescription"]["text"]
+
+    def test_map_severity_to_sarif_level(self):
+        """Test mapping of severity levels to SARIF levels."""
+        assert _map_severity_to_sarif_level("CRITICAL") == "error"
+        assert _map_severity_to_sarif_level("HIGH") == "error"
+        assert _map_severity_to_sarif_level("MEDIUM") == "warning"
+        assert _map_severity_to_sarif_level("LOW") == "note"
+        assert _map_severity_to_sarif_level("UNKNOWN") == "warning"
+        assert _map_severity_to_sarif_level("INVALID") == "warning"
+        assert _map_severity_to_sarif_level("") == "warning"
+
+    def test_generate_sarif_rules(self):
+        """Test generation of SARIF rules from vulnerability data."""
+        sample_vulns = [
+            {
+                "cve": "CVE-2022-12345",
+                "cvss_version": "3.1",
+                "base_score": "9.8",
+                "severity": "CRITICAL",
+                "attack_vector": "NETWORK",
+                "attack_complexity": "LOW",
+                "availability_impact": "HIGH",
+                "component_name": "test-package",
+                "component_version": "1.0.0"
+            },
+            {
+                "cve": "CVE-2022-12345",  # Duplicate CVE+component should only create one rule
+                "cvss_version": "3.1",
+                "base_score": "9.8",
+                "severity": "CRITICAL",
+                "attack_vector": "NETWORK",
+                "attack_complexity": "LOW",
+                "availability_impact": "HIGH",
+                "component_name": "test-package",
+                "component_version": "1.0.0"
+            },
+            {
+                "cve": "CVE-2022-67890",
+                "cvss_version": "3.0",
+                "base_score": "5.5",
+                "severity": "MEDIUM",
+                "component_name": "another-package",
+                "component_version": "2.1.0"
+            }
+        ]
+
+        external_data = {
+            "CVE-2022-12345": {
+                "epss_score": 0.85,
+                "epss_percentile": 0.95,
+                "cisa_kev": True,
+                "nvd_description": "Test vulnerability description"
+            }
+        }
+
+        rules = _generate_enhanced_sarif_rules(sample_vulns, external_data, enable_dynamic_risk_scoring=True)
+
+        # Should have 2 rules (one per unique CVE-component combination)
+        assert len(rules) == 2
+
+        # Check rule IDs (now include component info)
+        rule_ids = [rule["id"] for rule in rules]
+        assert "CVE-2022-12345:test-package@1.0.0" in rule_ids
+        assert "CVE-2022-67890:another-package@2.1.0" in rule_ids
+
+        # Check enriched rule properties
+        cve_rule = next(r for r in rules if r["id"] == "CVE-2022-12345:test-package@1.0.0")
+        assert cve_rule["properties"]["epss_score"] == 0.85
+        assert cve_rule["properties"]["cisa_known_exploited"] == True
+        assert "Test vulnerability description" in cve_rule["fullDescription"]["text"]
+
+        # Check component-specific properties
+        assert cve_rule["properties"]["component"] == "test-package@1.0.0"
+        assert cve_rule["properties"]["cve"] == "CVE-2022-12345"
+
+    def test_generate_sarif_results(self):
+        
"""Test generation of SARIF results from vulnerability data.""" + sample_vulns = [ + { + "id": 1, + "cve": "CVE-2022-12345", + "base_score": "9.8", + "severity": "CRITICAL", + "component_name": "test-package", + "component_version": "1.0.0", + "vuln_exp_status": "affected" + } + ] + + external_data = { + "CVE-2022-12345": { + "epss_score": 0.85, + "cisa_kev": True + } + } + + results = _generate_enhanced_sarif_results(sample_vulns, external_data, enable_dynamic_risk_scoring=True) + + assert len(results) == 1 + result = results[0] + + assert result["ruleId"] == "CVE-2022-12345:test-package@1.0.0" + assert result["level"] == "error" + assert "CVE-2022-12345 in test-package@1.0.0" in result["message"]["text"] + assert result["properties"]["epss_score"] == 0.85 + assert result["properties"]["cisa_kev"] == True + assert result["properties"]["vex_status"] == "affected" + + # Check enhanced features + assert "partialFingerprints" in result + assert "baselineState" in result + assert "fixes" in result + + def test_create_run_properties(self): + """Test creation of SARIF run properties.""" + sample_vulns = [ + {"severity": "CRITICAL", "vuln_exp_status": "affected"}, + {"severity": "HIGH", "vuln_exp_response": "will_not_fix"}, + {"severity": "MEDIUM"} + ] + + external_data = { + "CVE-2022-12345": {"epss_score": 0.85, "cisa_kev": True} + } + + all_components = [ + {"name": "test-package", "version": "1.0.0"}, + {"name": "another-package", "version": "2.1.0"} + ] + + properties = _create_run_properties( + scan_code="TEST_SCAN", + vulnerabilities=sample_vulns, + external_data=external_data, + nvd_enrichment=True, + epss_enrichment=True, + cisa_kev_enrichment=True, + all_components=all_components + ) + + assert properties["workbench_scan_code"] == "TEST_SCAN" + assert properties["total_vulnerabilities"] == 3 + assert properties["total_components"] == 2 + assert properties["enrichment_applied"]["nvd"] == True + assert properties["enrichment_applied"]["epss"] == True + assert properties["enrichment_applied"]["cisa_kev"] == True + assert properties["severity_distribution"]["critical"] == 1 + assert properties["severity_distribution"]["high"] == 1 + assert properties["severity_distribution"]["medium"] == 1 + assert properties["vex_statistics"]["total_with_vex"] == 2 + assert properties["external_data_statistics"]["enriched_cves"] == 1 + assert properties["external_data_statistics"]["with_epss"] == 1 + assert properties["external_data_statistics"]["with_cisa_kev"] == 1 + + def test_save_vulns_to_sarif_success(self): + """Test successful saving of SARIF file.""" + sample_vulns = [ + { + "id": 1, + "cve": "CVE-2022-12345", + "base_score": "9.8", + "severity": "CRITICAL", + "component_name": "test-package", + "component_version": "1.0.0" + } + ] + + with tempfile.NamedTemporaryFile(mode='w', suffix='.sarif', delete=False) as temp_file: + temp_filepath = temp_file.name + + try: + save_vulns_to_sarif( + temp_filepath, + sample_vulns, + "TEST_SCAN_123", + quiet=True + ) + + # Verify file was created and contains valid SARIF JSON + assert os.path.exists(temp_filepath) + + with open(temp_filepath, 'r') as f: + sarif_data = json.load(f) + + assert sarif_data["version"] == "2.1.0" + assert len(sarif_data["runs"]) == 1 + assert sarif_data["runs"][0]["properties"]["workbench_scan_code"] == "TEST_SCAN_123" + + finally: + # Clean up + if os.path.exists(temp_filepath): + os.unlink(temp_filepath) + + def test_save_vulns_to_sarif_creates_directory(self): + """Test that save_vulns_to_sarif creates necessary directories.""" + 
sample_vulns = [ + { + "id": 1, + "cve": "CVE-2022-12345", + "base_score": "9.8", + "severity": "CRITICAL", + "component_name": "test-package", + "component_version": "1.0.0" + } + ] + + with tempfile.TemporaryDirectory() as temp_dir: + new_dir = os.path.join(temp_dir, "new_directory") + filepath = os.path.join(new_dir, "test.sarif") + + save_vulns_to_sarif(filepath, sample_vulns, "TEST_SCAN_123", quiet=True) + + # Verify directory was created and file exists + assert os.path.exists(new_dir) + assert os.path.exists(filepath) + + def test_save_vulns_to_sarif_io_error(self): + """Test handling of IO errors during save.""" + sample_vulns = [ + { + "id": 1, + "cve": "CVE-2022-12345", + "base_score": "9.8", + "severity": "CRITICAL", + "component_name": "test-package", + "component_version": "1.0.0" + } + ] + + # Try to save to an invalid path + invalid_path = "/root/cannot_write_here.sarif" + + with pytest.raises((IOError, OSError)): + save_vulns_to_sarif(invalid_path, sample_vulns, "TEST_SCAN_123", quiet=True) + + def test_handle_missing_vulnerability_fields(self): + """Test handling of vulnerability data with missing fields.""" + sample_vulns = [ + { + "id": 1, + # Missing cve, base_score, severity, component info + }, + { + "id": 2, + "cve": "CVE-2022-12345", + # Missing component_name, component_version + } + ] + + sarif_data = convert_vulns_to_sarif(sample_vulns, "TEST_SCAN_MISSING") + + # Should still create valid SARIF structure + assert sarif_data["version"] == "2.1.0" + assert len(sarif_data["runs"]) == 1 + + run = sarif_data["runs"][0] + assert len(run["results"]) == 2 + + # Check that missing fields are handled gracefully + results = run["results"] + rule_ids = [r["ruleId"] for r in results] + + # First result should have UNKNOWN CVE with Unknown component + unknown_result = next(r for r in results if r["ruleId"].startswith("UNKNOWN")) + assert unknown_result is not None + + # Second result should have CVE but Unknown component info + cve_result = next(r for r in results if "CVE-2022-12345" in r["ruleId"]) + assert cve_result is not None + assert "Unknown" in cve_result["ruleId"] # Should have Unknown for missing component info + + # Both should have valid structure + assert "message" in unknown_result + assert "locations" in unknown_result + assert "message" in cve_result + assert "locations" in cve_result + + def test_sarif_schema_compliance(self): + """Test that generated SARIF complies with basic schema requirements.""" + sample_vulns = [ + { + "id": 1, + "cve": "CVE-2022-12345", + "base_score": "9.8", + "severity": "CRITICAL", + "component_name": "test-package", + "component_version": "1.0.0" + } + ] + + sarif_data = convert_vulns_to_sarif(sample_vulns, "TEST_SCAN_SCHEMA") + + # Basic schema compliance checks + assert "version" in sarif_data + assert "$schema" in sarif_data + assert "runs" in sarif_data + assert isinstance(sarif_data["runs"], list) + + run = sarif_data["runs"][0] + assert "tool" in run + assert "driver" in run["tool"] + assert "name" in run["tool"]["driver"] + assert "rules" in run["tool"]["driver"] + assert "results" in run + + # Check rules structure + for rule in run["tool"]["driver"]["rules"]: + assert "id" in rule + assert "shortDescription" in rule + assert "text" in rule["shortDescription"] + assert "fullDescription" in rule + assert "text" in rule["fullDescription"] + assert "defaultConfiguration" in rule + assert "level" in rule["defaultConfiguration"] + + # Check results structure + for result in run["results"]: + assert "ruleId" in result + assert 
"level" in result + assert "message" in result + assert "text" in result["message"] + assert "locations" in result + assert isinstance(result["locations"], list) + + # Check location structure + for location in result["locations"]: + assert "physicalLocation" in location + assert "artifactLocation" in location["physicalLocation"] + assert "uri" in location["physicalLocation"]["artifactLocation"] + + def test_vex_integration(self): + """Test VEX (Vulnerability Exploitability eXchange) integration.""" + sample_vulns = [ + { + "id": 1, + "cve": "CVE-2022-12345", + "base_score": "9.8", + "severity": "CRITICAL", + "component_name": "test-package", + "component_version": "1.0.0", + "vuln_exp_status": "not_affected", + "vuln_exp_response": "will_not_fix", + "vuln_exp_justification": "Component not used in production" + } + ] + + sarif_data = convert_vulns_to_sarif(sample_vulns, "TEST_SCAN_VEX") + + run = sarif_data["runs"][0] + result = run["results"][0] + + # Check VEX information in result properties + assert result["properties"]["vex_status"] == "not_affected" + assert result["properties"]["vex_response"] == "will_not_fix" + assert result["properties"]["vex_justification"] == "Component not used in production" + + # Check VEX statistics in run properties + assert "vex_statistics" in run["properties"] + vex_stats = run["properties"]["vex_statistics"] + assert vex_stats["total_with_vex"] == 1 + assert vex_stats["by_status"]["not_affected"] == 1 + assert vex_stats["by_response"]["will_not_fix"] == 1 + + +class TestVulnerabilityEnricher: + """Test cases for vulnerability enrichment functionality.""" + + def test_enrich_vulnerabilities_empty_list(self): + """Test enrichment with empty CVE list.""" + result = enrich_vulnerabilities([], True, True, True) + assert result == {} + + @patch('workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_epss_scores_success(self, mock_get): + """Test successful EPSS score fetching.""" + mock_response = mock_get.return_value + mock_response.status_code = 200 + mock_response.json.return_value = { + "data": [ + { + "cve": "CVE-2022-12345", + "epss": "0.85000", + "percentile": "0.95000" + } + ] + } + + result = _fetch_epss_scores(["CVE-2022-12345"]) + + assert "CVE-2022-12345" in result + assert result["CVE-2022-12345"]["epss_score"] == 0.85 + assert result["CVE-2022-12345"]["epss_percentile"] == 0.95 + + @patch('workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_cisa_kev_data_success(self, mock_get): + """Test successful CISA KEV data fetching.""" + mock_response = mock_get.return_value + mock_response.status_code = 200 + mock_response.json.return_value = { + "vulnerabilities": [ + { + "cveID": "CVE-2022-12345", + "vendorProject": "Test Vendor", + "product": "Test Product", + "vulnerabilityName": "Test Vulnerability", + "dateAdded": "2022-01-01", + "shortDescription": "Test description", + "requiredAction": "Test action", + "dueDate": "2022-01-15" + } + ] + } + + result = _fetch_cisa_kev_data(["CVE-2022-12345"]) + + assert "CVE-2022-12345" in result + assert result["CVE-2022-12345"]["cisa_kev"] == True + + def test_rate_limiter_functionality(self): + """Test rate limiter functionality.""" + rate_limiter = RateLimiter(max_requests=2, time_window=1.0) + + # First two requests should pass immediately + start_time = time.time() + rate_limiter.wait_if_needed() + rate_limiter.wait_if_needed() + first_duration = time.time() - start_time + + # Should be very fast (no waiting) + assert first_duration < 0.1 + 
+ # Third request should wait + start_time = time.time() + rate_limiter.wait() + wait_duration = time.time() - start_time + + # Should have waited at least 1 second + assert wait_duration >= 1.0 + + def test_parse_nvd_vulnerability(self): + """Test parsing of NVD vulnerability data.""" + mock_nvd_data = { + "vulnerabilities": [ + { + "cve": { + "id": "CVE-2022-12345", + "descriptions": [ + { + "lang": "en", + "value": "Test vulnerability description" + } + ], + "weaknesses": [ + { + "type": "Primary", + "description": [ + { + "lang": "en", + "value": "CWE-79" + } + ] + } + ], + "references": [ + { + "url": "https://example.com/advisory", + "source": "example.com", + "tags": ["Vendor Advisory"] + } + ] + } + } + ] + } + + # _parse_nvd_vulnerability takes the inner "cve" object, not the full API + # response, and returns a flat dict of NVD fields + result = _parse_nvd_vulnerability(mock_nvd_data["vulnerabilities"][0]["cve"]) + + assert result["nvd_description"] == "Test vulnerability description" + assert "CWE-79" in result["nvd_cwe"] + assert len(result["nvd_references"]) == 1 + assert result["nvd_references"][0]["url"] == "https://example.com/advisory" + + \ No newline at end of file diff --git a/tests/unit/utilities/test_scan_workflows.py b/tests/unit/utilities/test_scan_workflows.py index ba374ec..d0793d2 100644 --- a/tests/unit/utilities/test_scan_workflows.py +++ b/tests/unit/utilities/test_scan_workflows.py @@ -455,9 +455,10 @@ class TestFetchDisplaySaveResults: @patch('workbench_cli.utilities.scan_workflows.fetch_results') @patch('workbench_cli.utilities.scan_workflows.display_results') @patch('workbench_cli.utilities.scan_workflows.save_results_to_file') - def test_complete_workflow(self, mock_save, mock_display, mock_fetch, mock_workbench, mock_params): - """Test complete fetch, display, and save workflow.""" + def test_complete_workflow_legacy(self, mock_save, mock_display, mock_fetch, mock_workbench, mock_params): + """Test complete fetch, display, and save workflow with legacy path_result.""" mock_params.path_result = "output.json" + mock_params.json_result_path = None mock_params.show_licenses = True mock_fetch.return_value = {"test": "data"} mock_display.return_value = True @@ -468,11 +469,31 @@ def test_complete_workflow(self, mock_save, mock_display, mock_fetch, mock_workb mock_display.assert_called_once_with({"test": "data"}, mock_params) mock_save.assert_called_once_with("output.json", {"test": "data"}, TEST_SCAN_CODE) + @patch('workbench_cli.utilities.scan_workflows.fetch_results') + @patch('workbench_cli.utilities.scan_workflows.display_results') + @patch('workbench_cli.utilities.scan_workflows.save_results_to_file') + def test_json_result_path_workflow(self, mock_save, mock_display, mock_fetch, mock_workbench, mock_params): + """Test fetch, display, and save workflow with JSON result path.""" + mock_params.path_result = None + mock_params.json_result_path = "output.json" + mock_params.show_licenses = True + mock_fetch.return_value = {"test": "data"} + mock_display.return_value = True + + fetch_display_save_results(mock_workbench, mock_params, TEST_SCAN_CODE) + + mock_fetch.assert_called_once_with(mock_workbench, mock_params, TEST_SCAN_CODE) + mock_display.assert_called_once_with({"test": "data"}, mock_params) + mock_save.assert_called_once_with("output.json", {"test": "data"}, TEST_SCAN_CODE) + + + @patch('workbench_cli.utilities.scan_workflows.fetch_results') @patch('workbench_cli.utilities.scan_workflows.display_results') def test_no_save_specified(self,
mock_display, mock_fetch, mock_workbench, mock_params): """Test fetch and display without saving.""" mock_params.path_result = None + mock_params.json_result_path = None mock_params.show_licenses = True mock_fetch.return_value = {"test": "data"} mock_display.return_value = True diff --git a/tests/unit/utilities/test_vulnerability_enricher.py b/tests/unit/utilities/test_vulnerability_enricher.py new file mode 100644 index 0000000..fcf8efd --- /dev/null +++ b/tests/unit/utilities/test_vulnerability_enricher.py @@ -0,0 +1,567 @@ +""" +Test suite for vulnerability enrichment utilities. + +This module contains comprehensive tests for the vulnerability enricher functionality +including external API integration for EPSS scores, CISA KEV data, and NVD details. +""" + +import pytest +import json +import time +import os +from unittest.mock import patch, Mock +from typing import Dict, List, Any + +from src.workbench_cli.utilities.vuln_report.cve_data_gathering import ( + enrich_vulnerabilities, + _fetch_external_vulnerability_data, + _fetch_epss_scores, + _fetch_cisa_kev_data, + _fetch_nvd_data, + _fetch_single_cve_nvd, + _parse_nvd_vulnerability, + RateLimiter +) + + +class TestVulnerabilityEnricher: + """Test cases for main vulnerability enrichment functionality.""" + + def test_enrich_vulnerabilities_empty_list(self): + """Test enrichment with empty CVE list.""" + result = enrich_vulnerabilities([]) + assert result == {} + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering._fetch_external_vulnerability_data') + def test_enrich_vulnerabilities_with_cves(self, mock_fetch): + """Test enrichment with CVE list.""" + mock_fetch.return_value = { + "CVE-2022-12345": { + "epss_score": 0.85, + "cisa_kev": True + } + } + + result = enrich_vulnerabilities(["CVE-2022-12345"]) + + assert "CVE-2022-12345" in result + assert result["CVE-2022-12345"]["epss_score"] == 0.85 + mock_fetch.assert_called_once() + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering._fetch_epss_scores') + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering._fetch_cisa_kev_data') + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering._fetch_nvd_data') + def test_fetch_external_vulnerability_data_all_sources(self, mock_nvd, mock_kev, mock_epss): + """Test fetching from all external data sources.""" + cve_list = ["CVE-2022-12345"] + + mock_epss.return_value = { + "CVE-2022-12345": {"epss_score": 0.85, "epss_percentile": 0.95} + } + mock_kev.return_value = ["CVE-2022-12345"] + mock_nvd.return_value = { + "CVE-2022-12345": {"nvd_description": "Test description"} + } + + result = _fetch_external_vulnerability_data(cve_list) + + assert "CVE-2022-12345" in result + assert result["CVE-2022-12345"]["epss_score"] == 0.85 + assert result["CVE-2022-12345"]["cisa_kev"] == True + assert result["CVE-2022-12345"]["nvd_description"] == "Test description" + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering._fetch_epss_scores') + def test_fetch_external_vulnerability_data_epss_only(self, mock_epss): + """Test fetching EPSS data only.""" + cve_list = ["CVE-2022-12345"] + + mock_epss.return_value = { + "CVE-2022-12345": {"epss_score": 0.75} + } + + result = _fetch_external_vulnerability_data( + cve_list, + nvd_enrichment=False, + epss_enrichment=True, + cisa_kev_enrichment=False + ) + + assert "CVE-2022-12345" in result + assert result["CVE-2022-12345"]["epss_score"] == 0.75 + assert result["CVE-2022-12345"]["cisa_kev"] == False + assert 
result["CVE-2022-12345"]["nvd_description"] is None + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering._fetch_epss_scores') + def test_fetch_external_vulnerability_data_with_exceptions(self, mock_epss): + """Test handling of exceptions during external data fetching.""" + cve_list = ["CVE-2022-12345"] + + mock_epss.side_effect = Exception("EPSS API failed") + + # Should not raise exception, should return initialized data structure + result = _fetch_external_vulnerability_data(cve_list) + + assert "CVE-2022-12345" in result + assert result["CVE-2022-12345"]["epss_score"] is None + assert result["CVE-2022-12345"]["cisa_kev"] == False + + +class TestEpssDataFetching: + """Test cases for EPSS score fetching.""" + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_epss_scores_success(self, mock_get): + """Test successful EPSS score fetching.""" + mock_response = { + "status": "OK", + "data": [ + { + "cve": "CVE-2022-12345", + "epss": "0.85000", + "percentile": "0.95000" + }, + { + "cve": "CVE-2022-67890", + "epss": "0.15000", + "percentile": "0.25000" + } + ] + } + mock_get.return_value.json.return_value = mock_response + mock_get.return_value.raise_for_status.return_value = None + + result = _fetch_epss_scores(["CVE-2022-12345", "CVE-2022-67890"]) + + assert len(result) == 2 + assert result["CVE-2022-12345"]["epss_score"] == 0.85 + assert result["CVE-2022-12345"]["epss_percentile"] == 0.95 + assert result["CVE-2022-67890"]["epss_score"] == 0.15 + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_epss_scores_batch_processing(self, mock_get): + """Test EPSS batch processing with large CVE lists.""" + # Create a list larger than batch size (100) + cve_list = [f"CVE-2022-{i:05d}" for i in range(150)] + + # Mock responses for two batches + mock_response = { + "status": "OK", + "data": [{"cve": cve, "epss": "0.5", "percentile": "0.5"} for cve in cve_list[:100]] + } + mock_get.return_value.json.return_value = mock_response + mock_get.return_value.raise_for_status.return_value = None + + with patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.time.sleep'): + result = _fetch_epss_scores(cve_list) + + # Should be called twice for two batches + assert mock_get.call_count == 2 + assert len(result) == 100 # Only first batch in mock response + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_epss_scores_api_error(self, mock_get): + """Test handling of EPSS API errors.""" + mock_get.side_effect = Exception("API Error") + + result = _fetch_epss_scores(["CVE-2022-12345"]) + + assert result == {} + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_epss_scores_invalid_response(self, mock_get): + """Test handling of invalid EPSS API response.""" + mock_response = {"status": "ERROR", "message": "Invalid request"} + mock_get.return_value.json.return_value = mock_response + mock_get.return_value.raise_for_status.return_value = None + + result = _fetch_epss_scores(["CVE-2022-12345"]) + + assert result == {} + + +class TestCisaKevDataFetching: + """Test cases for CISA KEV data fetching.""" + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_cisa_kev_data_success(self, mock_get): + """Test successful CISA KEV data fetching.""" + mock_response = { + "vulnerabilities": [ + { + "cveID": "CVE-2022-12345", + "vendorProject": "Test Vendor", 
+ "product": "Test Product" + }, + { + "cveID": "CVE-2022-67890", + "vendorProject": "Another Vendor", + "product": "Another Product" + } + ] + } + mock_get.return_value.json.return_value = mock_response + mock_get.return_value.raise_for_status.return_value = None + + result = _fetch_cisa_kev_data(["CVE-2022-12345", "CVE-2022-99999"]) + + assert len(result) == 1 + assert "CVE-2022-12345" in result + assert "CVE-2022-99999" not in result + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_cisa_kev_data_no_vulnerabilities(self, mock_get): + """Test CISA KEV response with no vulnerabilities section.""" + mock_response = {"other_data": "value"} + mock_get.return_value.json.return_value = mock_response + mock_get.return_value.raise_for_status.return_value = None + + result = _fetch_cisa_kev_data(["CVE-2022-12345"]) + + assert result == [] + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_cisa_kev_data_api_error(self, mock_get): + """Test handling of CISA KEV API errors.""" + mock_get.side_effect = Exception("API Error") + + result = _fetch_cisa_kev_data(["CVE-2022-12345"]) + + assert result == [] + + +class TestNvdDataFetching: + """Test cases for NVD data fetching.""" + + def test_fetch_nvd_data_caching(self): + """Test that NVD data is cached between calls.""" + from src.workbench_cli.utilities.vuln_report.cve_data_gathering import _NVD_CACHE + + # Clear cache + _NVD_CACHE.clear() + + with patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering._fetch_single_cve_nvd') as mock_fetch: + mock_fetch.return_value = {"nvd_description": "Test desc"} + + # First call should fetch from API + result1 = _fetch_nvd_data(["CVE-2022-12345"]) + assert mock_fetch.call_count == 1 + + # Second call should use cache + result2 = _fetch_nvd_data(["CVE-2022-12345"]) + assert mock_fetch.call_count == 1 # Should not increase + + assert result1 == result2 + + @patch.dict(os.environ, {"NVD_API_KEY": "test-key"}) + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.ThreadPoolExecutor') + def test_fetch_nvd_data_with_api_key(self, mock_executor): + """Test NVD data fetching with API key.""" + from src.workbench_cli.utilities.vuln_report.cve_data_gathering import _NVD_CACHE + + _NVD_CACHE.clear() + + # Mock executor behavior + mock_future = Mock() + mock_future.result.return_value = {"nvd_description": "Test desc"} + mock_executor.return_value.__enter__.return_value.submit.return_value = mock_future + + with patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.as_completed') as mock_as_completed: + mock_as_completed.return_value = [mock_future] + + result = _fetch_nvd_data(["CVE-2022-12345"]) + + # Should use higher concurrency with API key + mock_executor.assert_called_with(max_workers=5) + assert "CVE-2022-12345" in result + + @patch.dict(os.environ, {}, clear=True) + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.ThreadPoolExecutor') + def test_fetch_nvd_data_without_api_key(self, mock_executor): + """Test NVD data fetching without API key.""" + from src.workbench_cli.utilities.vuln_report.cve_data_gathering import _NVD_CACHE + + _NVD_CACHE.clear() + + # Mock executor behavior + mock_future = Mock() + mock_future.result.return_value = {"nvd_description": "Test desc"} + mock_executor.return_value.__enter__.return_value.submit.return_value = mock_future + + with patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.as_completed') as 
mock_as_completed: + mock_as_completed.return_value = [mock_future] + + result = _fetch_nvd_data(["CVE-2022-12345"]) + + # Should use lower concurrency without API key + mock_executor.assert_called_with(max_workers=2) + assert "CVE-2022-12345" in result + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_single_cve_nvd_success(self, mock_get): + """Test successful single CVE fetching from NVD.""" + mock_response = { + "vulnerabilities": [ + { + "cve": { + "descriptions": [ + { + "lang": "en", + "value": "Test vulnerability description" + } + ], + "weaknesses": [ + { + "type": "Primary", + "description": [ + { + "lang": "en", + "value": "CWE-79" + } + ] + } + ], + "references": [ + { + "url": "https://example.com/vuln", + "source": "test" + } + ], + "metrics": { + "cvssMetricV31": [ + { + "cvssData": { + "vectorString": "CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N", + "baseScore": 6.1 + } + } + ] + } + } + } + ] + } + mock_get.return_value.json.return_value = mock_response + mock_get.return_value.raise_for_status.return_value = None + + from src.workbench_cli.utilities.vuln_report.cve_data_gathering import RateLimiter + rate_limiter = RateLimiter(max_workers=1, delay=0.1) + + result = _fetch_single_cve_nvd("CVE-2022-12345", None, rate_limiter, 30) + + assert result["nvd_description"] == "Test vulnerability description" + assert result["nvd_cwe"] == ["CWE-79"] + assert result["full_cvss_vector"] == "CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N" + assert result["cvss_score"] == 6.1 + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_single_cve_nvd_rate_limited(self, mock_get): + """Test handling of rate limiting in NVD API.""" + # First call returns 429, second call succeeds + mock_response_429 = Mock() + mock_response_429.status_code = 429 + mock_response_429.headers = {"Retry-After": "1"} + + mock_response_success = Mock() + mock_response_success.status_code = 200 + mock_response_success.raise_for_status.return_value = None + mock_response_success.json.return_value = { + "vulnerabilities": [ + { + "cve": { + "descriptions": [ + { + "lang": "en", + "value": "Test description" + } + ] + } + } + ] + } + + mock_get.side_effect = [mock_response_429, mock_response_success] + + from src.workbench_cli.utilities.vuln_report.cve_data_gathering import RateLimiter + rate_limiter = RateLimiter(max_workers=1, delay=0.1) + + with patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.time.sleep') as mock_sleep: + result = _fetch_single_cve_nvd("CVE-2022-12345", None, rate_limiter, 30) + + assert result["nvd_description"] == "Test description" + assert mock_get.call_count == 2 + # Should have slept due to rate limiting (may be called multiple times due to rate limiter) + assert mock_sleep.call_count >= 1 + # One of the calls should be for the retry-after delay + sleep_calls = [call.args[0] for call in mock_sleep.call_args_list] + assert 1 in sleep_calls + + @patch('src.workbench_cli.utilities.vuln_report.cve_data_gathering.requests.get') + def test_fetch_single_cve_nvd_with_api_key(self, mock_get): + """Test single CVE fetching with API key.""" + mock_response = { + "vulnerabilities": [ + { + "cve": { + "descriptions": [ + { + "lang": "en", + "value": "Test vulnerability with API key" + } + ] + } + } + ] + } + mock_get.return_value.json.return_value = mock_response + mock_get.return_value.raise_for_status.return_value = None + + from src.workbench_cli.utilities.vuln_report.cve_data_gathering 
import RateLimiter + rate_limiter = RateLimiter(max_workers=1, delay=0.1) + + result = _fetch_single_cve_nvd("CVE-2022-12345", "test-api-key", rate_limiter, 30) + + # Check that API key was included in headers + call_args = mock_get.call_args + assert "apiKey" in call_args[1]["headers"] + assert call_args[1]["headers"]["apiKey"] == "test-api-key" + assert result["nvd_description"] == "Test vulnerability with API key" + + def test_parse_nvd_vulnerability_complete_data(self): + """Test parsing of complete NVD vulnerability data.""" + nvd_data = { + "descriptions": [ + { + "lang": "en", + "value": "Critical vulnerability in test component" + } + ], + "weaknesses": [ + { + "type": "Primary", + "description": [ + { + "lang": "en", + "value": "CWE-79" + } + ] + }, + { + "type": "Secondary", + "description": [{"lang": "en", "value": "CWE-20"}] + } + ], + "references": [ + {"url": "https://example.com/ref1", "source": "vendor", "tags": ["Vendor Advisory"]}, + {"url": "https://example.com/ref2", "source": "mitre", "tags": ["Third Party Advisory"]} + ], + "metrics": { + "cvssMetricV31": [{ + "cvssData": { + "vectorString": "CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N", + "baseScore": 6.1 + } + }] + } + } + + result = _parse_nvd_vulnerability(nvd_data) + + assert result["nvd_description"] == "Critical vulnerability in test component" + assert result["nvd_cwe"] == ["CWE-79"] # Should only include Primary type + assert len(result["nvd_references"]) == 2 + assert result["full_cvss_vector"] == "CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N" + assert result["cvss_score"] == 6.1 + + def test_parse_nvd_vulnerability_minimal_data(self): + """Test parsing of minimal NVD vulnerability data.""" + nvd_data = {} + + result = _parse_nvd_vulnerability(nvd_data) + + assert result["nvd_description"] == "No description available" + assert result["nvd_cwe"] == [] + assert result["nvd_references"] == [] + assert result["full_cvss_vector"] is None + assert result["cvss_score"] is None + + +class TestRateLimiter: + """Test cases for rate limiter functionality.""" + + def test_rate_limiter_basic_functionality(self): + """Test basic rate limiter functionality.""" + rate_limiter = RateLimiter(max_workers=2, delay=1.0) + + # First call should not wait + start_time = time.time() + rate_limiter.wait() + end_time = time.time() + + assert end_time - start_time < 0.1 # Should be immediate + + # Second call should not wait (within token limit) + start_time = time.time() + rate_limiter.wait() + end_time = time.time() + + assert end_time - start_time < 0.1 # Should be immediate + + def test_rate_limiter_token_exhaustion(self): + """Test rate limiter behavior when tokens are exhausted.""" + rate_limiter = RateLimiter(max_workers=1, delay=0.1) + + # First call should not wait + rate_limiter.wait() + + # Second call should wait since tokens are exhausted + start_time = time.time() + rate_limiter.wait() + end_time = time.time() + + assert end_time - start_time >= 0.09 # Should wait for token replenishment + + def test_rate_limiter_token_replenishment(self): + """Test that tokens are replenished over time.""" + rate_limiter = RateLimiter(max_workers=2, delay=0.1) + + # Exhaust tokens + rate_limiter.wait() + rate_limiter.wait() + + # Wait for token replenishment + time.sleep(0.11) + + # Should be able to make another call without delay + start_time = time.time() + rate_limiter.wait() + end_time = time.time() + + assert end_time - start_time < 0.05 # Should be quick + + def test_rate_limiter_thread_safety(self): + """Test rate limiter 
thread safety.""" + import threading + + rate_limiter = RateLimiter(max_workers=2, delay=0.1) + results = [] + + def worker(): + start_time = time.time() + rate_limiter.wait() + end_time = time.time() + results.append(end_time - start_time) + + # Start multiple threads + threads = [threading.Thread(target=worker) for _ in range(5)] + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + # Some calls should be immediate, others should wait + assert len(results) == 5 + immediate_calls = sum(1 for r in results if r < 0.05) + delayed_calls = sum(1 for r in results if r >= 0.05) + + assert immediate_calls >= 2 # At least max_workers should be immediate + assert delayed_calls >= 1 # At least one should be delayed \ No newline at end of file