diff --git a/CHANGELOG.md b/CHANGELOG.md index b1cb4d8d..7d2b326f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - Upcoming changes... +## [1.44.0] - 2026-01-06 +### Added +- Added scan engine tuning parameters for snippet matching: + - `--min-snippet-hits` - Minimum snippet hits required (0 defers to server config) + - `--min-snippet-lines` - Minimum snippet lines required (0 defers to server config) + - `--snippet-range-tolerance` (`-srt`) - Snippet range tolerance (0 defers to server config) + - `--ranking` - Enable/disable result ranking (unset/true/false) + - `--ranking-threshold` - Ranking threshold value (-1 to 99, -1 defers to server config) + - `--honour-file-exts` - Honour file extensions during matching (unset/true/false) +- Added `file_snippet` section to scanoss.json settings schema for configuring tuning parameters +- Added `ScanSettingsBuilder` class for merging CLI and settings file configurations with priority: CLI > file_snippet > root settings + ## [1.43.1] - 2026-01-05 ### Changed - Restored `--no-wfp-output` flag for backwards compatibility (deprecated, no effect) @@ -772,4 +784,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [1.41.1]: https://github.com/scanoss/scanoss.py/compare/v1.41.0...v1.41.1 [1.42.0]: https://github.com/scanoss/scanoss.py/compare/v1.41.1...v1.42.0 [1.43.0]: https://github.com/scanoss/scanoss.py/compare/v1.42.0...v1.43.0 -[1.43.1]: https://github.com/scanoss/scanoss.py/compare/v1.43.0...v1.43.1 \ No newline at end of file +[1.43.1]: https://github.com/scanoss/scanoss.py/compare/v1.43.0...v1.43.1 +[1.44.0]: https://github.com/scanoss/scanoss.py/compare/v1.43.1...v1.44.0 \ No newline at end of file diff --git a/CLIENT_HELP.md b/CLIENT_HELP.md index 877c6b8a..9655a10d 100644 --- a/CLIENT_HELP.md +++ b/CLIENT_HELP.md @@ -259,6 +259,60 @@ Multiple Headers: You can specify any 
number of custom headers by repeating the scanoss-py scan src -hdr "x-api-key:12345" -hdr "Authorization: Bearer " ``` +### Scan with Snippet Tuning Options +The following flags allow you to fine-tune snippet matching behavior during scanning: + +#### Set minimum snippet hits +Require at least 5 snippet hits for a match. A value of 0 defers to server configuration: +```bash +scanoss-py scan -o scan-results.json --min-snippet-hits 5 src +``` + +#### Set minimum snippet lines +Require at least 3 snippet lines for a match. A value of 0 defers to server configuration: +```bash +scanoss-py scan -o scan-results.json --min-snippet-lines 3 src +``` + +#### Set snippet range tolerance +Set the snippet range tolerance. A value of 0 defers to server configuration: +```bash +scanoss-py scan -o scan-results.json --snippet-range-tolerance 5 src +``` +Short form: +```bash +scanoss-py scan -o scan-results.json -srt 5 src +``` + +#### Enable or disable ranking +Enable ranking to prioritize results: +```bash +scanoss-py scan -o scan-results.json --ranking true src +``` +Disable ranking: +```bash +scanoss-py scan -o scan-results.json --ranking false src +``` + +#### Set ranking threshold +Set the ranking threshold to 50 (valid range: 0-99). 
A value of -1 defers to server configuration: +```bash +scanoss-py scan -o scan-results.json --ranking-threshold=50 src +``` +Note: Use `=` syntax for negative values: `--ranking-threshold=-1` + +#### Honour file extensions +Control whether file extensions are considered during matching: +```bash +scanoss-py scan -o scan-results.json --honour-file-exts true src +``` + +#### Combine multiple tuning options +You can combine multiple tuning options in a single scan: +```bash +scanoss-py scan -o scan-results.json --min-snippet-hits 5 --min-snippet-lines 3 --ranking true --ranking-threshold=75 src +``` + ### Converting RAW results into other formats The following command provides the capability to convert the RAW scan results from a SCANOSS scan into multiple different formats, including CycloneDX, SPDX Lite, CSV and GitLab Code Quality Report. For the full set of formats, please run: diff --git a/docs/source/_static/scanoss-settings-schema.json b/docs/source/_static/scanoss-settings-schema.json index e9e2cdc3..a167a63b 100644 --- a/docs/source/_static/scanoss-settings-schema.json +++ b/docs/source/_static/scanoss-settings-schema.json @@ -139,6 +139,130 @@ } } } + }, + "proxy": { + "type": "object", + "description": "Proxy configuration for API requests", + "properties": { + "host": { + "type": "string", + "description": "Proxy host URL" + } + } + }, + "http_config": { + "type": "object", + "description": "HTTP configuration for API requests", + "properties": { + "base_uri": { + "type": "string", + "description": "Base URI for API requests" + }, + "ignore_cert_errors": { + "type": "boolean", + "description": "Whether to ignore certificate errors" + } + } + }, + "file_snippet": { + "type": "object", + "description": "File snippet scanning configuration", + "properties": { + "proxy": { + "type": "object", + "description": "Proxy configuration for file snippet requests", + "properties": { + "host": { + "type": "string", + "description": "Proxy host URL" + } + } + }, + 
"http_config": { + "type": "object", + "description": "HTTP configuration for file snippet requests", + "properties": { + "base_uri": { + "type": "string", + "description": "Base URI for file snippet API requests" + }, + "ignore_cert_errors": { + "type": "boolean", + "description": "Whether to ignore certificate errors" + } + } + }, + "ranking_enabled": { + "type": ["boolean", "null"], + "description": "Enable/disable ranking", + "default": null + }, + "ranking_threshold": { + "type": ["integer", "null"], + "description": "Ranking threshold value. A value of -1 defers to server configuration", + "minimum": -1, + "maximum": 99, + "default": 0 + }, + "min_snippet_hits": { + "type": "integer", + "description": "Minimum snippet hits required", + "minimum": 0, + "default": 0 + }, + "min_snippet_lines": { + "type": "integer", + "description": "Minimum snippet lines required", + "minimum": 0, + "default": 0 + }, + "snippet_range_tolerance": { + "type": "integer", + "description": "Snippet range tolerance", + "minimum": 0, + "default": 0 + }, + "honour_file_exts": { + "type": ["boolean", "null"], + "description": "Ignores file extensions. When not set, defers to server configuration.", + "default": true + }, + "dependency_analysis": { + "type": "boolean", + "description": "Enable dependency analysis" + }, + "skip_headers": { + "type": "boolean", + "description": "Skip license headers, comments and imports at the beginning of files", + "default": false + }, + "skip_headers_limit": { + "type": "integer", + "description": "Maximum number of lines to skip when filtering headers", + "default": 0 + } + } + }, + "hpfm": { + "type": "object", + "description": "HPFM (High Precision Folder Matching) configuration", + "properties": { + "ranking_enabled": { + "type": "boolean", + "description": "Enable ranking for HPFM" + }, + "ranking_threshold": { + "type": ["integer", "null"], + "description": "Ranking threshold value. 
A value of -1 defers to server configuration", + "minimum": -1, + "maximum": 99, + "default": 0 + } + } + }, + "container": { + "type": "object", + "description": "Container scanning configuration" } } }, diff --git a/scanoss.json b/scanoss.json index 954cd89a..9289295d 100644 --- a/scanoss.json +++ b/scanoss.json @@ -15,6 +15,9 @@ "include": [ { "purl": "pkg:github/scanoss/scanoss.py" + }, + { + "purl": "pkg:github/scanoss/scanoss-winnowing.py" } ], "remove": [] diff --git a/src/scanoss/__init__.py b/src/scanoss/__init__.py index d090807f..b7db689a 100644 --- a/src/scanoss/__init__.py +++ b/src/scanoss/__init__.py @@ -22,4 +22,4 @@ THE SOFTWARE. """ -__version__ = '1.43.1' +__version__ = '1.44.0' diff --git a/src/scanoss/cli.py b/src/scanoss/cli.py index 520a6255..a15ecc5b 100644 --- a/src/scanoss/cli.py +++ b/src/scanoss/cli.py @@ -190,6 +190,46 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915 '--no-wfp-output', action='store_true', help='DEPRECATED: Scans no longer generate scanner_output.wfp. Use "fingerprint -o" to create WFP files.' ) + # Snippet tuning options + p_scan.add_argument( + '--min-snippet-hits', + type=int, + default=None, + help='Minimum snippet hits required. A value of 0 defers to server configuration (optional)', + ) + p_scan.add_argument( + '--min-snippet-lines', + type=int, + default=None, + help='Minimum snippet lines required. A value of 0 defers to server configuration (optional)', + ) + p_scan.add_argument( + '-srt', + '--snippet-range-tolerance', + type=int, + default=None, + help='Snippet range tolerance. A value of 0 defers to server configuration (optional)', + ) + p_scan.add_argument( + '--ranking', + type=str, + choices=['unset' ,'true', 'false'], + default='unset', + help='Enable or disable ranking (optional - default: server configuration)', + ) + p_scan.add_argument( + '--ranking-threshold', + type=int, + default=None, + help='Ranking threshold value. Valid range: -1 to 99. 
A value of -1 defers to server configuration (optional)', + ) + p_scan.add_argument( + '--honour-file-exts', + type=str, + choices=['unset','true', 'false'], + default='unset', + help='Honour file extensions during scanning. When not set, defers to server configuration (optional)', + ) # Sub-command: fingerprint p_wfp = subparsers.add_parser( @@ -1377,11 +1417,11 @@ def wfp(parser, args): initialise_empty_file(args.output) # Load scan settings - scan_settings = None + scanoss_settings = None if not args.skip_settings_file: - scan_settings = ScanossSettings(debug=args.debug, trace=args.trace, quiet=args.quiet) + scanoss_settings = ScanossSettings(debug=args.debug, trace=args.trace, quiet=args.quiet) try: - scan_settings.load_json_file(args.settings, args.scan_dir) + scanoss_settings.load_json_file(args.settings, args.scan_dir) except ScanossSettingsError as e: print_stderr(f'Error: {e}') sys.exit(1) @@ -1403,7 +1443,7 @@ def wfp(parser, args): skip_md5_ids=args.skip_md5, strip_hpsm_ids=args.strip_hpsm, strip_snippet_ids=args.strip_snippet, - scan_settings=scan_settings, + scanoss_settings=scanoss_settings, skip_headers=args.skip_headers, skip_headers_limit=args.skip_headers_limit, ) @@ -1487,20 +1527,20 @@ def scan(parser, args): # noqa: PLR0912, PLR0915 print_stderr('ERROR: Cannot specify both --settings and --skip-file-settings options.') sys.exit(1) # Figure out which settings (if any) to load before processing - scan_settings = None + scanoss_settings = None if not args.skip_settings_file: - scan_settings = ScanossSettings(debug=args.debug, trace=args.trace, quiet=args.quiet) + scanoss_settings = ScanossSettings(debug=args.debug, trace=args.trace, quiet=args.quiet) try: if args.identify: - scan_settings.load_json_file(args.identify, args.scan_dir).set_file_type('legacy').set_scan_type( + scanoss_settings.load_json_file(args.identify, args.scan_dir).set_file_type('legacy').set_scan_type( 'identify' ) elif args.ignore: - scan_settings.load_json_file(args.ignore, 
args.scan_dir).set_file_type('legacy').set_scan_type( + scanoss_settings.load_json_file(args.ignore, args.scan_dir).set_file_type('legacy').set_scan_type( 'blacklist' ) else: - scan_settings.load_json_file(args.settings, args.scan_dir).set_file_type('new') + scanoss_settings.load_json_file(args.settings, args.scan_dir).set_file_type('new') except ScanossSettingsError as e: print_stderr(f'Error: {e}') @@ -1596,9 +1636,15 @@ def scan(parser, args): # noqa: PLR0912, PLR0915 skip_md5_ids=args.skip_md5, strip_hpsm_ids=args.strip_hpsm, strip_snippet_ids=args.strip_snippet, - scan_settings=scan_settings, + scanoss_settings=scanoss_settings, req_headers=process_req_headers(args.header), use_grpc=args.grpc, + min_snippet_hits=args.min_snippet_hits, + min_snippet_lines=args.min_snippet_lines, + snippet_range_tolerance=args.snippet_range_tolerance, + ranking=args.ranking, + ranking_threshold=args.ranking_threshold, + honour_file_exts=args.honour_file_exts, skip_headers=args.skip_headers, skip_headers_limit=args.skip_headers_limit, ) diff --git a/src/scanoss/data/scanoss-settings-schema.json b/src/scanoss/data/scanoss-settings-schema.json index e9e2cdc3..a167a63b 100644 --- a/src/scanoss/data/scanoss-settings-schema.json +++ b/src/scanoss/data/scanoss-settings-schema.json @@ -139,6 +139,130 @@ } } } + }, + "proxy": { + "type": "object", + "description": "Proxy configuration for API requests", + "properties": { + "host": { + "type": "string", + "description": "Proxy host URL" + } + } + }, + "http_config": { + "type": "object", + "description": "HTTP configuration for API requests", + "properties": { + "base_uri": { + "type": "string", + "description": "Base URI for API requests" + }, + "ignore_cert_errors": { + "type": "boolean", + "description": "Whether to ignore certificate errors" + } + } + }, + "file_snippet": { + "type": "object", + "description": "File snippet scanning configuration", + "properties": { + "proxy": { + "type": "object", + "description": "Proxy 
configuration for file snippet requests", + "properties": { + "host": { + "type": "string", + "description": "Proxy host URL" + } + } + }, + "http_config": { + "type": "object", + "description": "HTTP configuration for file snippet requests", + "properties": { + "base_uri": { + "type": "string", + "description": "Base URI for file snippet API requests" + }, + "ignore_cert_errors": { + "type": "boolean", + "description": "Whether to ignore certificate errors" + } + } + }, + "ranking_enabled": { + "type": ["boolean", "null"], + "description": "Enable/disable ranking", + "default": null + }, + "ranking_threshold": { + "type": ["integer", "null"], + "description": "Ranking threshold value. A value of -1 defers to server configuration", + "minimum": -1, + "maximum": 99, + "default": 0 + }, + "min_snippet_hits": { + "type": "integer", + "description": "Minimum snippet hits required", + "minimum": 0, + "default": 0 + }, + "min_snippet_lines": { + "type": "integer", + "description": "Minimum snippet lines required", + "minimum": 0, + "default": 0 + }, + "snippet_range_tolerance": { + "type": "integer", + "description": "Snippet range tolerance", + "minimum": 0, + "default": 0 + }, + "honour_file_exts": { + "type": ["boolean", "null"], + "description": "Ignores file extensions. 
class ScanSettingsBuilder:
    """Builder that merges CLI arguments with scanoss.json settings file values.

    Every ``with_*`` method resolves one option and returns ``self`` so calls
    can be chained. Values are resolved with the following priority order:
        1. settings.file_snippet section in scanoss.json (highest priority)
        2. settings section in scanoss.json (middle priority, proxy/http_config only)
        3. CLI arguments (lowest priority - used as fallback)

    Attributes:
        proxy: Merged proxy host URL
        url: Merged API base URL
        ignore_cert_errors: Whether to ignore SSL certificate errors
        min_snippet_hits: Minimum snippet hits required for matching
        min_snippet_lines: Minimum snippet lines required for matching
        snippet_range_tolerance: Snippet range tolerance
        honour_file_exts: True/False once resolved, 'unset' or None otherwise
        ranking: True/False once resolved, 'unset' or None otherwise
        ranking_threshold: Ranking threshold value
    """

    # Attributes reported by __repr__, in declaration order.
    _MERGED_FIELDS = (
        'proxy',
        'url',
        'ignore_cert_errors',
        'min_snippet_hits',
        'min_snippet_lines',
        'snippet_range_tolerance',
        'honour_file_exts',
        'ranking',
        'ranking_threshold',
    )

    def __init__(self, scanoss_settings: 'ScanossSettings | None'):
        """Initialize the builder with optional scanoss settings.

        Args:
            scanoss_settings: ScanossSettings instance loaded from scanoss.json,
                              or None if no settings file is available.
        """
        self.scanoss_settings = scanoss_settings
        # Merged values (populated by the with_* methods)
        self.proxy: Optional[str] = None
        self.url: Optional[str] = None
        self.ignore_cert_errors: bool = False
        self.min_snippet_hits: Optional[int] = None
        self.min_snippet_lines: Optional[int] = None
        self.snippet_range_tolerance: Optional[int] = None
        # These two hold a bool once resolved, or the CLI sentinel 'unset'.
        self.honour_file_exts: 'bool | str | None' = None
        self.ranking: 'bool | str | None' = None
        self.ranking_threshold: Optional[int] = None

    def __repr__(self) -> str:
        """Return a readable summary of the merged values (used in debug output)."""
        merged = ', '.join(f'{name}={getattr(self, name)!r}' for name in self._MERGED_FIELDS)
        return f'{type(self).__name__}({merged})'

    def with_proxy(self, cli_value: Optional[str] = None) -> 'ScanSettingsBuilder':
        """Set proxy host with priority: file_snippet.proxy.host > settings.proxy.host > CLI.

        Args:
            cli_value: Proxy host from CLI argument (e.g., 'http://proxy:8080')

        Returns:
            Self for method chaining
        """
        self.proxy = self._merge_with_priority(
            cli_value,
            self._get_proxy_host(self._get_file_snippet_proxy()),
            self._get_proxy_host(self._get_root_proxy()),
        )
        return self

    def with_url(self, cli_value: Optional[str] = None) -> 'ScanSettingsBuilder':
        """Set API base URL with priority: file_snippet.http_config.base_uri > settings.http_config.base_uri > CLI.

        Args:
            cli_value: API base URL from CLI argument (e.g., 'https://api.scanoss.com')

        Returns:
            Self for method chaining
        """
        self.url = self._merge_with_priority(
            cli_value,
            self._get_file_snippet_http_config_value('base_uri'),
            self._get_http_config_value('base_uri'),
        )
        return self

    def with_ignore_cert_errors(self, cli_value: bool = False) -> 'ScanSettingsBuilder':
        """Set ignore_cert_errors with priority: file_snippet.http_config > settings.http_config > CLI True > False.

        Note: a CLI value of False is treated as "flag not provided" and is
        passed on as None, so it never competes with the settings file. A CLI
        value of True is only used when neither settings section defines
        ignore_cert_errors; an explicit value in scanoss.json always wins.

        Args:
            cli_value: Whether to ignore SSL certificate errors from CLI flag

        Returns:
            Self for method chaining
        """
        result = self._merge_with_priority(
            cli_value if cli_value else None,
            self._get_file_snippet_http_config_value('ignore_cert_errors'),
            self._get_http_config_value('ignore_cert_errors'),
        )
        self.ignore_cert_errors = result if result is not None else False
        return self

    def with_min_snippet_hits(self, cli_value: Optional[int] = None) -> 'ScanSettingsBuilder':
        """Set minimum snippet hits with priority: settings.file_snippet.min_snippet_hits > CLI.

        Args:
            cli_value: Minimum snippet hits from CLI argument

        Returns:
            Self for method chaining
        """
        self.min_snippet_hits = self._merge_cli_with_settings(
            cli_value,
            self._get_file_snippet_setting('min_snippet_hits'),
        )
        return self

    def with_min_snippet_lines(self, cli_value: Optional[int] = None) -> 'ScanSettingsBuilder':
        """Set minimum snippet lines with priority: settings.file_snippet.min_snippet_lines > CLI.

        Args:
            cli_value: Minimum snippet lines from CLI argument

        Returns:
            Self for method chaining
        """
        self.min_snippet_lines = self._merge_cli_with_settings(
            cli_value,
            self._get_file_snippet_setting('min_snippet_lines'),
        )
        return self

    def with_snippet_range_tolerance(self, cli_value: Optional[int] = None) -> 'ScanSettingsBuilder':
        """Set snippet range tolerance with priority: settings.file_snippet.snippet_range_tolerance > CLI.

        Args:
            cli_value: Snippet range tolerance from CLI argument

        Returns:
            Self for method chaining
        """
        self.snippet_range_tolerance = self._merge_cli_with_settings(
            cli_value,
            self._get_file_snippet_setting('snippet_range_tolerance'),
        )
        return self

    def with_honour_file_exts(self, cli_value: Optional[str] = None) -> 'ScanSettingsBuilder':
        """Set honour_file_exts with priority: settings.file_snippet.honour_file_exts > CLI.

        Args:
            cli_value: String 'true', 'false', or 'unset' from CLI argument

        Returns:
            Self for method chaining
        """
        self.honour_file_exts = self._merge_cli_with_settings(
            cli_value,
            self._get_file_snippet_setting('honour_file_exts'),
        )
        # Convert to boolean; None and the 'unset' sentinel are passed through untouched
        if self.honour_file_exts is not None and self.honour_file_exts != 'unset':
            self.honour_file_exts = self._str_to_bool(self.honour_file_exts)
        return self

    def with_ranking(self, cli_value: Optional[str] = None) -> 'ScanSettingsBuilder':
        """Set ranking enabled with priority: settings.file_snippet.ranking_enabled > CLI.

        Args:
            cli_value: String 'true', 'false', or 'unset' from CLI argument

        Returns:
            Self for method chaining
        """
        self.ranking = self._merge_cli_with_settings(
            cli_value,
            self._get_file_snippet_setting('ranking_enabled'),
        )
        # Convert to boolean; None and the 'unset' sentinel are passed through untouched
        if self.ranking is not None and self.ranking != 'unset':
            self.ranking = self._str_to_bool(self.ranking)
        return self

    def with_ranking_threshold(self, cli_value: Optional[int] = None) -> 'ScanSettingsBuilder':
        """Set ranking threshold with priority: settings.file_snippet.ranking_threshold > CLI.

        Args:
            cli_value: Ranking threshold from CLI argument

        Returns:
            Self for method chaining
        """
        self.ranking_threshold = self._merge_cli_with_settings(
            cli_value,
            self._get_file_snippet_setting('ranking_threshold'),
        )
        return self

    # Private helper methods
    @staticmethod
    def _merge_with_priority(cli_value, file_snippet_value, root_value):
        """Merge with priority: file_snippet > root settings > CLI.

        A candidate is skipped only when it is None, so falsy values such as
        False or 0 coming from the settings file still win.
        """
        if file_snippet_value is not None:
            return file_snippet_value
        if root_value is not None:
            return root_value
        return cli_value

    @staticmethod
    def _merge_cli_with_settings(cli_value, settings_value):
        """Merge CLI value with settings, with settings taking priority over CLI.

        Returns settings_value if not None, otherwise falls back to cli_value.
        """
        if settings_value is not None:
            return settings_value
        return cli_value

    @staticmethod
    def _str_to_bool(value: 'bool | str | None') -> Optional[bool]:
        """Convert string 'true'/'false' to boolean.

        None is returned as None and an existing bool is returned unchanged
        (settings file values are already booleans). Any other string than
        'true' (case-insensitive) yields False.
        """
        if value is None:
            return None
        if isinstance(value, bool):
            return value
        return value.lower() == 'true'

    # Methods to extract values from scanoss_settings
    def _get_file_snippet_setting(self, key: str):
        """Get a setting from the file_snippet section (None without a settings file)."""
        if not self.scanoss_settings:
            return None
        return self.scanoss_settings.get_file_snippet_settings().get(key)

    def _get_file_snippet_proxy(self):
        """Get proxy config from file_snippet section."""
        return self.scanoss_settings.get_file_snippet_proxy() if self.scanoss_settings else None

    def _get_root_proxy(self):
        """Get proxy config from root settings section."""
        return self.scanoss_settings.get_proxy() if self.scanoss_settings else None

    @staticmethod
    def _get_proxy_host(proxy_config) -> Optional[str]:
        """Extract host from proxy configuration dict (empty host counts as not set)."""
        if proxy_config is None:
            return None
        host = proxy_config.get('host')
        return host if host else None

    def _get_http_config_value(self, key: str):
        """Extract a value from the root http_config dict."""
        if not self.scanoss_settings:
            return None
        config = self.scanoss_settings.get_http_config()
        return config.get(key) if config else None

    def _get_file_snippet_http_config_value(self, key: str):
        """Extract a value from the file_snippet http_config dict."""
        if not self.scanoss_settings:
            return None
        config = self.scanoss_settings.get_file_snippet_http_config()
        return config.get(key) if config else None
import __version__ from .csvoutput import CsvOutput from .cyclonedx import CycloneDx +from .scan_settings_builder import ScanSettingsBuilder from .scancodedeps import ScancodeDeps from .scanoss_settings import ScanossSettings from .scanossapi import ScanossApi @@ -103,9 +104,15 @@ def __init__( # noqa: PLR0913, PLR0915 strip_hpsm_ids=None, strip_snippet_ids=None, skip_md5_ids=None, - scan_settings: 'ScanossSettings | None' = None, + scanoss_settings: 'ScanossSettings | None' = None, req_headers: dict = None, use_grpc: bool = False, + min_snippet_hits: int = None, + min_snippet_lines: int = None, + snippet_range_tolerance: int = None, + ranking: str = None, + ranking_threshold: int = None, + honour_file_exts: str = None, skip_headers: bool = False, skip_headers_limit: int = 0, ): @@ -130,8 +137,20 @@ def __init__( # noqa: PLR0913, PLR0915 self.skip_size = skip_size self.skip_extensions = skip_extensions self.req_headers = req_headers + self.scanoss_settings = scanoss_settings ver_details = Scanner.version_details() + # Get settings values for skip_headers options + file_snippet_settings = scanoss_settings.get_file_snippet_settings() if scanoss_settings else {} + settings_skip_headers = file_snippet_settings.get('skip_headers') + settings_skip_headers_limit = file_snippet_settings.get('skip_headers_limit') + + # Merge CLI values with settings (scanoss.json takes priority over CLI) + skip_headers = Scanner._merge_cli_with_settings(skip_headers, settings_skip_headers) + skip_headers_limit = Scanner._merge_cli_with_settings( + skip_headers_limit, settings_skip_headers_limit) + self.print_debug(f'Skip headers {skip_headers} with limit: {skip_headers_limit}') + self.winnowing = Winnowing( debug=debug, trace=trace, @@ -146,21 +165,42 @@ def __init__( # noqa: PLR0913, PLR0915 skip_headers=skip_headers, skip_headers_limit=skip_headers_limit, ) + + # Build merged settings using builder pattern + scan_settings = (ScanSettingsBuilder(scanoss_settings) + .with_proxy(proxy) + 
.with_url(url) + .with_ignore_cert_errors(ignore_cert_errors) + .with_min_snippet_hits(min_snippet_hits) + .with_min_snippet_lines(min_snippet_lines) + .with_snippet_range_tolerance(snippet_range_tolerance) + .with_honour_file_exts(honour_file_exts) + .with_ranking(ranking) + .with_ranking_threshold(ranking_threshold)) + + self.print_debug(f'Scan settings: {scan_settings}') + self.scanoss_api = ScanossApi( debug=debug, trace=trace, quiet=quiet, api_key=api_key, - url=url, + url=scan_settings.url, flags=flags, timeout=timeout, ver_details=ver_details, - ignore_cert_errors=ignore_cert_errors, - proxy=proxy, + ignore_cert_errors=scan_settings.ignore_cert_errors, + proxy=scan_settings.proxy, ca_cert=ca_cert, pac=pac, retry=retry, - req_headers= self.req_headers, + req_headers=self.req_headers, + min_snippet_hits=scan_settings.min_snippet_hits, + min_snippet_lines=scan_settings.min_snippet_lines, + snippet_range_tolerance=scan_settings.snippet_range_tolerance, + honour_file_exts=scan_settings.honour_file_exts, + ranking=scan_settings.ranking, + ranking_threshold=scan_settings.ranking_threshold, ) sc_deps = ScancodeDeps(debug=debug, quiet=quiet, trace=trace, timeout=sc_timeout, sc_command=sc_command) grpc_api = ScanossGrpc( @@ -191,19 +231,32 @@ def __init__( # noqa: PLR0913, PLR0915 if self._skip_snippets: self.max_post_size = 8 * 1024 # 8k Max post size if we're skipping snippets - self.scan_settings = scan_settings self.post_processor = ( - ScanPostProcessor(scan_settings, debug=debug, trace=trace, quiet=quiet) if scan_settings else None + ScanPostProcessor(scanoss_settings, debug=debug, trace=trace, quiet=quiet) if scan_settings else None ) self._maybe_set_api_sbom() def _maybe_set_api_sbom(self): - if not self.scan_settings: + if not self.scanoss_settings: return - sbom = self.scan_settings.get_sbom() + sbom = self.scanoss_settings.get_sbom() if sbom: self.scanoss_api.set_sbom(sbom) + @staticmethod + def _merge_cli_with_settings(cli_value, settings_value): + 
"""Merge CLI value with settings value (two-level priority: settings > cli). + + Args: + cli_value: Value from CLI argument + settings_value: Value from scanoss.json file_snippet settings + Returns: + Merged value with CLI taking priority over settings + """ + if settings_value is not None: + return settings_value + return cli_value + @staticmethod def __count_files_in_wfp_file(wfp_file: str): """ @@ -286,7 +339,8 @@ def is_dependency_scan(self): """ if self.scan_options & ScanType.SCAN_DEPENDENCIES.value: return True - return False + file_snippet_settings = self.scanoss_settings.get_file_snippet_settings() if self.scanoss_settings else {} + return file_snippet_settings.get('dependency_analysis', False) def scan_folder_with_options( # noqa: PLR0913 self, @@ -354,7 +408,7 @@ def scan_folder(self, scan_dir: str) -> bool: # noqa: PLR0912, PLR0915 debug=self.debug, trace=self.trace, quiet=self.quiet, - scanoss_settings=self.scan_settings, + scanoss_settings=self.scanoss_settings, all_extensions=self.all_extensions, all_folders=self.all_folders, hidden_files_folders=self.hidden_files_folders, @@ -615,7 +669,7 @@ def scan_files(self, files: []) -> bool: # noqa: PLR0912, PLR0915 debug=self.debug, trace=self.trace, quiet=self.quiet, - scanoss_settings=self.scan_settings, + scanoss_settings=self.scanoss_settings, all_extensions=self.all_extensions, all_folders=self.all_folders, hidden_files_folders=self.hidden_files_folders, @@ -923,7 +977,7 @@ def wfp_folder(self, scan_dir: str, wfp_file: str = None): debug=self.debug, trace=self.trace, quiet=self.quiet, - scanoss_settings=self.scan_settings, + scanoss_settings=self.scanoss_settings, all_extensions=self.all_extensions, all_folders=self.all_folders, hidden_files_folders=self.hidden_files_folders, diff --git a/src/scanoss/scanoss_settings.py b/src/scanoss/scanoss_settings.py index ff9b9292..3aeef664 100644 --- a/src/scanoss/scanoss_settings.py +++ b/src/scanoss/scanoss_settings.py @@ -335,3 +335,107 @@ def 
def get_file_snippet_settings(self) -> dict:
    """
    Look up the 'file_snippet' section nested under 'settings'
    Returns:
        dict: The file_snippet section, or an empty dict when it is absent
    """
    root_settings = self.data.get('settings', {})
    return root_settings.get('file_snippet', {})

def get_min_snippet_hits(self) -> Optional[int]:
    """
    Read 'min_snippet_hits' from the file_snippet section
    Returns:
        int or None: Configured value, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('min_snippet_hits')

def get_min_snippet_lines(self) -> Optional[int]:
    """
    Read 'min_snippet_lines' from the file_snippet section
    Returns:
        int or None: Configured value, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('min_snippet_lines')

def get_snippet_range_tolerance(self) -> Optional[int]:
    """
    Read 'snippet_range_tolerance' from the file_snippet section
    Returns:
        int or None: Configured value, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('snippet_range_tolerance')

def get_ranking_enabled(self) -> Optional[bool]:
    """
    Read 'ranking_enabled' from the file_snippet section
    Returns:
        bool or None: Configured value, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('ranking_enabled')

def get_ranking_threshold(self) -> Optional[int]:
    """
    Read 'ranking_threshold' from the file_snippet section
    Returns:
        int or None: Configured value, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('ranking_threshold')

def get_honour_file_exts(self) -> Optional[bool]:
    """
    Read 'honour_file_exts' from the file_snippet section
    Returns:
        bool or None: Configured value, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('honour_file_exts')

def get_skip_headers_limit(self) -> int:
    """
    Read 'skip_headers_limit' from the file_snippet section
    Returns:
        int: Configured value, or 0 if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('skip_headers_limit', 0)

def get_skip_headers(self) -> bool:
    """
    Read 'skip_headers' from the file_snippet section
    Returns:
        bool: Configured value, or False if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('skip_headers', False)

def get_proxy(self) -> Optional[dict]:
    """
    Read the 'proxy' entry directly under 'settings' (root level)
    Returns:
        dict or None: Proxy configuration, or None if the key is absent
    """
    root_settings = self.data.get('settings', {})
    return root_settings.get('proxy')

def get_http_config(self) -> Optional[dict]:
    """
    Read the 'http_config' entry directly under 'settings' (root level)
    Returns:
        dict or None: HTTP configuration, or None if the key is absent
    """
    root_settings = self.data.get('settings', {})
    return root_settings.get('http_config')

def get_file_snippet_proxy(self) -> Optional[dict]:
    """
    Read the 'proxy' entry of the file_snippet section
    Returns:
        dict or None: Proxy configuration, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('proxy')

def get_file_snippet_http_config(self) -> Optional[dict]:
    """
    Read the 'http_config' entry of the file_snippet section
    Returns:
        dict or None: HTTP configuration, or None if the key is absent
    """
    file_snippet = self.get_file_snippet_settings()
    return file_snippet.get('http_config')
""" +import base64 import http.client as http_client +import json import logging import os import sys import time import uuid from json.decoder import JSONDecodeError +from typing import Optional, Union import requests import urllib3 @@ -69,6 +72,12 @@ def __init__( # noqa: PLR0912, PLR0913, PLR0915 pac: PACFile = None, retry: int = 5, req_headers: dict = None, + min_snippet_hits: int = None, + min_snippet_lines: int = None, + snippet_range_tolerance: int = None, + honour_file_exts: Union[bool, str, None] = 'unset', + ranking: Union[bool, str, None] = 'unset', + ranking_threshold: int = None, ): """ Initialise the SCANOSS API @@ -79,6 +88,12 @@ def __init__( # noqa: PLR0912, PLR0913, PLR0915 :param debug: Enable debug (default False) :param trace: Enable trace (default False) :param quiet: Enable quiet mode (default False) + :param min_snippet_hits: Minimum snippet hits required (default None) + :param min_snippet_lines: Minimum snippet lines required (default None) + :param snippet_range_tolerance: Snippet range tolerance (default None) + :param honour_file_exts: Whether to honour file extensions (default 'unset') + :param ranking: Enable/disable ranking (default 'unset') + :param ranking_threshold: Ranking threshold value (default None) To set a custom certificate use: REQUESTS_CA_BUNDLE=/path/to/cert.pem @@ -88,6 +103,13 @@ def __init__( # noqa: PLR0912, PLR0913, PLR0915 """ super().__init__(debug, trace, quiet) self.sbom = None + # Scan tuning parameters + self.min_snippet_hits = min_snippet_hits + self.min_snippet_lines = min_snippet_lines + self.snippet_range_tolerance = snippet_range_tolerance + self.honour_file_exts = honour_file_exts + self.ranking = ranking + self.ranking_threshold = ranking_threshold self.scan_format = scan_format if scan_format else 'plain' self.flags = flags self.timeout = timeout if timeout > MIN_TIMEOUT else DEFAULT_TIMEOUT @@ -154,6 +176,10 @@ def scan(self, wfp: str, context: str = None, scan_id: int = None): # noqa: PLR scan_files 
= {'file': ('%s.wfp' % request_id, wfp)} headers = self.headers headers['x-request-id'] = request_id # send a unique request id for each post + # Add scan settings header if any settings are configured + scan_settings_header = self._build_scan_settings_header() + if scan_settings_header: + headers['scanoss-settings'] = scan_settings_header r = None retry = 0 # Add some retry logic to cater for timeouts, etc. while retry <= self.retry_limit: @@ -267,6 +293,45 @@ def set_sbom(self, sbom): self.sbom = sbom return self + def _build_scan_settings_header(self) -> Optional[str]: + """ + Build base64-encoded JSON for the scanoss-settings header. + Only includes parameters that have meaningful (non-"unset") values. + Returns: + Base64-encoded JSON string, or None if no settings to send + """ + settings = {} + + # min_snippet_hits: 0 = unset, don't send + if self.min_snippet_hits is not None and self.min_snippet_hits != 0: + settings['min_snippet_hits'] = self.min_snippet_hits + + # min_snippet_lines: 0 = unset, don't send + if self.min_snippet_lines is not None and self.min_snippet_lines != 0: + settings['min_snippet_lines'] = self.min_snippet_lines + + # snippet_range_tolerance: 0 = unset, don't send + if self.snippet_range_tolerance is not None and self.snippet_range_tolerance != 0: + settings['snippet_range_tolerance'] = self.snippet_range_tolerance + + # honour_file_exts: None or 'unset' = unset, don't send + if self.honour_file_exts is not None and self.honour_file_exts != 'unset': + settings['honour_file_exts'] = self.honour_file_exts + + # ranking: None or 'unset' = unset, don't send + if self.ranking is not None and self.ranking != 'unset': + settings['ranking_enabled'] = self.ranking + + # ranking_threshold: -1 = unset, don't send + if self.ranking_threshold is not None and self.ranking_threshold != -1: + settings['ranking_threshold'] = self.ranking_threshold + + if settings: + json_str = json.dumps(settings) + self.print_debug(f'Scan settings: {json_str}') + return
base64.b64encode(json_str.encode()).decode() + return None + def load_generic_headers(self, url): """ Adds custom headers from req_headers to the headers collection. diff --git a/src/scanoss/scanpostprocessor.py b/src/scanoss/scanpostprocessor.py index 4dd6f85d..b86accc9 100644 --- a/src/scanoss/scanpostprocessor.py +++ b/src/scanoss/scanpostprocessor.py @@ -80,7 +80,7 @@ class ScanPostProcessor(ScanossBase): def __init__( self, - scan_settings: ScanossSettings, + scanoss_settings: ScanossSettings, debug: bool = False, trace: bool = False, quiet: bool = False, @@ -88,14 +88,14 @@ def __init__( ): """ Args: - scan_settings (ScanossSettings): Scan settings object + scanoss_settings (ScanossSettings): Scanoss settings object debug (bool, optional): Debug mode. Defaults to False. trace (bool, optional): Traces. Defaults to False. quiet (bool, optional): Quiet mode. Defaults to False. results (dict | str, optional): Results to be processed. Defaults to None. """ super().__init__(debug, trace, quiet) - self.scan_settings = scan_settings + self.scanoss_settings = scanoss_settings self.results: dict = results self.component_info_map: dict = {} @@ -114,10 +114,10 @@ def _load_component_info(self): if not self.results: return for _, result in self.results.items(): - result = result[0] if isinstance(result, list) else result - purls = result.get('purl', []) + entry = result[0] if isinstance(result, list) else result + purls = entry.get('purl', []) for purl in purls: - self.component_info_map[purl] = result + self.component_info_map[purl] = entry def post_process(self): """ @@ -126,7 +126,7 @@ def post_process(self): Returns: dict: Processed results """ - if self.scan_settings.is_legacy(): + if self.scanoss_settings.is_legacy(): self.print_stderr( 'Legacy settings file detected. Post-processing is not supported for legacy settings file.' 
) @@ -139,7 +139,7 @@ def _remove_dismissed_files(self): """ Remove entries from the results based on files and/or purls specified in the SCANOSS settings file """ - to_remove_entries = self.scan_settings.get_bom_remove() + to_remove_entries = self.scanoss_settings.get_bom_remove() if not to_remove_entries: return self.results = { @@ -152,15 +152,15 @@ def _replace_purls(self): """ Replace purls in the results based on the SCANOSS settings file """ - to_replace_entries = self.scan_settings.get_bom_replace() + to_replace_entries = self.scanoss_settings.get_bom_replace() if not to_replace_entries: return for result_path, result in self.results.items(): - result = result[0] if isinstance(result, list) else result - should_replace, to_replace_with_purl = self._should_replace_result(result_path, result, to_replace_entries) + entry = result[0] if isinstance(result, list) else result + should_replace, to_replace_with_purl = self._should_replace_result(result_path, entry, to_replace_entries) if should_replace: - self.results[result_path] = [self._update_replaced_result(result, to_replace_with_purl)] + self.results[result_path] = [self._update_replaced_result(entry, to_replace_with_purl)] def _update_replaced_result(self, result: dict, to_replace_with_purl: str) -> dict: """ diff --git a/tests/data/scanoss.json b/tests/data/scanoss.json index b33ddcf1..d8ed7de9 100644 --- a/tests/data/scanoss.json +++ b/tests/data/scanoss.json @@ -25,6 +25,29 @@ "replace_with": "pkg:github/scanoss/only_purl_match_replaced.py" } ] + }, + "settings": { + "proxy": { + "host": "http://root-proxy:8080" + }, + "http_config": { + "base_uri": "https://root-api.scanoss.com", + "ignore_cert_errors": false + }, + "file_snippet": { + "proxy": { + "host": "http://file-snippet-proxy:8080" + }, + "http_config": { + "base_uri": "https://file-snippet-api.scanoss.com", + "ignore_cert_errors": true + }, + "min_snippet_hits": 10, + "min_snippet_lines": 5, + "honour_file_exts": true, + "ranking_enabled": true, 
+ "ranking_threshold": 75 + } } } diff --git a/tests/test_scan_settings_builder.py b/tests/test_scan_settings_builder.py new file mode 100644 index 00000000..12c000d4 --- /dev/null +++ b/tests/test_scan_settings_builder.py @@ -0,0 +1,362 @@ +""" +SPDX-License-Identifier: MIT + + Copyright (c) 2025, SCANOSS + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+""" + +import os +import unittest +from pathlib import Path + +from src.scanoss.scan_settings_builder import (ScanSettingsBuilder) +from src.scanoss.scanoss_settings import ScanossSettings + + +class TestScanSettingsBuilder(unittest.TestCase): + """Tests for the ScanSettingsBuilder class.""" + + script_dir = os.path.dirname(os.path.abspath(__file__)) + scan_settings_path = Path(script_dir, 'data', 'scanoss.json').resolve() + scan_settings = ScanossSettings(filepath=scan_settings_path) + + # ========================================================================= + # Test initialization + # ========================================================================= + + def test_init_with_none_settings(self): + """Test initialization with None settings.""" + builder = ScanSettingsBuilder(None) + + self.assertIsNone(builder.scanoss_settings) + self.assertIsNone(builder.proxy) + self.assertIsNone(builder.url) + self.assertFalse(builder.ignore_cert_errors) + self.assertIsNone(builder.min_snippet_hits) + self.assertIsNone(builder.min_snippet_lines) + self.assertIsNone(builder.honour_file_exts) + self.assertIsNone(builder.ranking) + self.assertIsNone(builder.ranking_threshold) + + def test_init_with_settings(self): + """Test initialization with settings object.""" + builder = ScanSettingsBuilder(self.scan_settings) + + self.assertEqual(builder.scanoss_settings, self.scan_settings) + + # ========================================================================= + # Test static helper methods + # ========================================================================= + + def test_str_to_bool_with_none(self): + """Test _str_to_bool returns None for None input.""" + self.assertIsNone(ScanSettingsBuilder._str_to_bool(None)) + + def test_str_to_bool_with_true_string(self): + """Test _str_to_bool converts 'true' to True.""" + self.assertTrue(ScanSettingsBuilder._str_to_bool('true')) + self.assertTrue(ScanSettingsBuilder._str_to_bool('True')) + 
self.assertTrue(ScanSettingsBuilder._str_to_bool('TRUE')) + + def test_str_to_bool_with_false_string(self): + """Test _str_to_bool converts 'false' to False.""" + self.assertFalse(ScanSettingsBuilder._str_to_bool('false')) + self.assertFalse(ScanSettingsBuilder._str_to_bool('False')) + self.assertFalse(ScanSettingsBuilder._str_to_bool('FALSE')) + + def test_str_to_bool_with_bool_input(self): + """Test _str_to_bool passes through bool values.""" + self.assertTrue(ScanSettingsBuilder._str_to_bool(True)) + self.assertFalse(ScanSettingsBuilder._str_to_bool(False)) + + def test_merge_with_priority_file_snippet_wins(self): + """Test _merge_with_priority returns file_snippet value when present (highest priority).""" + result = ScanSettingsBuilder._merge_with_priority('cli', 'file_snippet', 'root') + self.assertEqual(result, 'file_snippet') + + def test_merge_with_priority_root_second(self): + """Test _merge_with_priority returns root when file_snippet is None.""" + result = ScanSettingsBuilder._merge_with_priority('cli', None, 'root') + self.assertEqual(result, 'root') + + def test_merge_with_priority_cli_fallback(self): + """Test _merge_with_priority returns CLI when others are None.""" + result = ScanSettingsBuilder._merge_with_priority('cli', None, None) + self.assertEqual(result, 'cli') + + def test_merge_with_priority_all_none(self): + """Test _merge_with_priority returns None when all are None.""" + result = ScanSettingsBuilder._merge_with_priority(None, None, None) + self.assertIsNone(result) + + def test_merge_cli_with_settings_settings_wins(self): + """Test _merge_cli_with_settings returns settings value when present (highest priority).""" + result = ScanSettingsBuilder._merge_cli_with_settings('cli', 'settings') + self.assertEqual(result, 'settings') + + def test_merge_cli_with_settings_cli_fallback(self): + """Test _merge_cli_with_settings returns CLI when settings is None.""" + result = ScanSettingsBuilder._merge_cli_with_settings('cli', None) + 
self.assertEqual(result, 'cli') + + # ========================================================================= + # Test with_proxy + # ========================================================================= + + def test_with_proxy_cli_only(self): + """Test with_proxy uses CLI value when no settings.""" + builder = ScanSettingsBuilder(None) + result = builder.with_proxy('http://cli-proxy:8080') + + self.assertEqual(builder.proxy, 'http://cli-proxy:8080') + self.assertEqual(result, builder) # Test chaining + + def test_with_proxy_from_file_snippet(self): + """Test with_proxy uses file_snippet.proxy.host when CLI is None.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_proxy(None) + + # file_snippet.proxy.host = "http://file-snippet-proxy:8080" + self.assertEqual(builder.proxy, 'http://file-snippet-proxy:8080') + + def test_with_proxy_settings_overrides_cli(self): + """Test with_proxy settings value overrides CLI.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_proxy('http://cli-proxy:8080') + + # file_snippet.proxy.host = "http://file-snippet-proxy:8080" takes priority + self.assertEqual(builder.proxy, 'http://file-snippet-proxy:8080') + + # ========================================================================= + # Test with_url + # ========================================================================= + + def test_with_url_cli_only(self): + """Test with_url uses CLI value when no settings.""" + builder = ScanSettingsBuilder(None) + builder.with_url('https://cli-api.example.com') + + self.assertEqual(builder.url, 'https://cli-api.example.com') + + def test_with_url_from_file_snippet(self): + """Test with_url uses file_snippet.http_config.base_uri.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_url(None) + + # file_snippet.http_config.base_uri = "https://file-snippet-api.scanoss.com" + self.assertEqual(builder.url, 'https://file-snippet-api.scanoss.com') + + def 
test_with_url_settings_overrides_cli(self): + """Test with_url settings value overrides CLI.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_url('https://cli-api.com') + + # file_snippet.http_config.base_uri = "https://file-snippet-api.scanoss.com" takes priority + self.assertEqual(builder.url, 'https://file-snippet-api.scanoss.com') + + # ========================================================================= + # Test with_ignore_cert_errors + # ========================================================================= + + def test_with_ignore_cert_errors_defaults_to_false(self): + """Test with_ignore_cert_errors defaults to False.""" + builder = ScanSettingsBuilder(None) + builder.with_ignore_cert_errors(False) + + self.assertFalse(builder.ignore_cert_errors) + + def test_with_ignore_cert_errors_cli_true(self): + """Test with_ignore_cert_errors with CLI True.""" + builder = ScanSettingsBuilder(None) + builder.with_ignore_cert_errors(True) + + self.assertTrue(builder.ignore_cert_errors) + + def test_with_ignore_cert_errors_from_file_snippet(self): + """Test with_ignore_cert_errors from file_snippet settings.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_ignore_cert_errors(False) + + # file_snippet.http_config.ignore_cert_errors = true + self.assertTrue(builder.ignore_cert_errors) + + def test_with_ignore_cert_errors_cli_true_overrides(self): + """Test with_ignore_cert_errors CLI True overrides settings.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_ignore_cert_errors(True) + + self.assertTrue(builder.ignore_cert_errors) + + # ========================================================================= + # Test with_min_snippet_hits + # ========================================================================= + + def test_with_min_snippet_hits_cli_only(self): + """Test with_min_snippet_hits uses CLI value.""" + builder = ScanSettingsBuilder(None) + builder.with_min_snippet_hits(5) + + 
self.assertEqual(builder.min_snippet_hits, 5) + + def test_with_min_snippet_hits_from_settings(self): + """Test with_min_snippet_hits from settings.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_min_snippet_hits(None) + + # file_snippet.min_snippet_hits = 10 + self.assertEqual(builder.min_snippet_hits, 10) + + def test_with_min_snippet_hits_settings_overrides_cli(self): + """Test with_min_snippet_hits settings overrides CLI.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_min_snippet_hits(5) + + # file_snippet.min_snippet_hits = 10 takes priority + self.assertEqual(builder.min_snippet_hits, 10) + + # ========================================================================= + # Test with_min_snippet_lines + # ========================================================================= + + def test_with_min_snippet_lines_cli_only(self): + """Test with_min_snippet_lines uses CLI value.""" + builder = ScanSettingsBuilder(None) + builder.with_min_snippet_lines(3) + + self.assertEqual(builder.min_snippet_lines, 3) + + def test_with_min_snippet_lines_from_settings(self): + """Test with_min_snippet_lines from settings.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_min_snippet_lines(None) + + # file_snippet.min_snippet_lines = 5 + self.assertEqual(builder.min_snippet_lines, 5) + + # ========================================================================= + # Test with_honour_file_exts + # ========================================================================= + + def test_with_honour_file_exts_cli_true(self): + """Test with_honour_file_exts with CLI 'true'.""" + builder = ScanSettingsBuilder(None) + builder.with_honour_file_exts('true') + + self.assertTrue(builder.honour_file_exts) + + def test_with_honour_file_exts_cli_false(self): + """Test with_honour_file_exts with CLI 'false'.""" + builder = ScanSettingsBuilder(None) + builder.with_honour_file_exts('false') + + 
self.assertFalse(builder.honour_file_exts) + + def test_with_honour_file_exts_from_settings(self): + """Test with_honour_file_exts from settings.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_honour_file_exts(None) + + # file_snippet.honour_file_exts = true + self.assertTrue(builder.honour_file_exts) + + def test_with_honour_file_exts_settings_overrides_cli(self): + """Test with_honour_file_exts settings overrides CLI.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_honour_file_exts('false') + + # file_snippet.honour_file_exts = true takes priority + self.assertTrue(builder.honour_file_exts) + + # ========================================================================= + # Test with_ranking + # ========================================================================= + + def test_with_ranking_cli_true(self): + """Test with_ranking with CLI 'true'.""" + builder = ScanSettingsBuilder(None) + builder.with_ranking('true') + + self.assertTrue(builder.ranking) + + def test_with_ranking_cli_false(self): + """Test with_ranking with CLI 'false'.""" + builder = ScanSettingsBuilder(None) + builder.with_ranking('false') + + self.assertFalse(builder.ranking) + + def test_with_ranking_from_settings(self): + """Test with_ranking from settings.""" + builder = ScanSettingsBuilder(self.scan_settings) + builder.with_ranking(None) + + # file_snippet.ranking_enabled = true + self.assertTrue(builder.ranking) + + # ========================================================================= + # Test with_ranking_threshold + # ========================================================================= + + def test_with_ranking_threshold_cli_only(self): + """Test with_ranking_threshold uses CLI value.""" + builder = ScanSettingsBuilder(None) + builder.with_ranking_threshold(50) + + self.assertEqual(builder.ranking_threshold, 50) + + def test_with_ranking_threshold_from_settings(self): + """Test with_ranking_threshold from settings.""" + builder = 
ScanSettingsBuilder(self.scan_settings) + builder.with_ranking_threshold(None) + + # file_snippet.ranking_threshold = 75 + self.assertEqual(builder.ranking_threshold, 75) + + # ========================================================================= + # Test method chaining + # ========================================================================= + + def test_method_chaining(self): + """Test that all with_* methods support chaining.""" + builder = ScanSettingsBuilder(None) + + result = (builder + .with_proxy('http://proxy:8080') + .with_url('https://api.example.com') + .with_ignore_cert_errors(True) + .with_min_snippet_hits(5) + .with_min_snippet_lines(3) + .with_honour_file_exts('true') + .with_ranking('true') + .with_ranking_threshold(50)) + + self.assertEqual(result, builder) + self.assertEqual(builder.proxy, 'http://proxy:8080') + self.assertEqual(builder.url, 'https://api.example.com') + self.assertTrue(builder.ignore_cert_errors) + self.assertEqual(builder.min_snippet_hits, 5) + self.assertEqual(builder.min_snippet_lines, 3) + self.assertTrue(builder.honour_file_exts) + self.assertTrue(builder.ranking) + self.assertEqual(builder.ranking_threshold, 50) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file