diff --git a/cycode/cli/apps/scan/remote_url_resolver.py b/cycode/cli/apps/scan/remote_url_resolver.py
index 5f96328d..967e6ea0 100644
--- a/cycode/cli/apps/scan/remote_url_resolver.py
+++ b/cycode/cli/apps/scan/remote_url_resolver.py
@@ -99,17 +99,56 @@ def _try_to_get_plastic_remote_url(path: str) -> Optional[str]:
 
 def _try_get_git_remote_url(path: str) -> Optional[str]:
     try:
-        remote_url = git_proxy.get_repo(path).remotes[0].config_reader.get('url')
-        logger.debug('Found Git remote URL, %s', {'remote_url': remote_url, 'path': path})
+        repo = git_proxy.get_repo(path, search_parent_directories=True)
+        remote_url = repo.remotes[0].config_reader.get('url')
+        logger.debug('Found Git remote URL, %s', {'remote_url': remote_url, 'repo_path': repo.working_dir})
         return remote_url
-    except Exception:
-        logger.debug('Failed to get Git remote URL. Probably not a Git repository')
+    except Exception as e:
+        logger.debug('Failed to get Git remote URL. Probably not a Git repository', exc_info=e)
         return None
 
 
-def try_get_any_remote_url(path: str) -> Optional[str]:
+def _try_get_any_remote_url(path: str) -> Optional[str]:
     remote_url = _try_get_git_remote_url(path)
     if not remote_url:
         remote_url = _try_to_get_plastic_remote_url(path)
 
     return remote_url
+
+
+def get_remote_url_scan_parameter(paths: tuple[str, ...]) -> Optional[str]:
+    """Resolve the single remote URL shared by all scanned paths.
+
+    Returns the remote URL when every path that resolves to a repository yields the same one;
+    returns None when no path yields a remote URL or when the paths yield different ones.
+    """
+    remote_urls = set()
+    for path in paths:
+        # FIXME(MarshalX): perf issue. This looping will produce:
+        # - len(paths) Git subprocess calls in the worst case
+        # - len(paths)*2 Plastic SCM subprocess calls
+        remote_url = _try_get_any_remote_url(path)
+        if remote_url:
+            remote_urls.add(remote_url)
+
+    if not remote_urls:
+        # nothing was resolved at all; reporting this as "multiple different URLs" would be misleading
+        logger.debug('No remote URL found. Scan will not be associated with organization')
+        return None
+
+    if len(remote_urls) == 1:
+        # we are resolving remote_url only if all paths belong to the same repo (identical remote URLs),
+        # otherwise, the behavior is undefined
+        remote_url = remote_urls.pop()
+
+        logger.debug(
+            'Single remote URL found. Scan will be associated with organization, %s', {'remote_url': remote_url}
+        )
+        return remote_url
+
+    logger.debug(
+        'Multiple different remote URLs found. Scan will not be associated with organization, %s',
+        {'remote_urls': remote_urls},
+    )
+
+    return None
diff --git a/cycode/cli/apps/scan/scan_parameters.py b/cycode/cli/apps/scan/scan_parameters.py
index c3c4ecbe..4d950880 100644
--- a/cycode/cli/apps/scan/scan_parameters.py
+++ b/cycode/cli/apps/scan/scan_parameters.py
@@ -1,9 +1,8 @@
-import os
 from typing import Optional
 
 import typer
 
-from cycode.cli.apps.scan.remote_url_resolver import try_get_any_remote_url
+from cycode.cli.apps.scan.remote_url_resolver import get_remote_url_scan_parameter
 from cycode.cli.utils.scan_utils import generate_unique_scan_id
 from cycode.logger import get_logger
 
@@ -29,18 +28,9 @@ def get_scan_parameters(ctx: typer.Context, paths: Optional[tuple[str, ...]] = N
 
     scan_parameters['paths'] = paths
 
-    if len(paths) != 1:
-        logger.debug('Multiple paths provided, going to ignore remote url')
-        return scan_parameters
-
-    if not os.path.isdir(paths[0]):
-        logger.debug('Path is not a directory, going to ignore remote url')
-        return scan_parameters
-
-    remote_url = try_get_any_remote_url(paths[0])
-    if remote_url:
-        # TODO(MarshalX): remove hardcode in context
-        ctx.obj['remote_url'] = remote_url
-        scan_parameters['remote_url'] = remote_url
+    remote_url = get_remote_url_scan_parameter(paths)
+    # TODO(MarshalX): remove hardcode in context
+    ctx.obj['remote_url'] = remote_url
+    scan_parameters['remote_url'] = remote_url
 
     return scan_parameters