diff --git a/src/cmdline.py b/src/cmdline.py
index aed8c03..9a45974 100644
--- a/src/cmdline.py
+++ b/src/cmdline.py
@@ -4,6 +4,7 @@
 from src.downloader.download import (
     download_all_workflows_and_actions,
     download_account_workflows_and_actions,
+    download_repo_workflows_and_actions,
 )
 from src.indexer.index import index_downloaded_workflows_and_actions
 from src.reporter.report import generate
@@ -24,12 +25,15 @@
     REDIS_CLEAN_DEFAULT,
     DOWNLOAD_COMMAND,
     DOWNLOAD_ACCOUNT_COMMAND,
+    DOWNLOAD_REPO_COMMAND,
     DOWNLOAD_CRAWL_COMMAND,
     INDEX_COMMAND,
     REPORT_COMMAND,
     QUERIES_PATH_DEFAULT,
     REPORT_RAW_FORMAT,
     REPORT_JSON_FORMAT,
+    REPORT_SARIF_FORMAT,
+    REPORT_OUTPUT,
     SEVERITY_LEVELS,
     QUERY_TAGS,
     QUERY_IDS,
@@ -39,6 +43,7 @@
     DOWNLOAD_COMMAND: {
         DOWNLOAD_CRAWL_COMMAND: download_all_workflows_and_actions,
         DOWNLOAD_ACCOUNT_COMMAND: download_account_workflows_and_actions,
+        DOWNLOAD_REPO_COMMAND: download_repo_workflows_and_actions,
     },
     INDEX_COMMAND: index_downloaded_workflows_and_actions,
     REPORT_COMMAND: generate,
@@ -165,6 +170,28 @@ def raven() -> None:
         help="Download repositories owned by the authenticated user",
     )
 
+    repo_download_parser = download_sub_parser.add_parser(
+        "repo",
+        help="Download a specific repository",
+        parents=[download_parser_options, redis_parser],
+    )
+
+    repo_download_parser.add_argument(
+        "--repo-name",
+        required=True,
+        action="append",
+        type=str,
+        help="Repository to download, in 'org/repo' format (can be passed multiple times)",
+    )
+
+    repo_download_parser.add_argument(
+        "--workflow",
+        required=False,
+        action="append",
+        type=str,
+        help="Name of a workflow file to download (can be passed multiple times)",
+    )
+
     crawl_download_parser.add_argument(
         "--max-stars", type=int, help="Maximum number of stars for a repository"
     )
@@ -230,9 +257,15 @@ def raven() -> None:
         "--format",
         "-f",
         default=REPORT_RAW_FORMAT,
-        choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT],
+        choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT, REPORT_SARIF_FORMAT],
         help="Report format (default: raw)",
     )
+    report_parser.add_argument(
+        "--output",
+        "-o",
+        default=REPORT_OUTPUT,
+        help="File path to save the report output (prints to stdout when omitted)",
+    )
 
     format_sub_parser = report_parser.add_subparsers(
         dest="report_command",
@@ -271,5 +304,7 @@ def raven() -> None:
     elif args.command == REPORT_COMMAND:
         load_reporter_config(vars(args))
         COMMAND_FUNCTIONS[args.command]()
+        # success_exit() is called here rather than inside report.py; otherwise pytest fails when it invokes generate() directly.
+        log.success_exit()
     else:
         parser.print_help()
diff --git a/src/config/config.py b/src/config/config.py
index 9a47471..85085b5 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -8,6 +8,8 @@
 QUERIES_PATH_DEFAULT = "library"
 REPORT_RAW_FORMAT = "raw"
 REPORT_JSON_FORMAT = "json"
+REPORT_SARIF_FORMAT = "sarif"
+REPORT_OUTPUT = ""
 SLACK_REPORTER = "slack"
 
 NEO4J_URI_DEFAULT = "neo4j://localhost:7687"
@@ -38,6 +40,7 @@
 # CLI commands
 DOWNLOAD_COMMAND = "download"
 DOWNLOAD_ACCOUNT_COMMAND = "account"
+DOWNLOAD_REPO_COMMAND = "repo"
 DOWNLOAD_CRAWL_COMMAND = "crawl"
 INDEX_COMMAND = "index"
 REPORT_COMMAND = "report"
@@ -71,6 +74,8 @@ def load_downloader_config(args) -> None:
     Config.min_stars = args.get("min_stars", MIN_STARS_DEFAULT)
     Config.max_stars = args.get("max_stars")
     Config.account_name = args.get("account_name")
+    Config.repo_name = args.get("repo_name")
+    Config.workflow = args.get("workflow")
     Config.personal = args.get("personal")
     Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT)
 
@@ -129,6 +134,7 @@ def load_reporter_config(args):
     Config.reporter = args.get("report_command")
     Config.slack_token = args.get("slack_token")
     Config.channel_id = args.get("channel_id")
+    Config.output = args.get("output")
 
args.get("output") load_redis_config(args) load_neo4j_config(args) @@ -143,6 +149,8 @@ class Config: min_stars: int = None max_stars: int = None account_name: list[str] = [] + repo_name: list[str] = [] + workflow: list[str] = [] personal: bool = None # Indexer Configs @@ -175,6 +183,7 @@ class Config: reporter: str = None slack_token: str = None channel_id: str = None + output: str = "" # Neo4j Config neo4j_uri: str = None diff --git a/src/downloader/download.py b/src/downloader/download.py index 796f66c..34745d7 100644 --- a/src/downloader/download.py +++ b/src/downloader/download.py @@ -77,12 +77,14 @@ def download_all_workflows_and_actions() -> None: download_workflows_and_actions(repo) -def download_workflows_and_actions(repo: str) -> None: +def download_workflows_and_actions(repo: str, only_workflows: list = []) -> None: """The flow is the following: - First we enumerate .github/workflows directory for workflows - For each such workflow we download it - If that workflow contains uses:..., we analyze the string, and download the action or the reusable workflow. + + We can also filter specific workflows if we only want to test a specific one. """ with RedisConnection(Config.redis_objects_ops_db) as ops_db: if ops_db.exists_in_set(Config.workflow_download_history_set, repo): @@ -94,6 +96,10 @@ def download_workflows_and_actions(repo: str) -> None: log.debug(f"[+] Found {len(workflows)} workflows for {repo}") for name, url in workflows.items(): + if len(only_workflows) > 0 and name.lower() not in only_workflows: + log.debug(f"[+] Skipping {name}") + continue + if is_url_contains_a_token(url): """ If the URL contains a token, it means it is a private repository. @@ -229,3 +235,31 @@ def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None: ) # In the future, ref will be with commit sha add_ref_pointer_to_redis(full_path, full_path) + + +def download_repo_workflows_and_actions() -> None: + """Download single repository + + We are enumerating the .github/workflows directory, and downloading all the workflows. + In addition if the repository contains action.yml file, it means it is a composite action, + so we download it as well. + + For each such workflow we also scan if it uses additional external actions. + If so, we download these as well. + + We are trying to cache the downloads as much as we can to reduce redundant download attempts. + """ + log.info(f"[+] Scanning single repository") + + only_workflows = [] + if Config.workflow is not None and len(Config.workflow) > 0: + only_workflows = list(map(str.lower, Config.workflow)) + log.info(f"[+] Will only download the following workflows: {', '.join(only_workflows)}") + + for repo in Config.repo_name: + # Ensure it's of the "org/repo" format. + if repo.count("/") != 1: + log.error(f"[-] Repository '{repo}' is not a repository") + log.fail_exit() + continue + download_workflows_and_actions(repo, only_workflows=only_workflows) diff --git a/src/downloader/gh_api.py b/src/downloader/gh_api.py index 6009907..d79c35f 100644 --- a/src/downloader/gh_api.py +++ b/src/downloader/gh_api.py @@ -266,8 +266,15 @@ def get_repository_workflows(repo: str) -> Dict[str, str]: headers["Authorization"] = f"Token {Config.github_token}" + repo_name = repo + params = {} + if '@' in repo: + # The repo has the format of "org/repo@branch". 
+        repo_name, repo_branch = repo.split("@")
+        params["ref"] = repo_branch
+
     file_path = ".github/workflows"
-    r = get(CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers)
+    r = get(CONTENTS_URL.format(repo_path=repo_name, file_path=file_path), headers=headers, params=params)
     if r.status_code == 404:
         return {}
     if r.status_code == 403 and int(r.headers["X-RateLimit-Remaining"]) == 0:
diff --git a/src/indexer/index.py b/src/indexer/index.py
index 4864b90..74ba0c0 100644
--- a/src/indexer/index.py
+++ b/src/indexer/index.py
@@ -150,6 +150,10 @@ def index_workflow_file(workflow: str) -> None:
         obj["url"] = url
         obj["is_public"] = is_public
 
+        if "on" not in obj:
+            # Could be an invalid workflow or a configuration file, e.g.:
+            # https://github.com/spotify/druid/blob/master/.github/workflows/codeql-config.yml
+            return
         Config.graph.push_object(Workflow.from_dict(obj))
         ops_db.insert_to_set(Config.workflow_index_history_set, workflow_full_name)
diff --git a/src/queries/__init__.py b/src/queries/__init__.py
index aeb234f..ff78a32 100644
--- a/src/queries/__init__.py
+++ b/src/queries/__init__.py
@@ -12,6 +12,7 @@ def __init__(
         id: str,
         name: str,
         description: str,
+        full_description: str,
         tags: list,
         severity: str,
         query: list,
@@ -19,6 +20,7 @@
         self.id = id
         self.name = name
         self.description = description
+        self.full_description = full_description
         self.tags = tags
         self.severity = severity
         self.query = query
@@ -81,7 +83,9 @@ def to_raw(self) -> str:
         report += f"{Fore.CYAN}Severity:{Style.RESET_ALL} {self.severity}\n"
 
         wrapped_description = textwrap.fill(self.description, width=description_length)
+        wrapped_full_description = textwrap.fill(self.full_description, width=description_length)
         report += f"{Fore.CYAN}Description:{Style.RESET_ALL} {wrapped_description}\n"
+        report += f"{Fore.CYAN}Full Description:{Style.RESET_ALL} {wrapped_full_description}\n"
         report += f"{Fore.CYAN}Tags:{Style.RESET_ALL} {self.tags}\n"
 
         report += f"{Fore.CYAN}Workflow URLS:{Style.RESET_ALL}\n"
@@ -98,6 +102,7 @@ def _to_dict(self) -> dict:
             "id": self.id,
             "name": self.name,
             "description": self.description,
+            "full_description": self.full_description,
             "tags": self.tags,
             "severity": self.severity,
             "result": self.result,
diff --git a/src/reporter/report.py b/src/reporter/report.py
index 2dbe09e..96c6dbb 100644
--- a/src/reporter/report.py
+++ b/src/reporter/report.py
@@ -2,6 +2,7 @@
     Config,
     REPORT_RAW_FORMAT,
     REPORT_JSON_FORMAT,
+    REPORT_SARIF_FORMAT,
     SLACK_REPORTER,
 )
 from src.reporter import slack_reporter
@@ -27,6 +28,12 @@ def json_reporter(queries: List[Query]) -> str:
     return json.dumps([query.to_json() for query in queries], indent=4)
 
 
+def sarif_reporter(queries: List[Query]) -> str:
+    # Build the JSON report first, since it is easier to work with, then convert it to SARIF.
+    json_report = json.loads(json_reporter(queries))
+    return json.dumps(convert_json_to_sarif(json_report), indent=4)
+
+
 def get_queries() -> List[Query]:
     queries = []
     for query_file in listdir(Config.queries_path):
@@ -38,6 +45,7 @@ def get_queries() -> List[Query]:
                 id=yml_query.get("id"),
                 name=detection_info.get("name"),
                 description=detection_info.get("description"),
+                full_description=detection_info.get("full-description"),
                 tags=detection_info.get("tags"),
                 severity=detection_info.get("severity"),
                 query=yml_query.get("query"),
@@ -60,6 +68,8 @@ def generate() -> None:
         report = raw_reporter(filtered_queries)
     elif Config.format == REPORT_JSON_FORMAT:
         report = json_reporter(filtered_queries)
+    elif Config.format == REPORT_SARIF_FORMAT:
+        report = sarif_reporter(filtered_queries)
 
     if Config.reporter == SLACK_REPORTER:
         if Config.slack_token and Config.channel_id:
@@ -73,6 +83,81 @@ def generate() -> None:
             )
     else:
-        print(report)
-
-    success_exit()
+        if Config.output:
+            with open(Config.output, "w") as output_file:
+                output_file.write(report)
+        else:
+            print(report)
+
+
+def convert_json_to_sarif(findings: list) -> dict:
+    all_rules = []
+    all_results = []
+
+    # Map severity levels to CVSS-like scores.
+    scores = {
+        "critical": 10,
+        "high": 8,
+        "medium": 6,
+        "low": 3,
+        "info": 0
+    }
+
+    for finding in findings:
+        # First, add the rules.
+        rule = {
+            "id": finding["id"],
+            "name": finding["name"],
+            "shortDescription": {
+                "text": finding["description"]
+            },
+            "fullDescription": {
+                "text": finding["full_description"]
+            },
+            "properties": {
+                "security-severity": str(scores[finding["severity"]]),  # GitHub expects a string score
+                "tags": [
+                    "security"
+                ]
+            }
+        }
+
+        all_rules.append(rule)
+
+        # Now, for each file mentioned in the "result" list, add a finding.
+        for result in finding["result"]:
+            item = {
+                "ruleId": finding["id"],
+                "message": {
+                    "text": finding["description"]
+                },
+                "locations": [
+                    {
+                        "physicalLocation": {
+                            "artifactLocation": {
+                                "uri": result
+                            }
+                        }
+                    }
+                ]
+            }
+
+            all_results.append(item)
+
+    return {
+        "version": "2.1.0",
+        "$schema": "http://json.schemastore.org/sarif-2.1.0.json",
+        "runs": [
+            {
+                "tool": {
+                    "driver": {
+                        "name": "Raven Security Analyzer",
+                        "version": "1.0.0",
+                        "informationUri": "https://github.com/CycodeLabs/raven/",
+                        "rules": all_rules
+                    }
+                },
+                "results": all_results
+            }
+        ]
+    }
diff --git a/src/workflow_components/workflow.py b/src/workflow_components/workflow.py
index 4eb2b49..f6750d8 100644
--- a/src/workflow_components/workflow.py
+++ b/src/workflow_components/workflow.py
@@ -207,7 +207,9 @@ def from_dict(obj_dict: Dict[str, Any]) -> "Workflow":
         # When we meet it, we want to create a special relation to inputs of the reusable workflow.
         # We continue to treat the workflow as a regular workflow, and not as a reusable workflow.
         # But the difference is that we connected the different inputs to the workflow.
- if "workflow_call" in w.trigger: + # However, a workflow_call can be empty like in: + # https://github.com/python/cpython/blob/68e279b37aae3019979a05ca55f462b11aac14be/.github/workflows/reusable-docs.yml#L4 + if "workflow_call" in w.trigger and obj_dict["on"]["workflow_call"] is not None: wokrflow_call = obj_dict["on"]["workflow_call"] inputs = wokrflow_call["inputs"] for input_name, input in inputs.items(): diff --git a/tests/integration/integration_consts.py b/tests/integration/integration_consts.py index 47d370e..102505d 100644 --- a/tests/integration/integration_consts.py +++ b/tests/integration/integration_consts.py @@ -65,4 +65,19 @@ }, }, }, + { + "test_name": "test_demo_index_single_repo", + "json_path": "tests/integration/structures_json/demo-1-index.json", + "description": "Tests Demo-1's graph structures combined. It has a workflow that uses the checkout action.", + "queries": { + "nodes_query": GET_NODES_BY_PATH_QUERY, + "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, + "to_format": { + "paths_list": [ + "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "actions/checkout", + ] + }, + }, + } ] diff --git a/tests/integration/structures_json/demo-1-index.json b/tests/integration/structures_json/demo-1-index.json new file mode 100644 index 0000000..564b5ff --- /dev/null +++ b/tests/integration/structures_json/demo-1-index.json @@ -0,0 +1,415 @@ +{ + "nodes": [ + { + "path": "actions/checkout", + "using": "node20", + "name": "Checkout", + "is_public": true, + "_id": "d35e7df441120da9624b8c11e36151be", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeAction" + ] + }, + { + "path": "actions/checkout", + "default": "${{ github.repository }}", + "name": "repository", + "description": "Repository name with owner. For example, actions/checkout", + "_id": "775c9f7b8b404a9df37b16c9d4d8f336", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "ref", + "description": "The branch, tag or SHA to checkout. When checking out the repository that triggered a workflow, this defaults to the reference or SHA for that event. Otherwise, uses the default branch.\n", + "_id": "6bf31cd9bee2b2c9a0fd9a8d3c578903", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "${{ github.token }}", + "name": "token", + "description": "Personal access token (PAT) used to fetch the repository. The PAT is configured with the local git config, which enables your scripts to run authenticated git commands. The post-job step removes the PAT.\n\nWe recommend using a service account with the least permissions necessary. Also when generating a new PAT, select the least scopes necessary.\n\n[Learn more about creating and using encrypted secrets](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/creating-and-using-encrypted-secrets)\n", + "_id": "5f4fa35f28767a9155a5813b2cc934d5", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "ssh-key", + "description": "SSH key used to fetch the repository. The SSH key is configured with the local git config, which enables your scripts to run authenticated git commands. 
The post-job step removes the SSH key.\n\nWe recommend using a service account with the least permissions necessary.\n\n[Learn more about creating and using encrypted secrets](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/creating-and-using-encrypted-secrets)\n", + "_id": "c53d6fe7a19a576c8bfefe6cfa80870b", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "ssh-known-hosts", + "description": "Known hosts in addition to the user and global host key database. The public SSH keys for a host may be obtained using the utility `ssh-keyscan`. For example, `ssh-keyscan github.com`. The public key for github.com is always implicitly added.\n", + "_id": "145d5c9a78c4d7564f7b79ec495b6ff2", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "ssh-strict", + "description": "Whether to perform strict host key checking. When true, adds the options `StrictHostKeyChecking=yes` and `CheckHostIP=no` to the SSH command line. Use the input `ssh-known-hosts` to configure additional hosts.\n", + "_id": "ec56ff9d7ca004ba974a2fca1b01f2c5", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "git", + "name": "ssh-user", + "description": "The user to use when connecting to the remote SSH host. By default 'git' is used.\n", + "_id": "c647109fbdf1f6f88205b9d01b7e84cb", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "persist-credentials", + "description": "Whether to configure the token or SSH key with the local git config", + "_id": "fe19be86972043c79c7ad0f618559ade", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "path", + "description": "Relative path under $GITHUB_WORKSPACE to place the repository", + "_id": "3470907d9833db0f15032d1aadd1dbc2", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "clean", + "description": "Whether to execute `git clean -ffdx && git reset --hard HEAD` before fetching", + "_id": "a8d8ef152d8986d297b84bc2faf66d1e", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "filter", + "description": "Partially clone against a given filter. Overrides sparse-checkout if set.\n", + "_id": "974202e17cde42b352e662c9c55df9ff", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "sparse-checkout", + "description": "Do a sparse checkout on given patterns. 
Each pattern should be separated with new lines.\n", + "_id": "c626f387aeac44aa82b5e27f4cbaca54", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "sparse-checkout-cone-mode", + "description": "Specifies whether to use cone-mode when doing a sparse checkout.\n", + "_id": "90aa405b4e4278ac32892e1e1756b807", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": 1, + "name": "fetch-depth", + "description": "Number of commits to fetch. 0 indicates all history for all branches and tags.", + "_id": "00fb2bfc353337457e5be8ea01e27984", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "false", + "name": "fetch-tags", + "description": "Whether to fetch tags, even if fetch-depth > 0.", + "_id": "a32f4c2bbd5fb33b56c7b2634b343e62", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "show-progress", + "description": "Whether to show progress status output when fetching.", + "_id": "c871387fa99df881290dbd906bae83a6", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "false", + "name": "lfs", + "description": "Whether to download Git-LFS files", + "_id": "99f82cfda3b119fcf2b38ebc651dc0a8", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "false", + "name": "submodules", + "description": "Whether to checkout submodules: `true` to checkout submodules or `recursive` to recursively checkout submodules.\n\nWhen the `ssh-key` input is not provided, SSH URLs beginning with `git@github.com:` are converted to HTTPS.\n", + "_id": "7cdf94b14af56d70417e7b6d92ff316e", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "set-safe-directory", + "description": "Add repository path as safe.directory for Git global config by running `git config --global --add safe.directory `", + "_id": "98aaa1fa6903184fa592300457546c9e", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "github-server-url", + "description": "The base URL for the GitHub instance that you are trying to clone from, will use environment defaults to fetch from the same instance that the workflow is running from unless specified. 
Example URLs are https://github.com or https://my-ghes-server.example.com", + "_id": "e8f0d587842e43432e7c96ddcf2ef5e8", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "name": "run", + "is_public": true, + "_id": "22137cee99760506f369934d49f719f8", + "trigger": [ + "workflow_dispatch" + ], + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Workflow" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "machine": [ + "ubuntu-latest" + ], + "name": "demo_test", + "_id": "ec3b1d0fd5ec71b9f43547f026f579fd", + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Job" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "ref": "v4", + "uses": "actions/checkout@v4", + "_id": "c1306c77e3af9cdedcedee69c59c96c1", + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Step" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "name": "PrintEnv", + "run": "printenv", + "_id": "6f76bec5bf9d4e683ef736d3ffe8b842", + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Step" + ] + } + ], + "relationships": [ + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "775c9f7b8b404a9df37b16c9d4d8f336" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "6bf31cd9bee2b2c9a0fd9a8d3c578903" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "5f4fa35f28767a9155a5813b2cc934d5" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c53d6fe7a19a576c8bfefe6cfa80870b" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "145d5c9a78c4d7564f7b79ec495b6ff2" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "ec56ff9d7ca004ba974a2fca1b01f2c5" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c647109fbdf1f6f88205b9d01b7e84cb" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "fe19be86972043c79c7ad0f618559ade" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "3470907d9833db0f15032d1aadd1dbc2" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "a8d8ef152d8986d297b84bc2faf66d1e" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "974202e17cde42b352e662c9c55df9ff" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c626f387aeac44aa82b5e27f4cbaca54" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "90aa405b4e4278ac32892e1e1756b807" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": 
"COMPOSITE_ACTION_INPUT", + "end_node": "00fb2bfc353337457e5be8ea01e27984" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "a32f4c2bbd5fb33b56c7b2634b343e62" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c871387fa99df881290dbd906bae83a6" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "99f82cfda3b119fcf2b38ebc651dc0a8" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "7cdf94b14af56d70417e7b6d92ff316e" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "98aaa1fa6903184fa592300457546c9e" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "e8f0d587842e43432e7c96ddcf2ef5e8" + }, + { + "start_node": "c1306c77e3af9cdedcedee69c59c96c1", + "type": "ACTION", + "end_node": "d35e7df441120da9624b8c11e36151be" + }, + { + "start_node": "ec3b1d0fd5ec71b9f43547f026f579fd", + "type": "STEPS", + "end_node": "c1306c77e3af9cdedcedee69c59c96c1" + }, + { + "start_node": "ec3b1d0fd5ec71b9f43547f026f579fd", + "type": "STEPS", + "end_node": "6f76bec5bf9d4e683ef736d3ffe8b842" + }, + { + "start_node": "22137cee99760506f369934d49f719f8", + "type": "JOBS", + "end_node": "ec3b1d0fd5ec71b9f43547f026f579fd" + } + ] +} \ No newline at end of file diff --git a/tests/integration/test_report_sarif_export.py b/tests/integration/test_report_sarif_export.py new file mode 100644 index 0000000..c74e26f --- /dev/null +++ b/tests/integration/test_report_sarif_export.py @@ -0,0 +1,34 @@ +import os +import json +from tests.tests_init import init_integration_env +from src.reporter.report import generate as report_generate +from src.config.config import Config + + +def test_report_sarif_export(tmp_path) -> None: + init_integration_env() + + assert os.path.isfile(Config.output) is False + report_generate() + assert os.path.isfile(Config.output) is True + + with open(Config.output, "r") as file: + content = file.read() + + os.remove(Config.output) + + sarif = json.loads(content) + + driver = sarif["runs"][0]["tool"]["driver"] + rules = driver["rules"] + results = sarif["runs"][0]["results"] + + assert driver["name"] == "Raven Security Analyzer" + assert len(rules) == 1 + assert rules[0]["id"] == "RQ-11" + assert rules[0]["name"] == "Title Context Injection" + + assert len(results) == 1 + assert results[0]["ruleId"] == "RQ-11" + assert len(results[0]["locations"]) == 1 + assert results[0]["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] == "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml" \ No newline at end of file diff --git a/tests/integration/test_single_repo_download.py b/tests/integration/test_single_repo_download.py new file mode 100644 index 0000000..ac9088a --- /dev/null +++ b/tests/integration/test_single_repo_download.py @@ -0,0 +1,28 @@ +from colorama import Fore, Style +from tests.utils import ( + get_graph_structure, + assert_graph_structures, +) +from tests.integration.integration_consts import TESTS_CONFIGS +from tests.tests_init import init_integration_single_env + + +def test_single_repo_download() -> None: + init_integration_single_env() + + test_config = next((item for item in TESTS_CONFIGS if item["test_name"] == "test_demo_index_single_repo"), None) + + print( + 
f"{Fore.CYAN}Running integration test: {test_config['test_name']}.{Style.RESET_ALL}" + ) + + # Get the queries from the test config + query_config = test_config["queries"] + nodes_query = query_config["nodes_query"].format(**query_config["to_format"]) + relationships_query = query_config["relationships_query"].format( + **query_config["to_format"] + ) + + # Get the graph structure from the queries and assert it + graph_structure = get_graph_structure(nodes_query, relationships_query) + assert_graph_structures(graph_structure, test_config["json_path"]) diff --git a/tests/integration/test_single_workflow_download.py b/tests/integration/test_single_workflow_download.py new file mode 100644 index 0000000..95cb881 --- /dev/null +++ b/tests/integration/test_single_workflow_download.py @@ -0,0 +1,28 @@ +from colorama import Fore, Style +from tests.utils import ( + get_graph_structure, + assert_graph_structures, +) +from tests.integration.integration_consts import TESTS_CONFIGS +from tests.tests_init import init_integration_single_env + + +def test_single_workflow_download() -> None: + init_integration_single_env(only_workflows=["demo-workflow.yml"]) + + test_config = next((item for item in TESTS_CONFIGS if item["test_name"] == "test_demo_index_single_repo"), None) + + print( + f"{Fore.CYAN}Running integration test: {test_config['test_name']}.{Style.RESET_ALL}" + ) + + # Get the queries from the test config + query_config = test_config["queries"] + nodes_query = query_config["nodes_query"].format(**query_config["to_format"]) + relationships_query = query_config["relationships_query"].format( + **query_config["to_format"] + ) + + # Get the graph structure from the queries and assert it + graph_structure = get_graph_structure(nodes_query, relationships_query) + assert_graph_structures(graph_structure, test_config["json_path"]) diff --git a/tests/tests_init.py b/tests/tests_init.py index b864e5e..e6edd45 100644 --- a/tests/tests_init.py +++ b/tests/tests_init.py @@ -1,7 +1,10 @@ -from os import getenv -from src.config.config import load_downloader_config, load_indexer_config -from src.downloader.download import download_account_workflows_and_actions +from pathlib import Path +import tempfile +import os +from src.config.config import load_downloader_config, load_indexer_config, load_reporter_config +from src.downloader.download import download_account_workflows_and_actions, download_repo_workflows_and_actions from src.indexer.index import index_downloaded_workflows_and_actions +from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT, Config def init_integration_env(): @@ -10,12 +13,20 @@ def init_integration_env(): index_downloaded_workflows_and_actions() +def init_integration_single_env(only_workflows: list = []): + load_integration_tests_config() + Config.workflow = only_workflows + download_repo_workflows_and_actions() + index_downloaded_workflows_and_actions() + + def load_integration_tests_config() -> None: load_downloader_config( { "debug": False, - "token": getenv("GITHUB_TOKEN"), + "token": os.getenv("GITHUB_TOKEN"), "account_name": ["RavenIntegrationTests"], + "repo_name": ["RavenIntegrationTests/Demo-1"], "redis_host": "raven-redis-test", "redis_port": 6379, "clean_redis": True, @@ -35,3 +46,11 @@ def load_integration_tests_config() -> None: "clean_neo4j": True, } ) + + load_reporter_config( + { + "format": "sarif", + "queries_path": Path(__file__).parent.parent / QUERIES_PATH_DEFAULT, + "output": os.path.join(tempfile.gettempdir(), next(tempfile._get_candidate_names())) + } + )