From 3c34fcf0c55924b55fbceb73bf9ea8209bf7d76d Mon Sep 17 00:00:00 2001
From: Pavel Tsakalidis
Date: Sat, 6 Jul 2024 21:30:05 +0100
Subject: [PATCH 1/6] Implement support for --repo-name to scan a specific repo
---
src/cmdline.py | 17 +
src/config/config.py | 3 +
src/downloader/download.py | 23 +
tests/integration/integration_consts.py | 15 +
.../structures_json/demo-1-index.json | 415 ++++++++++++++++++
.../integration/test_single_repo_download.py | 28 ++
tests/tests_init.py | 9 +-
7 files changed, 509 insertions(+), 1 deletion(-)
create mode 100644 tests/integration/structures_json/demo-1-index.json
create mode 100644 tests/integration/test_single_repo_download.py
diff --git a/src/cmdline.py b/src/cmdline.py
index aed8c03..2c642eb 100644
--- a/src/cmdline.py
+++ b/src/cmdline.py
@@ -4,6 +4,7 @@
from src.downloader.download import (
download_all_workflows_and_actions,
download_account_workflows_and_actions,
+ download_repo_workflows_and_actions,
)
from src.indexer.index import index_downloaded_workflows_and_actions
from src.reporter.report import generate
@@ -24,6 +25,7 @@
REDIS_CLEAN_DEFAULT,
DOWNLOAD_COMMAND,
DOWNLOAD_ACCOUNT_COMMAND,
+ DOWNLOAD_REPO_COMMAND,
DOWNLOAD_CRAWL_COMMAND,
INDEX_COMMAND,
REPORT_COMMAND,
@@ -39,6 +41,7 @@
DOWNLOAD_COMMAND: {
DOWNLOAD_CRAWL_COMMAND: download_all_workflows_and_actions,
DOWNLOAD_ACCOUNT_COMMAND: download_account_workflows_and_actions,
+ DOWNLOAD_REPO_COMMAND: download_repo_workflows_and_actions,
},
INDEX_COMMAND: index_downloaded_workflows_and_actions,
REPORT_COMMAND: generate,
@@ -165,6 +168,20 @@ def raven() -> None:
help="Download repositories owned by the authenticated user",
)
+ repo_download_parser = download_sub_parser.add_parser(
+ "repo",
+ help="Download specific repository",
+ parents=[download_parser_options, redis_parser],
+ )
+
+ repo_download_parser.add_argument(
+ "--repo-name",
+ required=True,
+ action="append",
+ type=str,
+ help="Repository to download"
+ )
+
crawl_download_parser.add_argument(
"--max-stars", type=int, help="Maximum number of stars for a repository"
)
diff --git a/src/config/config.py b/src/config/config.py
index 9a47471..38b881f 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -38,6 +38,7 @@
# CLI commands
DOWNLOAD_COMMAND = "download"
DOWNLOAD_ACCOUNT_COMMAND = "account"
+DOWNLOAD_REPO_COMMAND = "repo"
DOWNLOAD_CRAWL_COMMAND = "crawl"
INDEX_COMMAND = "index"
REPORT_COMMAND = "report"
@@ -71,6 +72,7 @@ def load_downloader_config(args) -> None:
Config.min_stars = args.get("min_stars", MIN_STARS_DEFAULT)
Config.max_stars = args.get("max_stars")
Config.account_name = args.get("account_name")
+ Config.repo_name = args.get("repo_name")
Config.personal = args.get("personal")
Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT)
@@ -143,6 +145,7 @@ class Config:
min_stars: int = None
max_stars: int = None
account_name: list[str] = []
+ repo_name: list[str] = []
personal: bool = None
# Indexer Configs
diff --git a/src/downloader/download.py b/src/downloader/download.py
index 796f66c..b95fc6d 100644
--- a/src/downloader/download.py
+++ b/src/downloader/download.py
@@ -229,3 +229,26 @@ def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None:
)
# In the future, ref will be with commit sha
add_ref_pointer_to_redis(full_path, full_path)
+
+
+def download_repo_workflows_and_actions() -> None:
+ """Download single repository
+
+ We are enumerating the .github/workflows directory, and downloading all the workflows.
+ In addition if the repository contains action.yml file, it means it is a composite action,
+ so we download it as well.
+
+ For each such workflow we also scan if it uses additional external actions.
+ If so, we download these as well.
+
+ We are trying to cache the downloads as much as we can to reduce redundant download attempts.
+ """
+ log.info(f"[+] Scanning single repository")
+
+ for repo in Config.repo_name:
+ # Ensure it's of the "org/repo" format.
+ if repo.count("/") != 1:
+ log.error(f"[-] Repository '{repo}' is not a repository")
+ log.fail_exit()
+ continue
+ download_workflows_and_actions(repo)
diff --git a/tests/integration/integration_consts.py b/tests/integration/integration_consts.py
index 47d370e..102505d 100644
--- a/tests/integration/integration_consts.py
+++ b/tests/integration/integration_consts.py
@@ -65,4 +65,19 @@
},
},
},
+ {
+ "test_name": "test_demo_index_single_repo",
+ "json_path": "tests/integration/structures_json/demo-1-index.json",
+ "description": "Tests Demo-1's graph structures combined. It has a workflow that uses the checkout action.",
+ "queries": {
+ "nodes_query": GET_NODES_BY_PATH_QUERY,
+ "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY,
+ "to_format": {
+ "paths_list": [
+ "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml",
+ "actions/checkout",
+ ]
+ },
+ },
+ }
]
diff --git a/tests/integration/structures_json/demo-1-index.json b/tests/integration/structures_json/demo-1-index.json
new file mode 100644
index 0000000..564b5ff
--- /dev/null
+++ b/tests/integration/structures_json/demo-1-index.json
@@ -0,0 +1,415 @@
+{
+ "nodes": [
+ {
+ "path": "actions/checkout",
+ "using": "node20",
+ "name": "Checkout",
+ "is_public": true,
+ "_id": "d35e7df441120da9624b8c11e36151be",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeAction"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "${{ github.repository }}",
+ "name": "repository",
+ "description": "Repository name with owner. For example, actions/checkout",
+ "_id": "775c9f7b8b404a9df37b16c9d4d8f336",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "name": "ref",
+ "description": "The branch, tag or SHA to checkout. When checking out the repository that triggered a workflow, this defaults to the reference or SHA for that event. Otherwise, uses the default branch.\n",
+ "_id": "6bf31cd9bee2b2c9a0fd9a8d3c578903",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "${{ github.token }}",
+ "name": "token",
+ "description": "Personal access token (PAT) used to fetch the repository. The PAT is configured with the local git config, which enables your scripts to run authenticated git commands. The post-job step removes the PAT.\n\nWe recommend using a service account with the least permissions necessary. Also when generating a new PAT, select the least scopes necessary.\n\n[Learn more about creating and using encrypted secrets](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/creating-and-using-encrypted-secrets)\n",
+ "_id": "5f4fa35f28767a9155a5813b2cc934d5",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "name": "ssh-key",
+ "description": "SSH key used to fetch the repository. The SSH key is configured with the local git config, which enables your scripts to run authenticated git commands. The post-job step removes the SSH key.\n\nWe recommend using a service account with the least permissions necessary.\n\n[Learn more about creating and using encrypted secrets](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/creating-and-using-encrypted-secrets)\n",
+ "_id": "c53d6fe7a19a576c8bfefe6cfa80870b",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "name": "ssh-known-hosts",
+ "description": "Known hosts in addition to the user and global host key database. The public SSH keys for a host may be obtained using the utility `ssh-keyscan`. For example, `ssh-keyscan github.com`. The public key for github.com is always implicitly added.\n",
+ "_id": "145d5c9a78c4d7564f7b79ec495b6ff2",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "true",
+ "name": "ssh-strict",
+ "description": "Whether to perform strict host key checking. When true, adds the options `StrictHostKeyChecking=yes` and `CheckHostIP=no` to the SSH command line. Use the input `ssh-known-hosts` to configure additional hosts.\n",
+ "_id": "ec56ff9d7ca004ba974a2fca1b01f2c5",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "git",
+ "name": "ssh-user",
+ "description": "The user to use when connecting to the remote SSH host. By default 'git' is used.\n",
+ "_id": "c647109fbdf1f6f88205b9d01b7e84cb",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "true",
+ "name": "persist-credentials",
+ "description": "Whether to configure the token or SSH key with the local git config",
+ "_id": "fe19be86972043c79c7ad0f618559ade",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "name": "path",
+ "description": "Relative path under $GITHUB_WORKSPACE to place the repository",
+ "_id": "3470907d9833db0f15032d1aadd1dbc2",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "true",
+ "name": "clean",
+ "description": "Whether to execute `git clean -ffdx && git reset --hard HEAD` before fetching",
+ "_id": "a8d8ef152d8986d297b84bc2faf66d1e",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "name": "filter",
+ "description": "Partially clone against a given filter. Overrides sparse-checkout if set.\n",
+ "_id": "974202e17cde42b352e662c9c55df9ff",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "name": "sparse-checkout",
+ "description": "Do a sparse checkout on given patterns. Each pattern should be separated with new lines.\n",
+ "_id": "c626f387aeac44aa82b5e27f4cbaca54",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "true",
+ "name": "sparse-checkout-cone-mode",
+ "description": "Specifies whether to use cone-mode when doing a sparse checkout.\n",
+ "_id": "90aa405b4e4278ac32892e1e1756b807",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": 1,
+ "name": "fetch-depth",
+ "description": "Number of commits to fetch. 0 indicates all history for all branches and tags.",
+ "_id": "00fb2bfc353337457e5be8ea01e27984",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "false",
+ "name": "fetch-tags",
+ "description": "Whether to fetch tags, even if fetch-depth > 0.",
+ "_id": "a32f4c2bbd5fb33b56c7b2634b343e62",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "true",
+ "name": "show-progress",
+ "description": "Whether to show progress status output when fetching.",
+ "_id": "c871387fa99df881290dbd906bae83a6",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "false",
+ "name": "lfs",
+ "description": "Whether to download Git-LFS files",
+ "_id": "99f82cfda3b119fcf2b38ebc651dc0a8",
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "required": false,
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "false",
+ "name": "submodules",
+ "description": "Whether to checkout submodules: `true` to checkout submodules or `recursive` to recursively checkout submodules.\n\nWhen the `ssh-key` input is not provided, SSH URLs beginning with `git@github.com:` are converted to HTTPS.\n",
+ "_id": "7cdf94b14af56d70417e7b6d92ff316e",
+ "required": false,
+ "url": "https://github.com/actions/checkout/tree/main/action.yml",
+ "labels": [
+ "CompositeActionInput"
+ ]
+ },
+ {
+ "path": "actions/checkout",
+ "default": "true",
+ "name": "set-safe-directory",
+ "description": "Add repository path as safe.directory for Git global config by running `git config --global --add safe.directory
Date: Sat, 6 Jul 2024 21:57:49 +0100
Subject: [PATCH 2/6] Added support to target specific branch, using the
org/repo@branch format
---
src/downloader/gh_api.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/downloader/gh_api.py b/src/downloader/gh_api.py
index 6009907..d79c35f 100644
--- a/src/downloader/gh_api.py
+++ b/src/downloader/gh_api.py
@@ -266,8 +266,15 @@ def get_repository_workflows(repo: str) -> Dict[str, str]:
headers["Authorization"] = f"Token {Config.github_token}"
+ repo_name = repo
+ params = {}
+ if '@' in repo:
+ # The repo has the format of "org/repo@branch".
+ repo_name, repo_branch = repo.split('@', 1)
+ params['ref'] = repo_branch
+
file_path = ".github/workflows"
- r = get(CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers)
+ r = get(CONTENTS_URL.format(repo_path=repo_name, file_path=file_path), headers=headers, params=params)
if r.status_code == 404:
return {}
if r.status_code == 403 and int(r.headers["X-RateLimit-Remaining"]) == 0:
From 5a418b2fe068b597ac9b1955f74baa4b79ccb068 Mon Sep 17 00:00:00 2001
From: Pavel Tsakalidis
Date: Sat, 6 Jul 2024 22:52:54 +0100
Subject: [PATCH 3/6] Added support for exporting findings as SARIF
---
src/cmdline.py | 3 +-
src/config/config.py | 1 +
src/queries/__init__.py | 5 +++
src/reporter/report.py | 83 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/src/cmdline.py b/src/cmdline.py
index 2c642eb..af609a0 100644
--- a/src/cmdline.py
+++ b/src/cmdline.py
@@ -32,6 +32,7 @@
QUERIES_PATH_DEFAULT,
REPORT_RAW_FORMAT,
REPORT_JSON_FORMAT,
+ REPORT_SARIF_FORMAT,
SEVERITY_LEVELS,
QUERY_TAGS,
QUERY_IDS,
@@ -247,7 +248,7 @@ def raven() -> None:
"--format",
"-f",
default=REPORT_RAW_FORMAT,
- choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT],
+ choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT, REPORT_SARIF_FORMAT],
help="Report format (default: raw)",
)
diff --git a/src/config/config.py b/src/config/config.py
index 38b881f..2c285fa 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -8,6 +8,7 @@
QUERIES_PATH_DEFAULT = "library"
REPORT_RAW_FORMAT = "raw"
REPORT_JSON_FORMAT = "json"
+REPORT_SARIF_FORMAT = "sarif"
SLACK_REPORTER = "slack"
NEO4J_URI_DEFAULT = "neo4j://localhost:7687"
diff --git a/src/queries/__init__.py b/src/queries/__init__.py
index aeb234f..ff78a32 100644
--- a/src/queries/__init__.py
+++ b/src/queries/__init__.py
@@ -12,6 +12,7 @@ def __init__(
id: str,
name: str,
description: str,
+ full_description: str,
tags: list,
severity: str,
query: list,
@@ -19,6 +20,7 @@ def __init__(
self.id = id
self.name = name
self.description = description
+ self.full_description = full_description
self.tags = tags
self.severity = severity
self.query = query
@@ -81,7 +83,9 @@ def to_raw(self) -> str:
report += f"{Fore.CYAN}Severity:{Style.RESET_ALL} {self.severity}\n"
wrapped_description = textwrap.fill(self.description, width=description_length)
+ wrapped_full_description = textwrap.fill(self.full_description, width=description_length)
report += f"{Fore.CYAN}Description:{Style.RESET_ALL} {wrapped_description}\n"
+ report += f"{Fore.CYAN}Full Description:{Style.RESET_ALL} {wrapped_full_description}\n"
report += f"{Fore.CYAN}Tags:{Style.RESET_ALL} {self.tags}\n"
report += f"{Fore.CYAN}Workflow URLS:{Style.RESET_ALL}\n"
@@ -98,6 +102,7 @@ def _to_dict(self) -> dict:
"id": self.id,
"name": self.name,
"description": self.description,
+ "full_description": self.full_description,
"tags": self.tags,
"severity": self.severity,
"result": self.result,
diff --git a/src/reporter/report.py b/src/reporter/report.py
index 2dbe09e..eeebb4c 100644
--- a/src/reporter/report.py
+++ b/src/reporter/report.py
@@ -2,6 +2,7 @@
Config,
REPORT_RAW_FORMAT,
REPORT_JSON_FORMAT,
+ REPORT_SARIF_FORMAT,
SLACK_REPORTER,
)
from src.reporter import slack_reporter
@@ -27,6 +28,12 @@ def json_reporter(queries: List[Query]) -> str:
return json.dumps([query.to_json() for query in queries], indent=4)
+def sarif_reporter(queries: List[Query]) -> str:
+ # Get the JSON first as it's easier to parse, then create the sarif result.
+ json_report = json.loads(json_reporter(queries))
+ return json.dumps(convert_json_to_sarif(json_report), indent=4)
+
+
def get_queries() -> List[Query]:
queries = []
for query_file in listdir(Config.queries_path):
@@ -38,6 +45,7 @@ def get_queries() -> List[Query]:
id=yml_query.get("id"),
name=detection_info.get("name"),
description=detection_info.get("description"),
+ full_description=detection_info.get("full-description"),
tags=detection_info.get("tags"),
severity=detection_info.get("severity"),
query=yml_query.get("query"),
@@ -60,6 +68,8 @@ def generate() -> None:
report = raw_reporter(filtered_queries)
elif Config.format == REPORT_JSON_FORMAT:
report = json_reporter(filtered_queries)
+ elif Config.format == REPORT_SARIF_FORMAT:
+ report = sarif_reporter(filtered_queries)
if Config.reporter == SLACK_REPORTER:
if Config.slack_token and Config.channel_id:
@@ -76,3 +86,76 @@ def generate() -> None:
print(report)
success_exit()
+
+
+def convert_json_to_sarif(findings: dict) -> dict:
+ all_rules = []
+ all_results = []
+
+ # Match severity to CVSS score.
+ scores = {
+ 'critical': 10,
+ 'high': 8,
+ 'medium': 6,
+ 'low': 3,
+ 'info': 0
+ }
+
+ for finding in findings:
+ # First add the rules.
+ rule = {
+ "id": finding["id"],
+ "name": finding["name"],
+ "shortDescription": {
+ "text": finding["description"]
+ },
+ "fullDescription": {
+ "text": finding["full_description"]
+ },
+ "properties": {
+ "security-severity": scores[finding["severity"]],
+ "tags": [
+ "security"
+ ]
+ }
+ }
+
+ all_rules.append(rule)
+
+ # Now for each file mentioned in the "result" list, add a finding.
+ for result in finding["result"]:
+ item = {
+ "ruleId": finding["id"],
+ "message": {
+ "text": finding["description"]
+ },
+ "locations": [
+ {
+ "physicalLocation": {
+ "artifactLocation": {
+ "uri": result
+ }
+ }
+ }
+ ]
+ }
+
+ all_results.append(item)
+
+ return {
+ "version": "2.1.0",
+ "$schema": "http://json.schemastore.org/sarif-2.1.0.json",
+ "runs": [
+ {
+ "tool": {
+ "driver": {
+ "name": "Raven Security Analyzer",
+ "version": "1.0.0",
+ "informationUri": "https://github.com/CycodeLabs/raven/",
+ "rules": all_rules
+ }
+ },
+ "results": all_results
+ }
+ ]
+ }
From 26f34d78b3180a853e5d6633b4defcc838a55ebc Mon Sep 17 00:00:00 2001
From: Pavel Tsakalidis
Date: Sat, 6 Jul 2024 23:29:48 +0100
Subject: [PATCH 4/6] Added support to save report output using --output, and
added tests for sarif format
---
src/cmdline.py | 9 +++++
src/config/config.py | 3 ++
src/reporter/report.py | 8 +++--
tests/integration/test_report_sarif_export.py | 34 +++++++++++++++++++
tests/tests_init.py | 26 ++++++++++----
5 files changed, 71 insertions(+), 9 deletions(-)
create mode 100644 tests/integration/test_report_sarif_export.py
diff --git a/src/cmdline.py b/src/cmdline.py
index af609a0..36d8be1 100644
--- a/src/cmdline.py
+++ b/src/cmdline.py
@@ -33,6 +33,7 @@
REPORT_RAW_FORMAT,
REPORT_JSON_FORMAT,
REPORT_SARIF_FORMAT,
+ REPORT_OUTPUT,
SEVERITY_LEVELS,
QUERY_TAGS,
QUERY_IDS,
@@ -251,6 +252,12 @@ def raven() -> None:
choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT, REPORT_SARIF_FORMAT],
help="Report format (default: raw)",
)
+ report_parser.add_argument(
+ "--output",
+ "-o",
+ default="",
+ help="Location to save report output"
+ )
format_sub_parser = report_parser.add_subparsers(
dest="report_command",
@@ -289,5 +296,7 @@ def raven() -> None:
elif args.command == REPORT_COMMAND:
load_reporter_config(vars(args))
COMMAND_FUNCTIONS[args.command]()
+ # Moved this function outside of report.py, otherwise pytest was failing
+ log.success_exit()
else:
parser.print_help()
diff --git a/src/config/config.py b/src/config/config.py
index 2c285fa..468fba8 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -9,6 +9,7 @@
REPORT_RAW_FORMAT = "raw"
REPORT_JSON_FORMAT = "json"
REPORT_SARIF_FORMAT = "sarif"
+REPORT_OUTPUT = ""
SLACK_REPORTER = "slack"
NEO4J_URI_DEFAULT = "neo4j://localhost:7687"
@@ -132,6 +133,7 @@ def load_reporter_config(args):
Config.reporter = args.get("report_command")
Config.slack_token = args.get("slack_token")
Config.channel_id = args.get("channel_id")
+ Config.output = args.get("output")
load_redis_config(args)
load_neo4j_config(args)
@@ -179,6 +181,7 @@ class Config:
reporter: str = None
slack_token: str = None
channel_id: str = None
+ output: str = ""
# Neo4j Config
neo4j_uri: str = None
diff --git a/src/reporter/report.py b/src/reporter/report.py
index eeebb4c..96c6dbb 100644
--- a/src/reporter/report.py
+++ b/src/reporter/report.py
@@ -83,9 +83,11 @@ def generate() -> None:
)
else:
- print(report)
-
- success_exit()
+ if len(Config.output) > 0:
+ with open(Config.output, "w") as output_file:
+ output_file.write(report)
+ else:
+ print(report)
def convert_json_to_sarif(findings: dict) -> dict:
diff --git a/tests/integration/test_report_sarif_export.py b/tests/integration/test_report_sarif_export.py
new file mode 100644
index 0000000..c74e26f
--- /dev/null
+++ b/tests/integration/test_report_sarif_export.py
@@ -0,0 +1,34 @@
+import os
+import json
+from tests.tests_init import init_integration_env
+from src.reporter.report import generate as report_generate
+from src.config.config import Config
+
+
+def test_report_sarif_export(tmp_path) -> None:
+ init_integration_env()
+
+ assert os.path.isfile(Config.output) is False
+ report_generate()
+ assert os.path.isfile(Config.output) is True
+
+ with open(Config.output, "r") as file:
+ content = file.read()
+
+ os.remove(Config.output)
+
+ sarif = json.loads(content)
+
+ driver = sarif["runs"][0]["tool"]["driver"]
+ rules = driver["rules"]
+ results = sarif["runs"][0]["results"]
+
+ assert driver["name"] == "Raven Security Analyzer"
+ assert len(rules) == 1
+ assert rules[0]["id"] == "RQ-11"
+ assert rules[0]["name"] == "Title Context Injection"
+
+ assert len(results) == 1
+ assert results[0]["ruleId"] == "RQ-11"
+ assert len(results[0]["locations"]) == 1
+ assert results[0]["locations"][0]["physicalLocation"]["artifactLocation"]["uri"] == "https://github.com/RavenIntegrationTests/Integration-1/tree/main/.github/workflows/integration-workflow.yml"
\ No newline at end of file
diff --git a/tests/tests_init.py b/tests/tests_init.py
index 356d543..4dab629 100644
--- a/tests/tests_init.py
+++ b/tests/tests_init.py
@@ -1,7 +1,10 @@
-from os import getenv
-from src.config.config import load_downloader_config, load_indexer_config
+from pathlib import Path
+import tempfile
+import os
+from src.config.config import load_downloader_config, load_indexer_config, load_reporter_config
from src.downloader.download import download_account_workflows_and_actions, download_repo_workflows_and_actions
from src.indexer.index import index_downloaded_workflows_and_actions
+from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT
def init_integration_env():
@@ -20,10 +23,11 @@ def load_integration_tests_config() -> None:
load_downloader_config(
{
"debug": False,
- "token": getenv("GITHUB_TOKEN"),
+ "token": os.getenv("GITHUB_TOKEN"),
"account_name": ["RavenIntegrationTests"],
"repo_name": ["RavenIntegrationTests/Demo-1"],
- "redis_host": "raven-redis-test",
+ # "redis_host": "raven-redis-test",
+ "redis_host": "localhost",
"redis_port": 6379,
"clean_redis": True,
}
@@ -32,13 +36,23 @@ def load_integration_tests_config() -> None:
load_indexer_config(
{
"debug": False,
- "redis_host": "raven-redis-test",
+ # "redis_host": "raven-redis-test",
+ "redis_host": "localhost",
"redis_port": 6379,
"clean_redis": True,
- "neo4j_uri": "neo4j://raven-neo4j-test:7687",
+ # "neo4j_uri": "neo4j://raven-neo4j-test:7687",
+ "neo4j_uri": "neo4j://localhost:7687",
"neo4j_user": "neo4j",
"neo4j_pass": "123456789",
"threads": 1,
"clean_neo4j": True,
}
)
+
+ load_reporter_config(
+ {
+ "format": "sarif",
+ "queries_path": Path(__file__).parent.parent / QUERIES_PATH_DEFAULT,
+ "output": os.path.join(tempfile.gettempdir(), next(tempfile._get_candidate_names()))
+ }
+ )
From d8deee65b260b460ce86e51a81c8d3eb5b71834c Mon Sep 17 00:00:00 2001
From: Pavel Tsakalidis
Date: Sat, 6 Jul 2024 23:58:08 +0100
Subject: [PATCH 5/6] Added support to download individual workflows using
--workflow
---
src/cmdline.py | 8 ++++++
src/config/config.py | 2 ++
src/downloader/download.py | 15 ++++++++--
.../test_single_workflow_download.py | 28 +++++++++++++++++++
tests/tests_init.py | 14 ++++------
5 files changed, 57 insertions(+), 10 deletions(-)
create mode 100644 tests/integration/test_single_workflow_download.py
diff --git a/src/cmdline.py b/src/cmdline.py
index 36d8be1..9a45974 100644
--- a/src/cmdline.py
+++ b/src/cmdline.py
@@ -184,6 +184,14 @@ def raven() -> None:
help="Repository to download"
)
+ repo_download_parser.add_argument(
+ "--workflow",
+ required=False,
+ action="append",
+ type=str,
+ help="Workflow to download"
+ )
+
crawl_download_parser.add_argument(
"--max-stars", type=int, help="Maximum number of stars for a repository"
)
diff --git a/src/config/config.py b/src/config/config.py
index 468fba8..85085b5 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -75,6 +75,7 @@ def load_downloader_config(args) -> None:
Config.max_stars = args.get("max_stars")
Config.account_name = args.get("account_name")
Config.repo_name = args.get("repo_name")
+ Config.workflow = args.get("workflow")
Config.personal = args.get("personal")
Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT)
@@ -149,6 +150,7 @@ class Config:
max_stars: int = None
account_name: list[str] = []
repo_name: list[str] = []
+ workflow: list[str] = []
personal: bool = None
# Indexer Configs
diff --git a/src/downloader/download.py b/src/downloader/download.py
index b95fc6d..34745d7 100644
--- a/src/downloader/download.py
+++ b/src/downloader/download.py
@@ -77,12 +77,14 @@ def download_all_workflows_and_actions() -> None:
download_workflows_and_actions(repo)
-def download_workflows_and_actions(repo: str) -> None:
+def download_workflows_and_actions(repo: str, only_workflows: list = []) -> None:
"""The flow is the following:
- First we enumerate .github/workflows directory for workflows
- For each such workflow we download it
- If that workflow contains uses:..., we analyze the string, and download the action or the reusable workflow.
+
+ We can also filter specific workflows if we only want to test a specific one.
"""
with RedisConnection(Config.redis_objects_ops_db) as ops_db:
if ops_db.exists_in_set(Config.workflow_download_history_set, repo):
@@ -94,6 +96,10 @@ def download_workflows_and_actions(repo: str) -> None:
log.debug(f"[+] Found {len(workflows)} workflows for {repo}")
for name, url in workflows.items():
+ if len(only_workflows) > 0 and name.lower() not in only_workflows:
+ log.debug(f"[+] Skipping {name}")
+ continue
+
if is_url_contains_a_token(url):
"""
If the URL contains a token, it means it is a private repository.
@@ -245,10 +251,15 @@ def download_repo_workflows_and_actions() -> None:
"""
log.info(f"[+] Scanning single repository")
+ only_workflows = []
+ if Config.workflow is not None and len(Config.workflow) > 0:
+ only_workflows = list(map(str.lower, Config.workflow))
+ log.info(f"[+] Will only download the following workflows: {', '.join(only_workflows)}")
+
for repo in Config.repo_name:
# Ensure it's of the "org/repo" format.
if repo.count("/") != 1:
log.error(f"[-] Repository '{repo}' is not a repository")
log.fail_exit()
continue
- download_workflows_and_actions(repo)
+ download_workflows_and_actions(repo, only_workflows=only_workflows)
diff --git a/tests/integration/test_single_workflow_download.py b/tests/integration/test_single_workflow_download.py
new file mode 100644
index 0000000..95cb881
--- /dev/null
+++ b/tests/integration/test_single_workflow_download.py
@@ -0,0 +1,28 @@
+from colorama import Fore, Style
+from tests.utils import (
+ get_graph_structure,
+ assert_graph_structures,
+)
+from tests.integration.integration_consts import TESTS_CONFIGS
+from tests.tests_init import init_integration_single_env
+
+
+def test_single_workflow_download() -> None:
+ init_integration_single_env(only_workflows=["demo-workflow.yml"])
+
+ test_config = next((item for item in TESTS_CONFIGS if item["test_name"] == "test_demo_index_single_repo"), None)
+
+ print(
+ f"{Fore.CYAN}Running integration test: {test_config['test_name']}.{Style.RESET_ALL}"
+ )
+
+ # Get the queries from the test config
+ query_config = test_config["queries"]
+ nodes_query = query_config["nodes_query"].format(**query_config["to_format"])
+ relationships_query = query_config["relationships_query"].format(
+ **query_config["to_format"]
+ )
+
+ # Get the graph structure from the queries and assert it
+ graph_structure = get_graph_structure(nodes_query, relationships_query)
+ assert_graph_structures(graph_structure, test_config["json_path"])
diff --git a/tests/tests_init.py b/tests/tests_init.py
index 4dab629..e6edd45 100644
--- a/tests/tests_init.py
+++ b/tests/tests_init.py
@@ -4,7 +4,7 @@
from src.config.config import load_downloader_config, load_indexer_config, load_reporter_config
from src.downloader.download import download_account_workflows_and_actions, download_repo_workflows_and_actions
from src.indexer.index import index_downloaded_workflows_and_actions
-from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT
+from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT, Config
def init_integration_env():
@@ -13,8 +13,9 @@ def init_integration_env():
index_downloaded_workflows_and_actions()
-def init_integration_single_env():
+def init_integration_single_env(only_workflows: list = []):
load_integration_tests_config()
+ Config.workflow = only_workflows
download_repo_workflows_and_actions()
index_downloaded_workflows_and_actions()
@@ -26,8 +27,7 @@ def load_integration_tests_config() -> None:
"token": os.getenv("GITHUB_TOKEN"),
"account_name": ["RavenIntegrationTests"],
"repo_name": ["RavenIntegrationTests/Demo-1"],
- # "redis_host": "raven-redis-test",
- "redis_host": "localhost",
+ "redis_host": "raven-redis-test",
"redis_port": 6379,
"clean_redis": True,
}
@@ -36,12 +36,10 @@ def load_integration_tests_config() -> None:
load_indexer_config(
{
"debug": False,
- # "redis_host": "raven-redis-test",
- "redis_host": "localhost",
+ "redis_host": "raven-redis-test",
"redis_port": 6379,
"clean_redis": True,
- # "neo4j_uri": "neo4j://raven-neo4j-test:7687",
- "neo4j_uri": "neo4j://localhost:7687",
+ "neo4j_uri": "neo4j://raven-neo4j-test:7687",
"neo4j_user": "neo4j",
"neo4j_pass": "123456789",
"threads": 1,
From 74f7e4bd7084185773a3f147a8d2902450de8f22 Mon Sep 17 00:00:00 2001
From: Pavel Tsakalidis
Date: Sun, 7 Jul 2024 12:02:17 +0100
Subject: [PATCH 6/6] Added some error checking when parsing workflow files
---
src/indexer/index.py | 4 ++++
src/workflow_components/workflow.py | 4 +++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/indexer/index.py b/src/indexer/index.py
index 4864b90..74ba0c0 100644
--- a/src/indexer/index.py
+++ b/src/indexer/index.py
@@ -150,6 +150,10 @@ def index_workflow_file(workflow: str) -> None:
obj["url"] = url
obj["is_public"] = is_public
+ if 'on' not in obj:
+ # Could be some invalid/config file like:
+ # https://github.com/spotify/druid/blob/master/.github/workflows/codeql-config.yml
+ return
Config.graph.push_object(Workflow.from_dict(obj))
ops_db.insert_to_set(Config.workflow_index_history_set, workflow_full_name)
diff --git a/src/workflow_components/workflow.py b/src/workflow_components/workflow.py
index 4eb2b49..f6750d8 100644
--- a/src/workflow_components/workflow.py
+++ b/src/workflow_components/workflow.py
@@ -207,7 +207,9 @@ def from_dict(obj_dict: Dict[str, Any]) -> "Workflow":
# When we meet it, we want to create a special relation to inputs of the reusable workflow.
# We continue to treat the workflow as a regular workflow, and not as a reusable workflow.
# But the difference is that we connected the different inputs to the workflow.
- if "workflow_call" in w.trigger:
+ # However, a workflow_call can be empty like in:
+ # https://github.com/python/cpython/blob/68e279b37aae3019979a05ca55f462b11aac14be/.github/workflows/reusable-docs.yml#L4
+ if "workflow_call" in w.trigger and obj_dict["on"]["workflow_call"] is not None:
wokrflow_call = obj_dict["on"]["workflow_call"]
inputs = wokrflow_call["inputs"]
for input_name, input in inputs.items():