37 changes: 36 additions & 1 deletion src/cmdline.py
@@ -4,6 +4,7 @@
from src.downloader.download import (
download_all_workflows_and_actions,
download_account_workflows_and_actions,
download_repo_workflows_and_actions,
)
from src.indexer.index import index_downloaded_workflows_and_actions
from src.reporter.report import generate
@@ -24,12 +25,15 @@
REDIS_CLEAN_DEFAULT,
DOWNLOAD_COMMAND,
DOWNLOAD_ACCOUNT_COMMAND,
DOWNLOAD_REPO_COMMAND,
DOWNLOAD_CRAWL_COMMAND,
INDEX_COMMAND,
REPORT_COMMAND,
QUERIES_PATH_DEFAULT,
REPORT_RAW_FORMAT,
REPORT_JSON_FORMAT,
REPORT_SARIF_FORMAT,
REPORT_OUTPUT,
SEVERITY_LEVELS,
QUERY_TAGS,
QUERY_IDS,
@@ -39,6 +43,7 @@
DOWNLOAD_COMMAND: {
DOWNLOAD_CRAWL_COMMAND: download_all_workflows_and_actions,
DOWNLOAD_ACCOUNT_COMMAND: download_account_workflows_and_actions,
DOWNLOAD_REPO_COMMAND: download_repo_workflows_and_actions,
},
INDEX_COMMAND: index_downloaded_workflows_and_actions,
REPORT_COMMAND: generate,
@@ -165,6 +170,28 @@ def raven() -> None:
help="Download repositories owned by the authenticated user",
)

repo_download_parser = download_sub_parser.add_parser(
"repo",
help="Download specific repository",
parents=[download_parser_options, redis_parser],
)

repo_download_parser.add_argument(
"--repo-name",
required=True,
action="append",
type=str,
help="Repository to download"
)

repo_download_parser.add_argument(
"--workflow",
required=False,
action="append",
type=str,
help="Workflow to download"
)

crawl_download_parser.add_argument(
"--max-stars", type=int, help="Maximum number of stars for a repository"
)
@@ -230,9 +257,15 @@ def raven() -> None:
"--format",
"-f",
default=REPORT_RAW_FORMAT,
-        choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT],
choices=[REPORT_RAW_FORMAT, REPORT_JSON_FORMAT, REPORT_SARIF_FORMAT],
help="Report format (default: raw)",
)
report_parser.add_argument(
"--output",
"-o",
default="",
help="Location to save report output"
)

format_sub_parser = report_parser.add_subparsers(
dest="report_command",
@@ -271,5 +304,7 @@ def raven() -> None:
elif args.command == REPORT_COMMAND:
load_reporter_config(vars(args))
COMMAND_FUNCTIONS[args.command]()
        # success_exit() was moved here from report.py; calling it there made pytest fail.
log.success_exit()
else:
parser.print_help()
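
Taken together, the new subcommand and report flags can be exercised along these lines (a hypothetical invocation: the repository and workflow names are illustrative, and any token/Redis flags from the shared download options are omitted):

    raven download repo --repo-name CycodeLabs/raven --repo-name octo-org/octo-repo@dev --workflow benchmark.yml
    raven report --format sarif --output raven.sarif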
9 changes: 9 additions & 0 deletions src/config/config.py
@@ -8,6 +8,8 @@
QUERIES_PATH_DEFAULT = "library"
REPORT_RAW_FORMAT = "raw"
REPORT_JSON_FORMAT = "json"
REPORT_SARIF_FORMAT = "sarif"
REPORT_OUTPUT = ""
SLACK_REPORTER = "slack"

NEO4J_URI_DEFAULT = "neo4j://localhost:7687"
@@ -38,6 +40,7 @@
# CLI commands
DOWNLOAD_COMMAND = "download"
DOWNLOAD_ACCOUNT_COMMAND = "account"
DOWNLOAD_REPO_COMMAND = "repo"
DOWNLOAD_CRAWL_COMMAND = "crawl"
INDEX_COMMAND = "index"
REPORT_COMMAND = "report"
@@ -71,6 +74,8 @@ def load_downloader_config(args) -> None:
Config.min_stars = args.get("min_stars", MIN_STARS_DEFAULT)
Config.max_stars = args.get("max_stars")
Config.account_name = args.get("account_name")
Config.repo_name = args.get("repo_name")
Config.workflow = args.get("workflow")
Config.personal = args.get("personal")
Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT)

@@ -129,6 +134,7 @@ def load_reporter_config(args):
Config.reporter = args.get("report_command")
Config.slack_token = args.get("slack_token")
Config.channel_id = args.get("channel_id")
Config.output = args.get("output")

load_redis_config(args)
load_neo4j_config(args)
@@ -143,6 +149,8 @@ class Config:
min_stars: int = None
max_stars: int = None
account_name: list[str] = []
repo_name: list[str] = []
workflow: list[str] = []
personal: bool = None

# Indexer Configs
@@ -175,6 +183,7 @@ class Config:
reporter: str = None
slack_token: str = None
channel_id: str = None
output: str = ""

# Neo4j Config
neo4j_uri: str = None
36 changes: 35 additions & 1 deletion src/downloader/download.py
@@ -77,12 +77,14 @@ def download_all_workflows_and_actions() -> None:
download_workflows_and_actions(repo)


-def download_workflows_and_actions(repo: str) -> None:
def download_workflows_and_actions(repo: str, only_workflows: list = None) -> None:
"""The flow is the following:

- First we enumerate .github/workflows directory for workflows
- For each such workflow we download it
- If that workflow contains uses:..., we analyze the string, and download the action or the reusable workflow.

    Optionally, only_workflows restricts the download to the given workflow names.
"""
with RedisConnection(Config.redis_objects_ops_db) as ops_db:
if ops_db.exists_in_set(Config.workflow_download_history_set, repo):
@@ -94,6 +96,10 @@ def download_workflows_and_actions(repo: str) -> None:

log.debug(f"[+] Found {len(workflows)} workflows for {repo}")
for name, url in workflows.items():
            if only_workflows and name.lower() not in only_workflows:
log.debug(f"[+] Skipping {name}")
continue

if is_url_contains_a_token(url):
"""
If the URL contains a token, it means it is a private repository.
@@ -229,3 +235,31 @@ def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None:
)
# In the future, ref will be with commit sha
add_ref_pointer_to_redis(full_path, full_path)


def download_repo_workflows_and_actions() -> None:
"""Download single repository

We are enumerating the .github/workflows directory, and downloading all the workflows.
In addition if the repository contains action.yml file, it means it is a composite action,
so we download it as well.

For each such workflow we also scan if it uses additional external actions.
If so, we download these as well.

We are trying to cache the downloads as much as we can to reduce redundant download attempts.
"""
log.info(f"[+] Scanning single repository")

only_workflows = []
if Config.workflow is not None and len(Config.workflow) > 0:
only_workflows = list(map(str.lower, Config.workflow))
log.info(f"[+] Will only download the following workflows: {', '.join(only_workflows)}")

for repo in Config.repo_name:
        # Ensure it's of the "org/repo" (optionally "org/repo@branch") format.
        if repo.count("/") != 1:
            log.error(f"[-] Repository '{repo}' is not in the expected 'org/repo' format")
            log.fail_exit()
download_workflows_and_actions(repo, only_workflows=only_workflows)
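
For instance (hypothetical values), a run with Config.repo_name = ["CycodeLabs/raven"] and Config.workflow = ["Benchmark.yml"] lowercases the filter and effectively calls:

    download_workflows_and_actions("CycodeLabs/raven", only_workflows=["benchmark.yml"])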
9 changes: 8 additions & 1 deletion src/downloader/gh_api.py
@@ -266,8 +266,15 @@ def get_repository_workflows(repo: str) -> Dict[str, str]:

headers["Authorization"] = f"Token {Config.github_token}"

repo_name = repo
params = {}
if '@' in repo:
# The repo has the format of "org/repo@branch".
repo_name, repo_branch = repo.split('@')
params['ref'] = repo_branch

file_path = ".github/workflows"
-    r = get(CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers)
r = get(CONTENTS_URL.format(repo_path=repo_name, file_path=file_path), headers=headers, params=params)
if r.status_code == 404:
return {}
if r.status_code == 403 and int(r.headers["X-RateLimit-Remaining"]) == 0:
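
A minimal sketch of the branch-pinning path added above (values are hypothetical; ref is the standard parameter of GitHub's contents API):

    repo = "CycodeLabs/raven@dev"
    repo_name, repo_branch = repo.split('@')   # -> "CycodeLabs/raven", "dev"
    params = {'ref': repo_branch}              # workflows are then listed from the "dev" branch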
4 changes: 4 additions & 0 deletions src/indexer/index.py
@@ -150,6 +150,10 @@ def index_workflow_file(workflow: str) -> None:
obj["url"] = url
obj["is_public"] = is_public

if 'on' not in obj:
# Could be some invalid/config file like:
# https://github.com/spotify/druid/blob/master/.github/workflows/codeql-config.yml
return
Config.graph.push_object(Workflow.from_dict(obj))
ops_db.insert_to_set(Config.workflow_index_history_set, workflow_full_name)

5 changes: 5 additions & 0 deletions src/queries/__init__.py
@@ -12,13 +12,15 @@ def __init__(
id: str,
name: str,
description: str,
full_description: str,
tags: list,
severity: str,
query: list,
) -> None:
self.id = id
self.name = name
self.description = description
self.full_description = full_description
self.tags = tags
self.severity = severity
self.query = query
@@ -81,7 +83,9 @@ def to_raw(self) -> str:
report += f"{Fore.CYAN}Severity:{Style.RESET_ALL} {self.severity}\n"

wrapped_description = textwrap.fill(self.description, width=description_length)
wrapped_full_description = textwrap.fill(self.full_description, width=description_length)
report += f"{Fore.CYAN}Description:{Style.RESET_ALL} {wrapped_description}\n"
report += f"{Fore.CYAN}Full Description:{Style.RESET_ALL} {wrapped_full_description}\n"
report += f"{Fore.CYAN}Tags:{Style.RESET_ALL} {self.tags}\n"

report += f"{Fore.CYAN}Workflow URLS:{Style.RESET_ALL}\n"
@@ -98,6 +102,7 @@ def _to_dict(self) -> dict:
"id": self.id,
"name": self.name,
"description": self.description,
"full_description": self.full_description,
"tags": self.tags,
"severity": self.severity,
"result": self.result,
91 changes: 88 additions & 3 deletions src/reporter/report.py
@@ -2,6 +2,7 @@
Config,
REPORT_RAW_FORMAT,
REPORT_JSON_FORMAT,
REPORT_SARIF_FORMAT,
SLACK_REPORTER,
)
from src.reporter import slack_reporter
@@ -27,6 +28,12 @@ def json_reporter(queries: List[Query]) -> str:
return json.dumps([query.to_json() for query in queries], indent=4)


def sarif_reporter(queries: List[Query]) -> str:
    # Get the JSON first as it's easier to work with, then build the SARIF result.
json_report = json.loads(json_reporter(queries))
return json.dumps(convert_json_to_sarif(json_report), indent=4)


def get_queries() -> List[Query]:
queries = []
for query_file in listdir(Config.queries_path):
@@ -38,6 +45,7 @@ def get_queries() -> List[Query]:
id=yml_query.get("id"),
name=detection_info.get("name"),
description=detection_info.get("description"),
full_description=detection_info.get("full-description"),
tags=detection_info.get("tags"),
severity=detection_info.get("severity"),
query=yml_query.get("query"),
@@ -60,6 +68,8 @@ def generate() -> None:
report = raw_reporter(filtered_queries)
elif Config.format == REPORT_JSON_FORMAT:
report = json_reporter(filtered_queries)
elif Config.format == REPORT_SARIF_FORMAT:
report = sarif_reporter(filtered_queries)

if Config.reporter == SLACK_REPORTER:
if Config.slack_token and Config.channel_id:
@@ -73,6 +83,81 @@
)

else:
-        print(report)

-    success_exit()
if len(Config.output) > 0:
with open(Config.output, "w") as output_file:
output_file.write(report)
else:
print(report)


def convert_json_to_sarif(findings: list) -> dict:
all_rules = []
all_results = []

# Match severity to CVSS score.
scores = {
'critical': 10,
'high': 8,
'medium': 6,
'low': 3,
'info': 0
}
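    # (For reference, GitHub's code-scanning docs bucket security-severity as:
    # critical >= 9.0, high 7.0-8.9, medium 4.0-6.9, low 0.1-3.9.)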

for finding in findings:
# First add the rules.
rule = {
"id": finding["id"],
"name": finding["name"],
"shortDescription": {
"text": finding["description"]
},
"fullDescription": {
"text": finding["full_description"]
},
"properties": {
"security-severity": scores[finding["severity"]],
"tags": [
"security"
]
}
}

all_rules.append(rule)

# Now for each file mentioned in the "result" list, add a finding.
for result in finding["result"]:
item = {
"ruleId": finding["id"],
"message": {
"text": finding["description"]
},
"locations": [
{
"physicalLocation": {
"artifactLocation": {
"uri": result
}
}
}
]
}

all_results.append(item)

return {
"version": "2.1.0",
"$schema": "http://json.schemastore.org/sarif-2.1.0.json",
"runs": [
{
"tool": {
"driver": {
"name": "Raven Security Analyzer",
"version": "1.0.0",
"informationUri": "https://github.com/CycodeLabs/raven/",
"rules": all_rules
}
},
"results": all_results
}
]
}
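
For reference, each entry in a finding's "result" list becomes one SARIF result object; with hypothetical values, a single converted result looks roughly like:

    {
        "ruleId": "injection-1",
        "message": { "text": "Command injection via untrusted workflow inputs" },
        "locations": [
            { "physicalLocation": { "artifactLocation": {
                "uri": "https://github.com/octo-org/octo-repo/blob/main/.github/workflows/build.yml" } } }
        ]
    }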
4 changes: 3 additions & 1 deletion src/workflow_components/workflow.py
@@ -207,7 +207,9 @@ def from_dict(obj_dict: Dict[str, Any]) -> "Workflow":
# When we meet it, we want to create a special relation to inputs of the reusable workflow.
# We continue to treat the workflow as a regular workflow, and not as a reusable workflow.
        # But the difference is that we connect the different inputs to the workflow.
if "workflow_call" in w.trigger:
# However, a workflow_call can be empty like in:
# https://github.com/python/cpython/blob/68e279b37aae3019979a05ca55f462b11aac14be/.github/workflows/reusable-docs.yml#L4
if "workflow_call" in w.trigger and obj_dict["on"]["workflow_call"] is not None:
wokrflow_call = obj_dict["on"]["workflow_call"]
inputs = wokrflow_call["inputs"]
for input_name, input in inputs.items():
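
The cpython workflow referenced in the comment above declares a bare trigger, roughly:

    on:
      workflow_call:

YAML loads the empty workflow_call value as null, so without the added is-not-None check the inputs lookup below it would raise a TypeError.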