diff --git a/.gitignore b/.gitignore index b0b06c0..e11e212 100644 --- a/.gitignore +++ b/.gitignore @@ -138,3 +138,4 @@ dmypy.json # Pyre type checker .pyre/ +.idea/ diff --git a/src/cmdline.py b/src/cmdline.py index aed8c03..e8f2449 100644 --- a/src/cmdline.py +++ b/src/cmdline.py @@ -4,6 +4,7 @@ from src.downloader.download import ( download_all_workflows_and_actions, download_account_workflows_and_actions, + download_repo_workflows_and_actions, ) from src.indexer.index import index_downloaded_workflows_and_actions from src.reporter.report import generate @@ -24,6 +25,7 @@ REDIS_CLEAN_DEFAULT, DOWNLOAD_COMMAND, DOWNLOAD_ACCOUNT_COMMAND, + DOWNLOAD_REPO_COMMAND, DOWNLOAD_CRAWL_COMMAND, INDEX_COMMAND, REPORT_COMMAND, @@ -39,6 +41,7 @@ DOWNLOAD_COMMAND: { DOWNLOAD_CRAWL_COMMAND: download_all_workflows_and_actions, DOWNLOAD_ACCOUNT_COMMAND: download_account_workflows_and_actions, + DOWNLOAD_REPO_COMMAND: download_repo_workflows_and_actions, }, INDEX_COMMAND: index_downloaded_workflows_and_actions, REPORT_COMMAND: generate, @@ -165,6 +168,28 @@ def raven() -> None: help="Download repositories owned by the authenticated user", ) + repo_download_parser = download_sub_parser.add_parser( + "repo", + help="Download specific repositories, optionally from a specific branch", + parents=[download_parser_options, redis_parser], + ) + + repo_download_parser.add_argument( + "--repo-name", + required=True, + action="append", + type=str, + help="Repository to download, in 'org/repo' or 'org/repo@branch' format (can be passed multiple times)" + ) + + repo_download_parser.add_argument( + "--workflow", + required=False, + action="append", + type=str, + help="Workflow file name to download (can be passed multiple times); if omitted, all workflows are downloaded" + ) + crawl_download_parser.add_argument( "--max-stars", type=int, help="Maximum number of stars for a repository" ) diff --git a/src/config/config.py b/src/config/config.py index 9a47471..5eb1282 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -38,6 +38,7 @@ # CLI commands DOWNLOAD_COMMAND = "download" DOWNLOAD_ACCOUNT_COMMAND = "account" +DOWNLOAD_REPO_COMMAND = "repo" DOWNLOAD_CRAWL_COMMAND = "crawl" INDEX_COMMAND = "index" REPORT_COMMAND = "report" @@ -71,6 +72,8 @@ def load_downloader_config(args) -> None: Config.min_stars = args.get("min_stars", MIN_STARS_DEFAULT) Config.max_stars = args.get("max_stars") Config.account_name = args.get("account_name") + Config.repo_name = args.get("repo_name") + Config.workflow = args.get("workflow") Config.personal = args.get("personal") Config.clean_redis = args.get("clean_redis", REDIS_CLEAN_DEFAULT) @@ -143,6 +146,8 @@ class Config: min_stars: int = None max_stars: int = None account_name: list[str] = [] + repo_name: list[str] = [] + workflow: list[str] = [] personal: bool = None # Indexer Configs diff --git a/src/downloader/download.py b/src/downloader/download.py index 796f66c..13ce562 100644 --- a/src/downloader/download.py +++ b/src/downloader/download.py @@ -77,23 +77,29 @@ def download_all_workflows_and_actions() -> None: download_workflows_and_actions(repo) -def download_workflows_and_actions(repo: str) -> None: +def download_workflows_and_actions(repo: str, only_workflows: list = [], branch: str = '') -> None: """The flow is the following: - First we enumerate .github/workflows directory for workflows - For each such workflow we download it - If that workflow contains uses:..., we analyze the string, and download the action or the reusable workflow. + + We can also restrict the download to a given list of workflows when only a subset is of interest.
""" with RedisConnection(Config.redis_objects_ops_db) as ops_db: if ops_db.exists_in_set(Config.workflow_download_history_set, repo): log.debug(f"[!] Repo {repo} already scanned, skipping.") return - workflows = get_repository_workflows(repo) + workflows = get_repository_workflows(repo, branch=branch) is_public = 1 log.debug(f"[+] Found {len(workflows)} workflows for {repo}") for name, url in workflows.items(): + if len(only_workflows) > 0 and name.lower() not in only_workflows: + log.debug(f"[+] Skipping {name}") + continue + if is_url_contains_a_token(url): """ If the URL contains a token, it means it is a private repository. @@ -112,7 +118,7 @@ def download_workflows_and_actions(repo: str) -> None: # We look for dependant external actions. uses_strings = find_uses_strings(resp.text) for uses_string in uses_strings: - download_action_or_reusable_workflow(uses_string=uses_string, repo=repo) + download_action_or_reusable_workflow(uses_string=uses_string, repo=repo, branch=branch) # Save workflow to redis workflow_unix_path = convert_workflow_to_unix_path(repo, name) @@ -131,7 +137,7 @@ def download_workflows_and_actions(repo: str) -> None: ops_db.insert_to_set(Config.workflow_download_history_set, repo) -def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None: +def download_action_or_reusable_workflow(uses_string: str, repo: str, branch: str = '') -> None: """Whenever we find that workflow is using a "uses:" string, it means we are referencing a composite action or reusable workflow, we try to fetch it. @@ -156,9 +162,9 @@ def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None: return if uses_string_obj.type == UsesStringType.REUSABLE_WORKFLOW: - url = get_repository_reusable_workflow(full_path) + url = get_repository_reusable_workflow(full_path, branch=branch, same_repo=uses_string.startswith('./')) elif uses_string_obj.type == UsesStringType.ACTION: - url = get_repository_composite_action(full_path) + url = get_repository_composite_action(full_path, branch=branch, same_repo=uses_string.startswith('./')) else: # Can happen with docker references. return @@ -229,3 +235,32 @@ def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None: ) # In the future, ref will be with commit sha add_ref_pointer_to_redis(full_path, full_path) + + +def download_repo_workflows_and_actions() -> None: + """Download single repository + We are enumerating the .github/workflows directory, and downloading all the workflows. + In addition if the repository contains action.yml file, it means it is a composite action, + so we download it as well. + For each such workflow we also scan if it uses additional external actions. + If so, we download these as well. + We are trying to cache the downloads as much as we can to reduce redundant download attempts. + """ + log.info(f"[+] Scanning single repository") + + only_workflows = [] + if Config.workflow is not None and len(Config.workflow) > 0: + only_workflows = list(map(str.lower, Config.workflow)) + log.info(f"[+] Will only download the following workflows: {', '.join(only_workflows)}") + + for repo in Config.repo_name: + # Ensure it's of the "org/repo" format. 
+ if repo.split('@', 1)[0].count("/") != 1: + log.error(f"[-] Repository '{repo}' is not in the expected 'org/repo' or 'org/repo@branch' format") + log.fail_exit() + + branch = '' + if '@' in repo: + repo, branch = repo.split('@', 1) + + download_workflows_and_actions(repo, only_workflows=only_workflows, branch=branch) diff --git a/src/downloader/gh_api.py b/src/downloader/gh_api.py index 6009907..4f64a0e 100644 --- a/src/downloader/gh_api.py +++ b/src/downloader/gh_api.py @@ -255,7 +255,7 @@ def get_repository_search(query: str, page: int = 1) -> Dict[str, Any]: return r.json()["items"] -def get_repository_workflows(repo: str) -> Dict[str, str]: +def get_repository_workflows(repo: str, branch: str = '') -> Dict[str, str]: """Returns list of workflows for the specified repository. Returns a dictionary that maps workflow file name, to its donwloadable URL. @@ -265,9 +265,10 @@ """ headers["Authorization"] = f"Token {Config.github_token}" + params = {} if len(branch) == 0 else {'ref': branch} file_path = ".github/workflows" - r = get(CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers) + r = get(CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers, params=params) if r.status_code == 404: return {} if r.status_code == 403 and int(r.headers["X-RateLimit-Remaining"]) == 0: @@ -278,7 +279,7 @@ f"[*] Ratelimit for for contents API depleted. Sleeping {time_to_sleep} seconds" ) time.sleep(time_to_sleep) - return get_repository_workflows(repo) + return get_repository_workflows(repo, branch=branch) if r.status_code != 200: log.error(f"status code: {r.status_code}. Response: {r.text}") return {} @@ -298,7 +299,7 @@ return workflows -def get_repository_composite_action(path: str) -> str: +def get_repository_composite_action(path: str, branch: str = '', same_repo: bool = False) -> str: """Returns downloadble URL for a composite action in the specific path. receives 'path_in_repo' relative path to the repository root to where search the action.yml. @@ -311,12 +312,14 @@ relative_path = "/".join(path_splitted[2:]) headers["Authorization"] = f"Token {Config.github_token}" + params = {'ref': branch} if len(branch) > 0 and same_repo else {} for suffix in ["action.yml", "action.yaml"]: file_path = os.path.join(relative_path, suffix) r = get( CONTENTS_URL.format(repo_path=repo, file_path=file_path), headers=headers, + params=params ) if r.status_code == 404: # can be both yml and yaml @@ -329,7 +332,7 @@ return r.json()["download_url"] -def get_repository_reusable_workflow(path: str) -> str: +def get_repository_reusable_workflow(path: str, branch: str = '', same_repo: bool = False) -> str: """Returns downlodable URL for a reusable workflows in the specific path. Raises exception if network error occured.
@@ -339,10 +342,12 @@ def get_repository_reusable_workflow(path: str) -> str: relative_path = "/".join(path_splitted[2:]) headers["Authorization"] = f"Token {Config.github_token}" + params = {'ref': branch} if len(branch) > 0 and same_repo else {} r = get( CONTENTS_URL.format(repo_path=repo, file_path=relative_path), headers=headers, + params=params ) if r.status_code == 404: return diff --git a/src/indexer/index.py b/src/indexer/index.py index 4864b90..cf66bc1 100644 --- a/src/indexer/index.py +++ b/src/indexer/index.py @@ -150,6 +150,11 @@ def index_workflow_file(workflow: str) -> None: obj["url"] = url obj["is_public"] = is_public + if 'on' not in obj: + # Could be some invalid/config file like: + # https://github.com/spotify/druid/blob/master/.github/workflows/codeql-config.yml + return + Config.graph.push_object(Workflow.from_dict(obj)) ops_db.insert_to_set(Config.workflow_index_history_set, workflow_full_name) diff --git a/src/workflow_components/workflow.py b/src/workflow_components/workflow.py index 4eb2b49..f6750d8 100644 --- a/src/workflow_components/workflow.py +++ b/src/workflow_components/workflow.py @@ -207,7 +207,9 @@ def from_dict(obj_dict: Dict[str, Any]) -> "Workflow": # When we meet it, we want to create a special relation to inputs of the reusable workflow. # We continue to treat the workflow as a regular workflow, and not as a reusable workflow. # But the difference is that we connected the different inputs to the workflow. - if "workflow_call" in w.trigger: + # However, a workflow_call can be empty like in: + # https://github.com/python/cpython/blob/68e279b37aae3019979a05ca55f462b11aac14be/.github/workflows/reusable-docs.yml#L4 + if "workflow_call" in w.trigger and obj_dict["on"]["workflow_call"] is not None: wokrflow_call = obj_dict["on"]["workflow_call"] inputs = wokrflow_call["inputs"] for input_name, input in inputs.items(): diff --git a/tests/integration/integration_consts.py b/tests/integration/integration_consts.py index 47d370e..102505d 100644 --- a/tests/integration/integration_consts.py +++ b/tests/integration/integration_consts.py @@ -65,4 +65,19 @@ }, }, }, + { + "test_name": "test_demo_index_single_repo", + "json_path": "tests/integration/structures_json/demo-1-index.json", + "description": "Tests Demo-1's graph structures combined. It has a workflow that uses the checkout action.", + "queries": { + "nodes_query": GET_NODES_BY_PATH_QUERY, + "relationships_query": GET_RELATIONSHIPS_BY_PATH_QUERY, + "to_format": { + "paths_list": [ + "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "actions/checkout", + ] + }, + }, + } ] diff --git a/tests/integration/structures_json/demo-1-index.json b/tests/integration/structures_json/demo-1-index.json new file mode 100644 index 0000000..564b5ff --- /dev/null +++ b/tests/integration/structures_json/demo-1-index.json @@ -0,0 +1,415 @@ +{ + "nodes": [ + { + "path": "actions/checkout", + "using": "node20", + "name": "Checkout", + "is_public": true, + "_id": "d35e7df441120da9624b8c11e36151be", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeAction" + ] + }, + { + "path": "actions/checkout", + "default": "${{ github.repository }}", + "name": "repository", + "description": "Repository name with owner. 
For example, actions/checkout", + "_id": "775c9f7b8b404a9df37b16c9d4d8f336", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "ref", + "description": "The branch, tag or SHA to checkout. When checking out the repository that triggered a workflow, this defaults to the reference or SHA for that event. Otherwise, uses the default branch.\n", + "_id": "6bf31cd9bee2b2c9a0fd9a8d3c578903", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "${{ github.token }}", + "name": "token", + "description": "Personal access token (PAT) used to fetch the repository. The PAT is configured with the local git config, which enables your scripts to run authenticated git commands. The post-job step removes the PAT.\n\nWe recommend using a service account with the least permissions necessary. Also when generating a new PAT, select the least scopes necessary.\n\n[Learn more about creating and using encrypted secrets](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/creating-and-using-encrypted-secrets)\n", + "_id": "5f4fa35f28767a9155a5813b2cc934d5", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "ssh-key", + "description": "SSH key used to fetch the repository. The SSH key is configured with the local git config, which enables your scripts to run authenticated git commands. The post-job step removes the SSH key.\n\nWe recommend using a service account with the least permissions necessary.\n\n[Learn more about creating and using encrypted secrets](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/creating-and-using-encrypted-secrets)\n", + "_id": "c53d6fe7a19a576c8bfefe6cfa80870b", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "ssh-known-hosts", + "description": "Known hosts in addition to the user and global host key database. The public SSH keys for a host may be obtained using the utility `ssh-keyscan`. For example, `ssh-keyscan github.com`. The public key for github.com is always implicitly added.\n", + "_id": "145d5c9a78c4d7564f7b79ec495b6ff2", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "ssh-strict", + "description": "Whether to perform strict host key checking. When true, adds the options `StrictHostKeyChecking=yes` and `CheckHostIP=no` to the SSH command line. Use the input `ssh-known-hosts` to configure additional hosts.\n", + "_id": "ec56ff9d7ca004ba974a2fca1b01f2c5", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "git", + "name": "ssh-user", + "description": "The user to use when connecting to the remote SSH host. 
By default 'git' is used.\n", + "_id": "c647109fbdf1f6f88205b9d01b7e84cb", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "persist-credentials", + "description": "Whether to configure the token or SSH key with the local git config", + "_id": "fe19be86972043c79c7ad0f618559ade", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "path", + "description": "Relative path under $GITHUB_WORKSPACE to place the repository", + "_id": "3470907d9833db0f15032d1aadd1dbc2", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "clean", + "description": "Whether to execute `git clean -ffdx && git reset --hard HEAD` before fetching", + "_id": "a8d8ef152d8986d297b84bc2faf66d1e", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "filter", + "description": "Partially clone against a given filter. Overrides sparse-checkout if set.\n", + "_id": "974202e17cde42b352e662c9c55df9ff", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "sparse-checkout", + "description": "Do a sparse checkout on given patterns. Each pattern should be separated with new lines.\n", + "_id": "c626f387aeac44aa82b5e27f4cbaca54", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "sparse-checkout-cone-mode", + "description": "Specifies whether to use cone-mode when doing a sparse checkout.\n", + "_id": "90aa405b4e4278ac32892e1e1756b807", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": 1, + "name": "fetch-depth", + "description": "Number of commits to fetch. 
0 indicates all history for all branches and tags.", + "_id": "00fb2bfc353337457e5be8ea01e27984", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "false", + "name": "fetch-tags", + "description": "Whether to fetch tags, even if fetch-depth > 0.", + "_id": "a32f4c2bbd5fb33b56c7b2634b343e62", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "show-progress", + "description": "Whether to show progress status output when fetching.", + "_id": "c871387fa99df881290dbd906bae83a6", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "false", + "name": "lfs", + "description": "Whether to download Git-LFS files", + "_id": "99f82cfda3b119fcf2b38ebc651dc0a8", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "false", + "name": "submodules", + "description": "Whether to checkout submodules: `true` to checkout submodules or `recursive` to recursively checkout submodules.\n\nWhen the `ssh-key` input is not provided, SSH URLs beginning with `git@github.com:` are converted to HTTPS.\n", + "_id": "7cdf94b14af56d70417e7b6d92ff316e", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "default": "true", + "name": "set-safe-directory", + "description": "Add repository path as safe.directory for Git global config by running `git config --global --add safe.directory `", + "_id": "98aaa1fa6903184fa592300457546c9e", + "required": false, + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "actions/checkout", + "name": "github-server-url", + "description": "The base URL for the GitHub instance that you are trying to clone from, will use environment defaults to fetch from the same instance that the workflow is running from unless specified. 
Example URLs are https://github.com or https://my-ghes-server.example.com", + "_id": "e8f0d587842e43432e7c96ddcf2ef5e8", + "url": "https://github.com/actions/checkout/tree/main/action.yml", + "required": false, + "labels": [ + "CompositeActionInput" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "name": "run", + "is_public": true, + "_id": "22137cee99760506f369934d49f719f8", + "trigger": [ + "workflow_dispatch" + ], + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Workflow" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "machine": [ + "ubuntu-latest" + ], + "name": "demo_test", + "_id": "ec3b1d0fd5ec71b9f43547f026f579fd", + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Job" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "ref": "v4", + "uses": "actions/checkout@v4", + "_id": "c1306c77e3af9cdedcedee69c59c96c1", + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Step" + ] + }, + { + "path": "RavenIntegrationTests/Demo-1/.github/workflows/demo-workflow.yml", + "name": "PrintEnv", + "run": "printenv", + "_id": "6f76bec5bf9d4e683ef736d3ffe8b842", + "url": "https://github.com/RavenIntegrationTests/Demo-1/tree/main/.github/workflows/demo-workflow.yml", + "labels": [ + "Step" + ] + } + ], + "relationships": [ + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "775c9f7b8b404a9df37b16c9d4d8f336" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "6bf31cd9bee2b2c9a0fd9a8d3c578903" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "5f4fa35f28767a9155a5813b2cc934d5" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c53d6fe7a19a576c8bfefe6cfa80870b" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "145d5c9a78c4d7564f7b79ec495b6ff2" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "ec56ff9d7ca004ba974a2fca1b01f2c5" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c647109fbdf1f6f88205b9d01b7e84cb" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "fe19be86972043c79c7ad0f618559ade" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "3470907d9833db0f15032d1aadd1dbc2" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "a8d8ef152d8986d297b84bc2faf66d1e" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "974202e17cde42b352e662c9c55df9ff" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c626f387aeac44aa82b5e27f4cbaca54" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "90aa405b4e4278ac32892e1e1756b807" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": 
"COMPOSITE_ACTION_INPUT", + "end_node": "00fb2bfc353337457e5be8ea01e27984" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "a32f4c2bbd5fb33b56c7b2634b343e62" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "c871387fa99df881290dbd906bae83a6" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "99f82cfda3b119fcf2b38ebc651dc0a8" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "7cdf94b14af56d70417e7b6d92ff316e" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "98aaa1fa6903184fa592300457546c9e" + }, + { + "start_node": "d35e7df441120da9624b8c11e36151be", + "type": "COMPOSITE_ACTION_INPUT", + "end_node": "e8f0d587842e43432e7c96ddcf2ef5e8" + }, + { + "start_node": "c1306c77e3af9cdedcedee69c59c96c1", + "type": "ACTION", + "end_node": "d35e7df441120da9624b8c11e36151be" + }, + { + "start_node": "ec3b1d0fd5ec71b9f43547f026f579fd", + "type": "STEPS", + "end_node": "c1306c77e3af9cdedcedee69c59c96c1" + }, + { + "start_node": "ec3b1d0fd5ec71b9f43547f026f579fd", + "type": "STEPS", + "end_node": "6f76bec5bf9d4e683ef736d3ffe8b842" + }, + { + "start_node": "22137cee99760506f369934d49f719f8", + "type": "JOBS", + "end_node": "ec3b1d0fd5ec71b9f43547f026f579fd" + } + ] +} \ No newline at end of file diff --git a/tests/integration/test_single_repo_download.py b/tests/integration/test_single_repo_download.py new file mode 100644 index 0000000..ac9088a --- /dev/null +++ b/tests/integration/test_single_repo_download.py @@ -0,0 +1,28 @@ +from colorama import Fore, Style +from tests.utils import ( + get_graph_structure, + assert_graph_structures, +) +from tests.integration.integration_consts import TESTS_CONFIGS +from tests.tests_init import init_integration_single_env + + +def test_single_repo_download() -> None: + init_integration_single_env() + + test_config = next((item for item in TESTS_CONFIGS if item["test_name"] == "test_demo_index_single_repo"), None) + + print( + f"{Fore.CYAN}Running integration test: {test_config['test_name']}.{Style.RESET_ALL}" + ) + + # Get the queries from the test config + query_config = test_config["queries"] + nodes_query = query_config["nodes_query"].format(**query_config["to_format"]) + relationships_query = query_config["relationships_query"].format( + **query_config["to_format"] + ) + + # Get the graph structure from the queries and assert it + graph_structure = get_graph_structure(nodes_query, relationships_query) + assert_graph_structures(graph_structure, test_config["json_path"]) diff --git a/tests/integration/test_single_workflow_download.py b/tests/integration/test_single_workflow_download.py new file mode 100644 index 0000000..95cb881 --- /dev/null +++ b/tests/integration/test_single_workflow_download.py @@ -0,0 +1,28 @@ +from colorama import Fore, Style +from tests.utils import ( + get_graph_structure, + assert_graph_structures, +) +from tests.integration.integration_consts import TESTS_CONFIGS +from tests.tests_init import init_integration_single_env + + +def test_single_workflow_download() -> None: + init_integration_single_env(only_workflows=["demo-workflow.yml"]) + + test_config = next((item for item in TESTS_CONFIGS if item["test_name"] == "test_demo_index_single_repo"), None) + + print( + f"{Fore.CYAN}Running integration test: 
{test_config['test_name']}.{Style.RESET_ALL}" + ) + + # Get the queries from the test config + query_config = test_config["queries"] + nodes_query = query_config["nodes_query"].format(**query_config["to_format"]) + relationships_query = query_config["relationships_query"].format( + **query_config["to_format"] + ) + + # Get the graph structure from the queries and assert it + graph_structure = get_graph_structure(nodes_query, relationships_query) + assert_graph_structures(graph_structure, test_config["json_path"]) diff --git a/tests/tests_init.py b/tests/tests_init.py index b864e5e..11015ef 100644 --- a/tests/tests_init.py +++ b/tests/tests_init.py @@ -1,7 +1,8 @@ from os import getenv from src.config.config import load_downloader_config, load_indexer_config -from src.downloader.download import download_account_workflows_and_actions +from src.downloader.download import download_account_workflows_and_actions, download_repo_workflows_and_actions from src.indexer.index import index_downloaded_workflows_and_actions +from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT, Config def init_integration_env(): @@ -10,12 +11,20 @@ def init_integration_env(): index_downloaded_workflows_and_actions() +def init_integration_single_env(only_workflows: list = []): + load_integration_tests_config() + Config.workflow = only_workflows + download_repo_workflows_and_actions() + index_downloaded_workflows_and_actions() + + def load_integration_tests_config() -> None: load_downloader_config( { "debug": False, "token": getenv("GITHUB_TOKEN"), "account_name": ["RavenIntegrationTests"], + "repo_name": ["RavenIntegrationTests/Demo-1"], "redis_host": "raven-redis-test", "redis_port": 6379, "clean_redis": True,
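Usage sketch (an assumption, not part of the patch): if the package's console entry point is installed as 'raven', as wired up in src/cmdline.py, and the shared 'download_parser_options'/'redis_parser' parents supply the usual authentication and Redis options, the new subcommand would be invoked roughly like this:

    raven download repo --repo-name RavenIntegrationTests/Demo-1
    raven download repo --repo-name org/some-repo@dev --workflow demo-workflow.yml

Here '--repo-name' may be repeated and accepts 'org/repo' or 'org/repo@branch', while '--workflow' optionally limits the download to the named workflow files. 'org/some-repo@dev' is a hypothetical placeholder; 'RavenIntegrationTests/Demo-1' is the repository used by the integration tests in this patch.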