diff --git a/copier.yml b/copier.yml index bce991a..f0a5c34 100644 --- a/copier.yml +++ b/copier.yml @@ -23,7 +23,8 @@ _tasks: REQUIRED_APPROVING_REVIEW_COUNT="{{ required_approving_review_count }}" REQUIRE_LAST_PUSH_APPROVAL="{{ require_last_push_approval | lower }}" REQUIRE_CONVERSATION_RESOLUTION="{{ require_conversation_resolution | lower }}" - ./copier_scripts/run_tasks.sh + +#./copier_scripts/run_tasks.sh # Questions to ask the user _message_before_copy: | @@ -85,8 +86,8 @@ repository_slug: is_public_repo: type: bool help: "Not prompted. This is computed for re-use." default: "{{ repository_visibility == 'public' }}" when: false module_name: type: str diff --git a/project_template/.devcontainer/devcontainer.json b/project_template/.devcontainer/devcontainer.json index 185c78e..365d433 100644 --- a/project_template/.devcontainer/devcontainer.json +++ b/project_template/.devcontainer/devcontainer.json @@ -2,7 +2,7 @@ "name": "Python RAP Development", "image": "mcr.microsoft.com/devcontainers/python:3.12", "features": { - "ghcr.io/devcontainers-contrib/features/pre-commit:2": {} + "ghcr.io/devcontainers-extra/features/pre-commit:2": {} }, "customizations": { "vscode": { @@ -25,11 +25,11 @@ "python.testing.pytestEnabled": true, "editor.formatOnSave": true, "editor.codeActionsOnSave": { - "source.fixAll": true, - "source.organizeImports": true + "source.fixAll": "always", + "source.organizeImports": "always" }, "[python]": { "editor.defaultFormatter": "charliermarsh.ruff" } } } diff --git a/project_template/.github/CODEOWNERS.jinja b/project_template/.github/CODEOWNERS.jinja new file mode 100644 index 0000000..7ef063b --- /dev/null +++ b/project_template/.github/CODEOWNERS.jinja @@ -0,0 +1 @@ +@${{ code_owners }} diff --git a/project_template/.github/CODEOWNERS.njk b/project_template/.github/CODEOWNERS.njk deleted file mode 100644 index 
7c1c6d4..0000000 --- a/project_template/.github/CODEOWNERS.njk +++ /dev/null @@ -1 +0,0 @@ -@${{ values.code_owners }} diff --git a/project_template/.github/dependabot.yml.njk b/project_template/.github/dependabot.yml.jinja similarity index 93% rename from project_template/.github/dependabot.yml.njk rename to project_template/.github/dependabot.yml.jinja index 7367b95..8716c99 100644 --- a/project_template/.github/dependabot.yml.njk +++ b/project_template/.github/dependabot.yml.jinja @@ -27,7 +27,7 @@ updates: - package-ecosystem: pip # When set to 0, version updates are disabled. - open-pull-requests-limit: ${{ values.dependabot_open_pull_requests_limit }} + open-pull-requests-limit: ${{ dependabot_open_pull_requests_limit }} # Look for a pyproject.toml/requirements.txt/Pipfile.lock in the root directory directory: / diff --git a/project_template/.github/linters/.markdown-link-check.json.njk b/project_template/.github/linters/.markdown-link-check.json.jinja similarity index 59% rename from project_template/.github/linters/.markdown-link-check.json.njk rename to project_template/.github/linters/.markdown-link-check.json.jinja index 3df4376..e98e851 100644 --- a/project_template/.github/linters/.markdown-link-check.json.njk +++ b/project_template/.github/linters/.markdown-link-check.json.jinja @@ -1,10 +1,10 @@ { - {% if values.is_public_repo -%} + {% if is_public_repo -%} "ignorePatterns": [], {%- else -%} "ignorePatterns": [ { - "pattern": "${{ values.repository_owner }}/${{ values.repository_name }}" + "pattern": "${{ repository_owner }}/${{ repository_name }}" } ], {%- endif %} diff --git a/project_template/.github/workflows/ci.yml.njk b/project_template/.github/workflows/ci.yml.jinja similarity index 88% rename from project_template/.github/workflows/ci.yml.njk rename to project_template/.github/workflows/ci.yml.jinja index b33e334..82d6924 100644 --- a/project_template/.github/workflows/ci.yml.njk +++ b/project_template/.github/workflows/ci.yml.jinja @@ -3,9 
+3,9 @@ name: CI on: # yamllint disable-line rule:truthy push: - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] pull_request: - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] concurrency: {% raw %} diff --git a/project_template/.github/workflows/mypy.yml.njk b/project_template/.github/workflows/mypy.yml.jinja similarity index 81% rename from project_template/.github/workflows/mypy.yml.njk rename to project_template/.github/workflows/mypy.yml.jinja index 5dcbf12..766d8b8 100644 --- a/project_template/.github/workflows/mypy.yml.njk +++ b/project_template/.github/workflows/mypy.yml.jinja @@ -3,9 +3,9 @@ name: Type Check on: # yamllint disable-line rule:truthy push: - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] pull_request: - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] concurrency: {% raw %} @@ -33,4 +33,4 @@ jobs: run: make install-dev - name: Run MyPy - run: poetry run mypy ${{ values.module_name }} + run: poetry run mypy ${{ module_name }} diff --git a/project_template/.github/workflows/security-scan.yml.njk b/project_template/.github/workflows/security-scan.yml.jinja similarity index 88% rename from project_template/.github/workflows/security-scan.yml.njk rename to project_template/.github/workflows/security-scan.yml.jinja index 54a45a5..69860d9 100644 --- a/project_template/.github/workflows/security-scan.yml.njk +++ b/project_template/.github/workflows/security-scan.yml.jinja @@ -3,9 +3,9 @@ name: Security Scan on: # yamllint disable-line rule:truthy push: - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] pull_request: - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] concurrency: {% raw %} diff --git a/project_template/.github/workflows/${{ 'codeql.yml' if values.is_public_repo }}.njk b/project_template/.github/workflows/{{ 'codeql.yml' if is_public_repo }}.jinja similarity index 
97% rename from project_template/.github/workflows/${{ 'codeql.yml' if values.is_public_repo }}.njk rename to project_template/.github/workflows/{{ 'codeql.yml' if is_public_repo }}.jinja index 7fbeb77..ccbeff1 100644 --- a/project_template/.github/workflows/${{ 'codeql.yml' if values.is_public_repo }}.njk +++ b/project_template/.github/workflows/{{ 'codeql.yml' if is_public_repo }}.jinja @@ -9,10 +9,10 @@ name: CodeQL on: # yamllint disable-line rule:truthy push: - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] pull_request: # The branches below must be a subset of the branches above - branches: [${{ values.default_branch }}] + branches: [${{ default_branch }}] schedule: # Run on a daily interval at 12pm UTC diff --git a/project_template/.gitignore b/project_template/.gitignore index 989982f..41a9781 100644 --- a/project_template/.gitignore +++ b/project_template/.gitignore @@ -160,8 +160,6 @@ cython_debug/ # Bandit .bandit -# Copier helpers -copier_scripts/ # DS_Store files .DS_Store diff --git a/project_template/COMPLIANCE.md.njk b/project_template/COMPLIANCE.md.jinja similarity index 96% rename from project_template/COMPLIANCE.md.njk rename to project_template/COMPLIANCE.md.jinja index d08d986..8494071 100644 --- a/project_template/COMPLIANCE.md.njk +++ b/project_template/COMPLIANCE.md.jinja @@ -1,6 +1,6 @@ # Policy Compliance Checklist -This document verifies compliance with ONS policies for the ${{ values.repository_name }} project. +This document verifies compliance with ONS policies for the ${{ repository_name }} project. 
## GitHub Usage Policy Compliance @@ -15,12 +15,12 @@ This document verifies compliance with ONS policies for the ${{ values.repositor - **Configuration**: - Security updates: Always enabled - Version updates: Configurable via template options - - Open PR limit: {{ values.dependabot_open_pull_requests_limit or 10 }} + - Open PR limit: {{ dependabot_open_pull_requests_limit or 10 }} ### ✅ CODEOWNERS File - **Status**: Generated automatically - **Location**: `.github/CODEOWNERS` -- **Owners**: {{ values.code_owners }} +- **Owners**: {{ code_owners }} ### ✅ README File - **Status**: Generated with comprehensive documentation @@ -33,7 +33,7 @@ This document verifies compliance with ONS policies for the ${{ values.repositor - **Badge**: License badge included in README ### ✅ Branching Strategy -- **Default Branch**: {{ values.default_branch }} +- **Default Branch**: {{ default_branch }} - **Protection Rules**: Configured via Backstage template - **Reviews Required**: Minimum 1 approving review - **Status Checks**: CI/CD workflows must pass diff --git a/project_template/Makefile.njk b/project_template/Makefile.jinja similarity index 78% rename from project_template/Makefile.njk rename to project_template/Makefile.jinja index 98db5ca..edc4615 100644 --- a/project_template/Makefile.njk +++ b/project_template/Makefile.jinja @@ -24,15 +24,15 @@ lint: ## Run all linters (ruff). .PHONY: security-scan security-scan: ## Run security scan using Bandit. - poetry run bandit -r ${{ values.module_name }} + poetry run bandit -r ${{ module_name }} .PHONY: test test: ## Run all the tests and check coverage. - poetry run pytest -n auto --cov=${{ values.module_name }} --cov-report term-missing --cov-fail-under=50 tests/ + poetry run pytest -n auto --cov=${{ module_name }} --cov-report term-missing --cov-fail-under=50 tests/ .PHONY: test-unit test-unit: ## Run the unit tests and check coverage. 
- poetry run pytest -n auto --cov=${{ values.module_name }} --cov-report term-missing --cov-fail-under=50 tests/unit + poetry run pytest -n auto --cov=${{ module_name }} --cov-report term-missing --cov-fail-under=50 tests/unit .PHONY: test-e2e test-e2e: ## Run the end-to-end tests. diff --git a/project_template/README.md.njk b/project_template/README.md.jinja similarity index 83% rename from project_template/README.md.njk rename to project_template/README.md.jinja index 0fe3013..0aba5a7 100644 --- a/project_template/README.md.njk +++ b/project_template/README.md.jinja @@ -1,15 +1,15 @@ -# ${{ values.repository_name }} +# ${{ repository_name }} -[![Build Status](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/actions/workflows/ci.yml/badge.svg)](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/actions/workflows/ci.yml) -[![Build Status](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/actions/workflows/security-scan.yml/badge.svg)](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/actions/workflows/security-scan.yml) -{%- if values.is_public_repo %} -[![Build Status](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/actions/workflows/codeql.yml/badge.svg)](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/actions/workflows/codeql.yml) +[![Build Status](https://github.com/${{ repository_owner }}/${{ repository_name }}/actions/workflows/ci.yml/badge.svg)](https://github.com/${{ repository_owner }}/${{ repository_name }}/actions/workflows/ci.yml) +[![Build Status](https://github.com/${{ repository_owner }}/${{ repository_name }}/actions/workflows/security-scan.yml/badge.svg)](https://github.com/${{ repository_owner }}/${{ repository_name }}/actions/workflows/security-scan.yml) +{%- if is_public_repo %} +[![Build Status](https://github.com/${{ repository_owner }}/${{ repository_name 
}}/actions/workflows/codeql.yml/badge.svg)](https://github.com/${{ repository_owner }}/${{ repository_name }}/actions/workflows/codeql.yml) {%- endif %} [![Linting: Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![poetry-managed](https://img.shields.io/badge/poetry-managed-blue)](https://python-poetry.org/) -[![License - MIT](https://img.shields.io/badge/licence%20-MIT-1ac403.svg)](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/blob/main/LICENSE) +[![License - MIT](https://img.shields.io/badge/licence%20-MIT-1ac403.svg)](https://github.com/${{ repository_owner }}/${{ repository_name }}/blob/main/LICENSE) -${{ values.repository_description }} +${{ repository_description }} This project follows the Reproducible Analytical Pipeline (RAP) methodology, providing a modular ETL (Extract, Transform, Load) framework for data processing workflows. @@ -72,7 +72,7 @@ Ensure you have the following installed: 1. Clone the repository and install the required dependencies. ```bash -git clone https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}.git +git clone https://github.com/${{ repository_owner }}/${{ repository_name }}.git ``` 2. Install dependencies @@ -106,7 +106,7 @@ make run ```bash # Extract data -poetry run python -c "from ${{ values.module_name }}.extract import extract_from_source; print(extract_from_source('example_data.csv'))" +poetry run python -c "from ${{ module_name }}.extract import extract_from_source; print(extract_from_source('example_data.csv'))" # Run full pipeline with custom parameters poetry run python run_etl.py @@ -115,7 +115,7 @@ poetry run python run_etl.py 3. 
**Programmatic usage:** ```python -from ${{ values.module_name }} import ETLPipeline +from ${{ module_name }} import ETLPipeline pipeline = ETLPipeline() success = pipeline.run_pipeline( @@ -194,12 +194,12 @@ make security-scan To run type checking, run: ```bash -poetry run mypy ${{ values.module_name }} +poetry run mypy ${{ module_name }} ``` ### GitHub actions -Linting/formatting and Security Scanning GitHub actions are enabled by default on template repositories. If you go to the `Actions` tab on your [repository](https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}/actions), you can view all the workflows for the repository. If an action has failed, it will show a red circle with a cross in it. +Linting/formatting and Security Scanning GitHub actions are enabled by default on template repositories. If you go to the `Actions` tab on your [repository](https://github.com/${{ repository_owner }}/${{ repository_name }}/actions), you can view all the workflows for the repository. If an action has failed, it will show a red circle with a cross in it. To find out more details about why it failed: diff --git a/project_template/catalog-info.yaml.njk b/project_template/catalog-info.yaml.jinja similarity index 61% rename from project_template/catalog-info.yaml.njk rename to project_template/catalog-info.yaml.jinja index d42f4c0..a61f845 100644 --- a/project_template/catalog-info.yaml.njk +++ b/project_template/catalog-info.yaml.jinja @@ -1,17 +1,17 @@ apiVersion: backstage.io/v1alpha1 kind: Component metadata: - name: ${{ values.repository_name }} - description: ${{ values.repository_description | dump }} + name: {{ repository_name }} + description: {{ repository_description | tojson }} tags: - python - etl annotations: backstage.io/techdocs-ref: dir:. 
- github.com/project-slug: ${{ values.repository_owner }}/${{ values.repository_name }} + github.com/project-slug: {{ repository_owner }}/{{ repository_name }} backstage.io/source-template: "python-etl-template" links: - - url: https://codespaces.new/${{ values.repository_owner }}/${{ values.repository_name }} + - url: https://codespaces.new/{{ repository_owner }}/{{ repository_name }} title: Open Development Environment icon: scaffolder - url: https://officenationalstatistics.sharepoint.com/sites/ONS_DDaT_Communities/SitePages/SE%20-%20Principles,%20Policies,%20Guidelines%20&%20more.aspx?csf=1&web=1&share=EQI8qpmdcZ1MvVA__6yKAtgBkWCMhU-pty-SBeAs1KVAEQ&e=jlgchY @@ -20,13 +20,13 @@ metadata: - url: https://app.pluralsight.com/channels/details/c4f16af4-8278-440c-9ae8-fc9e95f1f34d title: Learning Pathway icon: help - - url: vscode://vscode.git/clone?url=https://github.com/${{ values.repository_owner }}/${{ values.repository_name }}.git + - url: vscode://vscode.git/clone?url=https://github.com/{{ repository_owner }}/{{ repository_name }}.git title: Open in Local Visual Studio Code icon: scaffolder spec: type: module lifecycle: experimental - owner: ${{ values.catalog_owner | dump }} - {%- if values.catalog_system %} - system: ${{ values.catalog_system | dump }} + owner: {{ catalog_owner | tojson }} + {%- if catalog_system %} + system: {{ catalog_system | tojson }} {%- endif %} diff --git a/project_template/copier_scripts/helpers.sh b/project_template/copier_scripts/helpers.sh new file mode 100755 index 0000000..2c980dc --- /dev/null +++ b/project_template/copier_scripts/helpers.sh @@ -0,0 +1,203 @@ +#!/bin/bash + +set -euo pipefail + +# Define ANSI color codes +NC='\033[0m' # No Color +GREEN='\033[0;32m' +CYAN='\033[0;36m' +YELLOW='\033[1;33m' +RED='\033[0;31m' + +# Functions for colored logging +success() { + printf "✅ ${GREEN}%s${NC}\n" "$1" +} + +info() { + printf "ℹ️ ${CYAN}%s${NC}\n" "$1" +} + +warn() { + printf "⚠️ ${YELLOW}%s${NC}\n" "$1" +} + +error() { + printf "❌ 
${RED}%s${NC}\n" "$1" +} + +gh_authenticated() { + gh auth status &>/dev/null +} + +repo_exists() { + gh repo view "$REPO_OWNER/$REPO_NAME" &>/dev/null +} + +create_repo() { + if [ -z "$REPO_NAME" ]; then + error "Repository name (REPO_NAME) is not set. Please set it first. Skipping repository creation." + return 1 + fi + + if [ -z "$REPO_OWNER" ]; then + error "Repository owner (REPO_OWNER) is not set. Please set it first. Skipping repository creation." + return 1 + fi + + REPO_DESCRIPTION="${REPO_DESCRIPTION:-}" # Default to empty string if not provided + REPO_VISIBILITY="${REPO_VISIBILITY:-public}" # Default to public if not provided + + if [[ "$REPO_VISIBILITY" != "public" && "$REPO_VISIBILITY" != "private" && "$REPO_VISIBILITY" != "internal" ]]; then + error "Invalid visibility. Use 'public', 'private', or 'internal'. Skipping repository creation." + return 1 + fi + + if repo_exists; then + warn "Repository $REPO_OWNER/$REPO_NAME already exists. Skipping repository creation." + # We don't want to throw an error if the repository already exists as the next steps are graceful + return 0 + fi + + # Create the repository; return non-zero on failure so callers gating on create_repo stop + if ! push_status=$( + gh repo create "$REPO_OWNER/$REPO_NAME" --description "$REPO_DESCRIPTION" "--$REPO_VISIBILITY" 2>&1 + ); then + error "Repo Creation Failure: $push_status" + return 1 + fi + success "Created Repo: $push_status" +} + +set_remote_url() { + if git remote get-url origin &>/dev/null; then + return 0 + fi + + if ssh -T git@github.com &>/dev/null; then + git remote add origin "https://github.com/$REPO_OWNER/$REPO_NAME.git" + else + git remote add origin "git@github.com:$REPO_OWNER/$REPO_NAME.git" + fi +} + +# Function to check whether secret scanning should be enabled +enable_secret_scanning() { + # if repo not public, secret scanning is not available without GitHub Advanced Security + if [[ "$REPO_VISIBILITY" != "public" ]]; then + return 1 + fi +} + +update_repo_settings() { + if ! 
repo_setting_status=$(gh api -X PATCH "/repos/$REPO_OWNER/$REPO_NAME" \ + --input=<(echo "$JSON_REPO_CONFIG") 2>&1); then + error "Repository Configuration Failure: $repo_setting_status" + else + success "Repository Configuration Updated" + fi +} + +enable_vulnerability_alerts() { + if ! vulnerability_alerts_status=$(gh api \ + --method PUT \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/$REPO_OWNER/$REPO_NAME/vulnerability-alerts" 2>&1); then + error "Vulnerability Alerts Failure: $vulnerability_alerts_status" + else + success "Vulnerability Alerts Enabled" + fi +} + +enable_automated_security_fixes() { + enable_vulnerability_alerts + + if ! security_fixes_status=$(gh api \ + --method PUT \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/$REPO_OWNER/$REPO_NAME/automated-security-fixes" 2>&1); then + error "Dependabot Security Fixes Failure: $security_fixes_status" + else + success "Automated Dependabot Security Fixes Enabled" + fi +} + +update_branch_protection() { + if ! branch_protection_status=$(gh api -X PUT "/repos/$REPO_OWNER/$REPO_NAME/branches/${DEFAULT_BRANCH}/protection" \ + --input=<(echo "$JSON_BRANCH_PROTECTION_CONFIG") 2>&1); then + error "Branch Protection Failure: $branch_protection_status" + else + success "Branch Protection Enabled" + fi +} + +JSON_REPO_CONFIG=$( + cat </dev/null; then + warn "GitHub CLI (gh) is not installed https://cli.github.com/. Skipping repository creation and setup." + info "If you do not wish to use the GitHub CLI, you can manually create a repository and push up the contents. https://github.com/ONSdigital/ons-python-template#initialising-a-git-repository-and-pushing-to-github" + exit 0 +fi + +# Check if GitHub CLI is authenticated +if ! gh_authenticated; then + error "GitHub CLI (gh) is installed but is not authenticated. Please authenticate using 'gh auth login' first. Skipping repository creation and setup." 
+ exit 0 +fi + +################################ +# Create & set up the repository +################################ + +# Check if branch protection is set up, if so create a pull request +branch_protection_exists=$( + gh api "repos/$REPO_OWNER/$REPO_NAME/branches/$DEFAULT_BRANCH/protection" &>/dev/null + echo $? +) + +# Initialise the repository gracefully +git init >/dev/null + +# Set the remote URL +set_remote_url + +# Update the repository contents if there are any changes and repo setup is successful +if [[ $(git status --porcelain) ]] && create_repo; then + git branch -M "$DEFAULT_BRANCH" + git add . + + if ! commit_status=$( + git commit -m "Update contents from base template" 2>&1 + ); then + error "Commit Failure: $commit_status" + fi + + if [ "$branch_protection_exists" -ne 0 ]; then + # Branch protection is not set up, push directly for the first time + + if ! push_status=$( + git push -u origin "$DEFAULT_BRANCH" -f 2>&1 + ); then + error "Push Failure: $push_status" + else + success "Repository Contents Pushed" + fi + + else + # Branch protection is set up, create a pull request + git checkout -b "update-contents-from-base-template" >/dev/null 2>&1 + + if ! push_status=$( + git push -u origin "update-contents-from-base-template" -f 2>&1 + ); then + error "Push Failure: $push_status" + else + success "Repository contents pushed to branch update-contents-from-base-template" + fi + + if ! status=$( + gh pr create \ + --base "$DEFAULT_BRANCH" \ + --title "Update contents from base template" \ + --body "Automated pull request to update repo contents from https://github.com/ONSdigital/ons-python-template" \ + 2>&1 + ); then + warn "$status" + warn "Updated existing PR with the new contents." + else + success "A pull request has been created with the updated contents since branch protection is enabled. 
$status" + fi + fi +fi + +# Update repository settings and branch protection if they do not exist +if [ "$branch_protection_exists" -ne 0 ]; then + update_repo_settings + enable_automated_security_fixes + update_branch_protection +fi diff --git a/project_template/copier_scripts/setup_package_manager.sh b/project_template/copier_scripts/setup_package_manager.sh new file mode 100755 index 0000000..96329ff --- /dev/null +++ b/project_template/copier_scripts/setup_package_manager.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -euo pipefail + +if [[ "$PACKAGE_MANAGER" == "poetry" ]]; then + rm -f Pipfile Pipfile.lock + + # Add content of linter-configs.toml into pyproject.toml at the end of the file + echo "" >>pyproject.toml + cat copier_scripts/linter-configs.toml >>pyproject.toml + +elif [[ "$PACKAGE_MANAGER" == "pipenv" ]]; then + rm -f poetry.lock pyproject.toml + + # Add content of linter-configs.toml into pyproject.toml + cat copier_scripts/linter-configs.toml >pyproject.toml + +fi diff --git a/project_template/docs/adr/0001-package-manager.md.njk b/project_template/docs/adr/0001-package-manager.md.jinja similarity index 100% rename from project_template/docs/adr/0001-package-manager.md.njk rename to project_template/docs/adr/0001-package-manager.md.jinja diff --git a/project_template/docs/adr/0002-linting-tools.md.njk b/project_template/docs/adr/0002-linting-tools.md.jinja similarity index 100% rename from project_template/docs/adr/0002-linting-tools.md.njk rename to project_template/docs/adr/0002-linting-tools.md.jinja diff --git a/project_template/docs/adr/0003-testing-framework.md.njk b/project_template/docs/adr/0003-testing-framework.md.jinja similarity index 100% rename from project_template/docs/adr/0003-testing-framework.md.njk rename to project_template/docs/adr/0003-testing-framework.md.jinja diff --git a/project_template/docs/adr/index.md.njk b/project_template/docs/adr/index.md.jinja similarity index 88% rename from project_template/docs/adr/index.md.njk 
rename to project_template/docs/adr/index.md.jinja index 65c9685..3945861 100644 --- a/project_template/docs/adr/index.md.njk +++ b/project_template/docs/adr/index.md.jinja @@ -1,6 +1,6 @@ # Architectural Decision Records -This directory contains Architectural Decision Records (ADRs) for the ${{ values.repository_name }} project. +This directory contains Architectural Decision Records (ADRs) for the ${{ repository_name }} project. ## What are ADRs? @@ -8,7 +8,7 @@ ADRs are documents that capture important architectural decisions made along wit ## ADR Index -1. [Package Manager Selection](./0001-package-manager.md) - Decision on using ${{ values.package_manager }} for dependency management +1. [Package Manager Selection](./0001-package-manager.md) - Decision on using ${{ package_manager }} for dependency management 2. [Linting and Code Quality Tools](./0002-linting-tools.md) - Selection of Ruff, MyPy, Bandit, and other code quality tools 3. [Testing Framework](./0003-testing-framework.md) - Decision to use pytest for testing diff --git a/project_template/docs/index.md.njk b/project_template/docs/index.md.jinja similarity index 90% rename from project_template/docs/index.md.njk rename to project_template/docs/index.md.jinja index 7c5d9be..0baa386 100644 --- a/project_template/docs/index.md.njk +++ b/project_template/docs/index.md.jinja @@ -1,10 +1,10 @@ -# ${{ values.repository_name }} Documentation +# ${{ repository_name }} Documentation -Welcome to the documentation for the ${{ values.repository_name }} project. +Welcome to the documentation for the ${{ repository_name }} project. 
## Overview -${{ values.repository_description }} +${{ repository_description }} This project follows the Reproducible Analytical Pipeline (RAP) methodology, which emphasizes: @@ -19,8 +19,8 @@ This project follows the Reproducible Analytical Pipeline (RAP) methodology, whi The project follows a standard structure for RAP projects: ``` -${{ values.repository_name }}/ -├── ${{ values.module_name }}/ # Main Python package +${{ repository_name }}/ +├── ${{ module_name }}/ # Main Python package │ ├── __init__.py # Package initialization │ ├── extract.py # Data extraction functionality │ ├── transform.py # Data transformation functionality diff --git a/project_template/pyproject.toml.jinja b/project_template/pyproject.toml.jinja new file mode 100644 index 0000000..e1f76ec --- /dev/null +++ b/project_template/pyproject.toml.jinja @@ -0,0 +1,98 @@ +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.ruff] +target-version = "py312" +line-length = 120 +indent-width = 4 + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # Pyflakes + "UP", # pyupgrade + "I", # isort + "B", # flake8-bugbear + "SIM", # flake8-simplify + "C4", # flake8-comprehensions + "S", # flake8-bandit + "D", # pydocstyle - Enforce existing docstrings only + "C90", # mccabe + "RUF", # Ruff specific rules +] + +ignore = [ + # Conflicts with google docstring style + "D205", + # Allow missing docstring, remove to enforce docstrings across the board + "D100", + "D101", + "D102", + "D103", + "D104", + "D105", + "D106", + "D107", + # indentation contains tabs + "W191", + # Too many args in functions + "PLR0913", +] + +unfixable = ["F401", "F841"] + +[tool.ruff.lint.isort] +known-first-party = ["{{ module_name }}"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"tests/*" = [ + # Allow use of assert statements in tests + "S101", +] + +[tool.ruff.format] +quote-style = "double" +indent-style = 
"space" +line-ending = "auto" + +[tool.bandit] +exclude_dirs = ["tests"] +skips = ["B101"] + +[tool.mypy] +python_version = "3.12" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +no_implicit_optional = true +strict_optional = true + +[tool.poetry] +name = "{{ repository_name }}" +version = "0.1.0" +description = "{{ repository_description }}" +authors = ["{{ repository_owner }}"] +license = "MIT" +readme = "README.md" +package-mode = false + +[tool.poetry.dependencies] +python = "^3.12" +pandas = "^2.3.1" + +[tool.poetry.group.dev.dependencies] +bandit = "^1.8.6" +pytest = "^8.4.1" +pytest-xdist = "^3.8.0" +ruff = "^0.12.7" +pytest-cov = "^6.2.1" +mypy = "^1.9.0" +pre-commit = "^3.6.2" diff --git a/project_template/pyproject.toml.njk b/project_template/pyproject.toml.njk deleted file mode 100644 index de76e1e..0000000 --- a/project_template/pyproject.toml.njk +++ /dev/null @@ -1,57 +0,0 @@ -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" - -[tool.ruff] -target-version = "py312" -line-length = 100 - -[tool.ruff.lint] -select = ["E", "F", "I", "W", "N", "UP", "ANN", "B", "A", "COM", "C4", "DTZ", "ISC", "ICN", "PIE", "PYI", "PT", "Q", "RET", "SIM", "TID", "ARG", "ERA", "PD", "PGH", "PL", "TRY", "NPY", "RUF"] -ignore = ["ANN401", "COM812", "ISC001", "E203", "G004", "TRY300", "PLR0913", "PD901"] -unfixable = ["F401", "F841"] - -[tool.ruff.lint.isort] -known-first-party = ["{{ values.module_name }}"] - -[tool.ruff.format] -quote-style = "double" -indent-style = "space" -line-ending = "auto" - -[tool.bandit] -exclude_dirs = ["tests"] -skips = ["B101"] - -[tool.mypy] -python_version = "3.12" -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = true -disallow_incomplete_defs = true -check_untyped_defs = true -disallow_untyped_decorators = true -no_implicit_optional = true -strict_optional = true - -[tool.poetry] -name = "${{ 
values.repository_name }}" -version = "0.1.0" -description = "${{ values.repository_description }}" -authors = ["${{ values.repository_owner }}"] -license = "MIT" -readme = "README.md" -package-mode = false - -[tool.poetry.dependencies] -python = "^3.12" -pandas = "^2.3.1" - -[tool.poetry.group.dev.dependencies] -bandit = "^1.8.6" -pytest = "^8.4.1" -pytest-xdist = "^3.8.0" -ruff = "^0.12.7" -pytest-cov = "^6.2.1" -mypy = "^1.9.0" -pre-commit = "^3.6.2" diff --git a/project_template/run_etl.py.njk b/project_template/run_etl.py.jinja similarity index 95% rename from project_template/run_etl.py.njk rename to project_template/run_etl.py.jinja index c8fa607..e37b2be 100644 --- a/project_template/run_etl.py.njk +++ b/project_template/run_etl.py.jinja @@ -2,7 +2,7 @@ import logging -from ${{ values.module_name }} import ( +from {{ module_name }} import ( DataExtractor, DataLoader, DataTransformer, @@ -11,7 +11,7 @@ from ${{ values.module_name }} import ( ) -def main(): +def main() -> None: """Main function to demonstrate ETL pipeline usage.""" # Define file paths source_file = "example_data.csv" @@ -74,8 +74,9 @@ def main(): logger.info("3. 
Using individual ETL components...") # Extract + from pathlib import Path extractor = DataExtractor() - df = extractor.extract_csv(source_file) + df = extractor.extract_csv(Path(source_file)) logger.info(f"Extracted {len(df)} rows") # Transform diff --git a/project_template/tests/e2e/test_etl_workflow.py.njk b/project_template/tests/e2e/test_etl_workflow.py.jinja similarity index 70% rename from project_template/tests/e2e/test_etl_workflow.py.njk rename to project_template/tests/e2e/test_etl_workflow.py.jinja index 3ebaa4f..dddcd48 100644 --- a/project_template/tests/e2e/test_etl_workflow.py.njk +++ b/project_template/tests/e2e/test_etl_workflow.py.jinja @@ -1,29 +1,29 @@ import unittest -from ${{ values.module_name }}.extract import extract_from_source -from ${{ values.module_name }}.load import save_to_destination -from ${{ values.module_name }}.transform import apply_business_rules +from {{ module_name }}.extract import extract_from_source +from {{ module_name }}.load import save_to_destination +from {{ module_name }}.transform import apply_business_rules class TestETLWorkflow(unittest.TestCase): - def setUp(self): + def setUp(self) -> None: self.raw_data = extract_from_source("example_data.csv") self.transformed_data = apply_business_rules(self.raw_data) - def test_extract_data(self): + def test_extract_data(self) -> None: self.assertIsNotNone(self.raw_data) self.assertGreater(len(self.raw_data), 0) - def test_transform_data(self): + def test_transform_data(self) -> None: self.assertIsNotNone(self.transformed_data) self.assertGreater(len(self.transformed_data), 0) # Add more assertions based on expected transformations - def test_load_data(self): + def test_load_data(self) -> None: result = save_to_destination(self.transformed_data, "test_destination.csv") self.assertTrue(result) - def test_etl_workflow(self): + def test_etl_workflow(self) -> None: raw_data = extract_from_source("example_data.csv") transformed_data = apply_business_rules(raw_data) result = 
save_to_destination(transformed_data, "test_destination.csv") diff --git a/project_template/tests/unit/test_extract.py.jinja b/project_template/tests/unit/test_extract.py.jinja new file mode 100644 index 0000000..5fcc1d8 --- /dev/null +++ b/project_template/tests/unit/test_extract.py.jinja @@ -0,0 +1,35 @@ +import unittest +from unittest.mock import patch + +from {{ module_name }}.extract import extract_from_source + + +class TestExtractData(unittest.TestCase): + def test_extract_data_success(self) -> None: + # Assuming extract_from_source returns a DataFrame or list of records + with patch("{{ module_name }}.extract.extract_from_source", return_value=[{"id": 1}]): + data = extract_from_source("example_data.csv") + self.assertGreater(len(data), 0) + + def test_extract_data_empty(self) -> None: + # Mocking extract_from_source to return empty list + with patch("{{ module_name }}.extract.extract_from_source", return_value=[]): + data = extract_from_source("empty.csv") + self.assertEqual(len(data), 0) + + def test_extract_data_file_not_found(self) -> None: + # Test for handling of a non-existent file + with ( + patch("{{ module_name }}.extract.extract_from_source", side_effect=FileNotFoundError), + self.assertRaises(FileNotFoundError), + ): + extract_from_source("non_existent_file.csv") + + def test_extract_data_invalid_source_type(self) -> None: + # Test for handling of invalid data source type + with patch("{{ module_name }}.extract.extract_from_source", side_effect=ValueError), self.assertRaises(ValueError): + extract_from_source("example_data.csv", source_type="invalid") + + +if __name__ == "__main__": + unittest.main() diff --git a/project_template/tests/unit/test_extract.py.njk b/project_template/tests/unit/test_extract.py.njk deleted file mode 100644 index 0a92535..0000000 --- a/project_template/tests/unit/test_extract.py.njk +++ /dev/null @@ -1,30 +0,0 @@ -import unittest - -from ${{ values.module_name }}.extract import extract_from_source - - -class 
TestExtractData(unittest.TestCase): - def test_extract_data_success(self): - # Assuming extract_data returns a list of records - data = extract_from_source("example_data.csv") - self.assertGreater(len(data), 0) - - def test_extract_data_empty(self): - # Mocking the data source to return no data - # This would require a mocking library like unittest.mock - # For example, if using a database, you would mock the database call - pass - - def test_extract_data_file_not_found(self): - # Test for handling of a non-existent file - with self.assertRaises(FileNotFoundError): - extract_from_source("non_existent_file.csv") - - def test_extract_data_invalid_source_type(self): - # Test for handling of invalid data source type - with self.assertRaises(ValueError): - extract_from_source("example_data.csv", source_type="invalid") - - -if __name__ == "__main__": - unittest.main() diff --git a/project_template/tests/unit/test_load.py.njk b/project_template/tests/unit/test_load.py.jinja similarity index 67% rename from project_template/tests/unit/test_load.py.njk rename to project_template/tests/unit/test_load.py.jinja index bc70bb5..3afaacd 100644 --- a/project_template/tests/unit/test_load.py.njk +++ b/project_template/tests/unit/test_load.py.jinja @@ -2,11 +2,11 @@ import unittest import pandas as pd -from ${{ values.module_name }}.load import save_to_destination +from {{ module_name }}.load import save_to_destination class TestLoadData(unittest.TestCase): - def test_load_data_success(self): + def test_load_data_success(self) -> None: # Sample data to load data = pd.DataFrame({"id": [1], "name": ["Test"]}) destination = "test_destination.csv" @@ -15,12 +15,12 @@ class TestLoadData(unittest.TestCase): result = save_to_destination(data, destination) self.assertTrue(result) - def test_load_data_failure(self): + def test_load_data_failure(self) -> None: # Sample data with an invalid destination data = pd.DataFrame({"id": [1], "name": ["Test"]}) - destination = None # Invalid 
destination + destination = "" # Invalid destination (empty string) - # Assuming load_data raises an exception on failure + # Assuming load_data raises an exception or returns False on failure result = save_to_destination(data, destination) self.assertFalse(result) diff --git a/project_template/tests/unit/test_transform.py.njk b/project_template/tests/unit/test_transform.py.jinja similarity index 88% rename from project_template/tests/unit/test_transform.py.njk rename to project_template/tests/unit/test_transform.py.jinja index 3eab67f..6367d94 100644 --- a/project_template/tests/unit/test_transform.py.njk +++ b/project_template/tests/unit/test_transform.py.jinja @@ -2,11 +2,11 @@ import unittest import pandas as pd -from ${{ values.module_name }}.transform import apply_business_rules +from {{ module_name }}.transform import apply_business_rules class TestTransformData(unittest.TestCase): - def test_transform_data(self): + def test_transform_data(self) -> None: # Sample input data input_data = pd.DataFrame( [ diff --git a/project_template/${{ 'PIRR.md' if not values.is_public_repo }}.njk b/project_template/{{ 'PIRR.md' if not is_public_repo }}.jinja similarity index 95% rename from project_template/${{ 'PIRR.md' if not values.is_public_repo }}.njk rename to project_template/{{ 'PIRR.md' if not is_public_repo }}.jinja index f339a03..4c4c762 100644 --- a/project_template/${{ 'PIRR.md' if not values.is_public_repo }}.njk +++ b/project_template/{{ 'PIRR.md' if not is_public_repo }}.jinja @@ -2,7 +2,7 @@ ## What visibility is the repository set to? -The repository is set to: **${{ values.repository_visibility | capitalize }}** +The repository is set to: **${{ repository_visibility | capitalize }}** ## What decision led to this? 
diff --git a/project_template/${{values.module_name}}/__init__.py b/project_template/{{module_name}}/__init__.py similarity index 98% rename from project_template/${{values.module_name}}/__init__.py rename to project_template/{{module_name}}/__init__.py index f2f1e2a..70b727c 100644 --- a/project_template/${{values.module_name}}/__init__.py +++ b/project_template/{{module_name}}/__init__.py @@ -17,7 +17,7 @@ def __init__(self) -> None: self.extractor = DataExtractor() self.transformer = DataTransformer() self.loader = DataLoader() - self.pipeline_summary = {} + self.pipeline_summary: dict[str, object] = {} def run_pipeline( self, diff --git a/project_template/${{values.module_name}}/extract.py b/project_template/{{module_name}}/extract.py similarity index 78% rename from project_template/${{values.module_name}}/extract.py rename to project_template/{{module_name}}/extract.py index 6c81b67..e9672b0 100644 --- a/project_template/${{values.module_name}}/extract.py +++ b/project_template/{{module_name}}/extract.py @@ -13,7 +13,7 @@ class DataExtractor: def __init__(self) -> None: self.supported_formats = [".csv", ".xlsx", ".json"] - def extract_csv(self, file_path: str, **kwargs: Any) -> pd.DataFrame: + def extract_csv(self, file_path: Path, **kwargs: Any) -> pd.DataFrame: """Extract data from CSV file. Args: @@ -25,14 +25,14 @@ def extract_csv(self, file_path: str, **kwargs: Any) -> pd.DataFrame: """ try: logger.info("Extracting data from %s", file_path) - data = pd.read_csv(file_path, **kwargs) + data: pd.DataFrame = pd.read_csv(file_path, **kwargs) logger.info("Successfully extracted %d rows from %s", len(data), file_path) return data except Exception: logger.exception("Error extracting data from %s", file_path) raise - def validate_file_exists(self, file_path: str) -> bool: + def validate_file_exists(self, file_path: Path) -> bool: """Validate that the file exists. 
Args: @@ -41,9 +41,9 @@ def validate_file_exists(self, file_path: str) -> bool: Returns: True if file exists, False otherwise """ - return Path(file_path).exists() + return file_path.exists() - def get_file_info(self, file_path: str) -> dict: + def get_file_info(self, file_path: Path) -> dict[str, Any]: """Get basic information about the file. Args: @@ -52,7 +52,6 @@ def get_file_info(self, file_path: str) -> dict: Returns: Dictionary with file information """ - file_path = Path(file_path) if not file_path.exists(): return {"exists": False} @@ -64,7 +63,7 @@ def get_file_info(self, file_path: str) -> dict: } -def extract_from_source(source_path: str, source_type: str = "csv") -> pd.DataFrame: +def extract_from_source(source_path: str | Path, source_type: str = "csv") -> pd.DataFrame: """Helper function to extract data from a source. Args: @@ -75,13 +74,14 @@ def extract_from_source(source_path: str, source_type: str = "csv") -> pd.DataFr DataFrame containing the extracted data """ extractor = DataExtractor() + path_obj = Path(source_path) - if not extractor.validate_file_exists(source_path): + if not extractor.validate_file_exists(path_obj): msg = f"Source file not found: {source_path}" raise FileNotFoundError(msg) if source_type.lower() == "csv": - return extractor.extract_csv(source_path) + return extractor.extract_csv(path_obj) msg = f"Unsupported source type: {source_type}" raise ValueError(msg) diff --git a/project_template/${{values.module_name}}/load.py b/project_template/{{module_name}}/load.py similarity index 100% rename from project_template/${{values.module_name}}/load.py rename to project_template/{{module_name}}/load.py diff --git a/project_template/${{values.module_name}}/transform.py b/project_template/{{module_name}}/transform.py similarity index 100% rename from project_template/${{values.module_name}}/transform.py rename to project_template/{{module_name}}/transform.py