diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..d9121c8b --- /dev/null +++ b/Makefile @@ -0,0 +1,208 @@ +# Makefile for PocketFlow Tutorial Codebase Knowledge + +# Variables - can be overridden from command line +IMAGE_NAME ?= documentation-generator +IMAGE_TAG ?= latest +REGISTRY ?= docker.io +REGISTRY_USER ?= your-username +FULL_IMAGE_NAME = $(REGISTRY)/$(REGISTRY_USER)/$(IMAGE_NAME):$(IMAGE_TAG) + +# Docker build arguments +DOCKER_BUILD_ARGS ?= --no-cache + +# Default paths for local development +CONFIG_FILE ?= configs/example_config.yaml +OUTPUT_DIR ?= ./output +MOUNT_DIR ?= ./mount + +# Colors for output +GREEN = \033[0;32m +YELLOW = \033[1;33m +RED = \033[0;31m +NC = \033[0m # No Color + +.PHONY: help build push run run-local clean validate lint test all + +# Default target +all: build + +## Help - Display available targets +help: + @echo "$(GREEN)PocketFlow Tutorial Docker Management$(NC)" + @echo "" + @echo "$(YELLOW)Available targets:$(NC)" + @echo " $(GREEN)build$(NC) - Build the Docker image" + @echo " $(GREEN)push$(NC) - Push the Docker image to registry" + @echo " $(GREEN)run$(NC) - Run the Docker container with file input" + @echo " $(GREEN)run-local$(NC) - Run container with local directory mounting" + @echo " $(GREEN)validate$(NC) - Validate configuration file without running" + @echo " $(GREEN)clean$(NC) - Clean up Docker images and containers" + @echo " $(GREEN)lint$(NC) - Run linting and code quality checks" + @echo " $(GREEN)test$(NC) - Run tests" + @echo " $(GREEN)shell$(NC) - Open an interactive shell in the container" + @echo " $(GREEN)logs$(NC) - Show logs from the last container run" + @echo " $(GREEN)help$(NC) - Show this help message" + @echo "" + @echo "$(YELLOW)Configuration:$(NC)" + @echo " IMAGE_NAME=$(IMAGE_NAME)" + @echo " IMAGE_TAG=$(IMAGE_TAG)" + @echo " REGISTRY=$(REGISTRY)" + @echo " REGISTRY_USER=$(REGISTRY_USER)" + @echo " FULL_IMAGE_NAME=$(FULL_IMAGE_NAME)" + @echo "" + @echo "$(YELLOW)Usage 
examples:$(NC)" + @echo " make build" + @echo " make push REGISTRY_USER=myusername" + @echo " make run CONFIG_FILE=configs/my_config.yaml" + @echo " make run-local CONFIG_FILE=configs/my_config.yaml MOUNT_DIR=/path/to/source" + +## Build - Build the Docker image +build: + @echo "$(GREEN)Building Docker image: $(FULL_IMAGE_NAME)$(NC)" + docker build $(DOCKER_BUILD_ARGS) -t $(IMAGE_NAME):$(IMAGE_TAG) -t $(FULL_IMAGE_NAME) . + @echo "$(GREEN)✅ Build completed successfully!$(NC)" + +## Push - Push the Docker image to registry +push: build + @echo "$(GREEN)Pushing Docker image: $(FULL_IMAGE_NAME)$(NC)" + docker push $(FULL_IMAGE_NAME) + @echo "$(GREEN)✅ Push completed successfully!$(NC)" + +## Run - Run the Docker container with file input support +run: build + @echo "$(GREEN)Running Docker container with config: $(CONFIG_FILE)$(NC)" + @if [ ! -f "$(CONFIG_FILE)" ]; then \ + echo "$(RED)❌ Configuration file not found: $(CONFIG_FILE)$(NC)"; \ + echo "$(YELLOW)Please specify a valid config file with: make run CONFIG_FILE=path/to/config.yaml$(NC)"; \ + exit 1; \ + fi + @mkdir -p $(OUTPUT_DIR) + docker run --rm \ + -v "$(PWD)/$(CONFIG_FILE):/app/config.yaml:ro" \ + -v "$(PWD)/$(OUTPUT_DIR):/app/output" \ + -v "$(PWD)/.env:/app/.env:ro" \ + --name $(IMAGE_NAME)-run \ + $(FULL_IMAGE_NAME) config.yaml + @echo "$(GREEN)✅ Container run completed! Check output in: $(OUTPUT_DIR)$(NC)" + +## Run with local directory mounting +run-local: build + @echo "$(GREEN)Running Docker container with local directory mounting$(NC)" + @if [ ! 
-f "$(CONFIG_FILE)" ]; then \ + echo "$(RED)❌ Configuration file not found: $(CONFIG_FILE)$(NC)"; \ + exit 1; \ + fi + @mkdir -p $(OUTPUT_DIR) + @if [ -d "$(MOUNT_DIR)" ]; then \ + echo "$(YELLOW)Mounting local directory: $(MOUNT_DIR)$(NC)"; \ + docker run --rm \ + -v "$(PWD)/$(CONFIG_FILE):/app/config.yaml:ro" \ + -v "$(PWD)/$(OUTPUT_DIR):/app/output" \ + -v "$(PWD)/$(MOUNT_DIR):/app/mount:ro" \ + -v "$(PWD)/.env:/app/.env:ro" \ + --name $(IMAGE_NAME)-run \ + $(FULL_IMAGE_NAME) config.yaml; \ + else \ + echo "$(YELLOW)Mount directory not found, running without local mount$(NC)"; \ + $(MAKE) run CONFIG_FILE=$(CONFIG_FILE); \ + fi + +## Validate - Validate configuration file without running analysis +validate: build + @echo "$(GREEN)Validating configuration: $(CONFIG_FILE)$(NC)" + @if [ ! -f "$(CONFIG_FILE)" ]; then \ + echo "$(RED)❌ Configuration file not found: $(CONFIG_FILE)$(NC)"; \ + exit 1; \ + fi + docker run --rm \ + -v "$(PWD)/$(CONFIG_FILE):/app/config.yaml:ro" \ + --name $(IMAGE_NAME)-validate \ + $(FULL_IMAGE_NAME) config.yaml --validate-only + @echo "$(GREEN)✅ Configuration validation completed!$(NC)" + +## Shell - Open an interactive shell in the container +shell: build + @echo "$(GREEN)Opening interactive shell in container$(NC)" + docker run --rm -it \ + -v "$(PWD):/app/workspace:ro" \ + -v "$(PWD)/$(OUTPUT_DIR):/app/output" \ + --name $(IMAGE_NAME)-shell \ + --entrypoint /bin/bash \ + $(FULL_IMAGE_NAME) + +## Logs - Show logs from the last container run +logs: + @echo "$(GREEN)Showing logs from last container run$(NC)" + docker logs $(IMAGE_NAME)-run 2>/dev/null || echo "$(YELLOW)No logs found for $(IMAGE_NAME)-run$(NC)" + +## Clean - Clean up Docker images and containers +clean: + @echo "$(GREEN)Cleaning up Docker resources$(NC)" + @docker rm -f $(IMAGE_NAME)-run 2>/dev/null || true + @docker rm -f $(IMAGE_NAME)-validate 2>/dev/null || true + @docker rm -f $(IMAGE_NAME)-shell 2>/dev/null || true + @docker rmi $(IMAGE_NAME):$(IMAGE_TAG) 2>/dev/null 
|| true + @docker rmi $(FULL_IMAGE_NAME) 2>/dev/null || true + @docker system prune -f + @echo "$(GREEN)✅ Cleanup completed!$(NC)" + +## Lint - Run linting and code quality checks +lint: + @echo "$(GREEN)Running linting checks$(NC)" + @if command -v python3 >/dev/null 2>&1; then \ + python3 -m py_compile main.py flow.py nodes.py utils/*.py; \ + echo "$(GREEN)✅ Python syntax check passed$(NC)"; \ + else \ + echo "$(YELLOW)⚠️ Python3 not found, skipping syntax check$(NC)"; \ + fi + @if command -v yamllint >/dev/null 2>&1; then \ + find configs -name "*.yaml" -o -name "*.yml" | xargs yamllint; \ + echo "$(GREEN)✅ YAML lint check passed$(NC)"; \ + else \ + echo "$(YELLOW)⚠️ yamllint not found, skipping YAML check$(NC)"; \ + fi + +## Test - Run tests +test: build + @echo "$(GREEN)Running tests$(NC)" + docker run --rm \ + -v "$(PWD)/configs/example_config.yaml:/app/config.yaml:ro" \ + --name $(IMAGE_NAME)-test \ + $(FULL_IMAGE_NAME) config.yaml --validate-only + @echo "$(GREEN)✅ Tests completed!$(NC)" + +## Quick start for development +dev: build validate + @echo "$(GREEN)Development environment ready!$(NC)" + @echo "$(YELLOW)Try: make run CONFIG_FILE=configs/example_config.yaml$(NC)" + +# Advanced targets + +## Build with custom args +build-dev: + $(MAKE) build DOCKER_BUILD_ARGS="--target development" + +## Run with debug mode +run-debug: build + @echo "$(GREEN)Running in debug mode$(NC)" + docker run --rm -it \ + -v "$(PWD)/$(CONFIG_FILE):/app/config.yaml:ro" \ + -v "$(PWD)/$(OUTPUT_DIR):/app/output" \ + -v "$(PWD)/.env:/app/.env:ro" \ + --name $(IMAGE_NAME)-debug \ + --entrypoint /bin/bash \ + $(FULL_IMAGE_NAME) + +## Tag and push with version +tag-and-push: + @if [ -z "$(VERSION)" ]; then \ + echo "$(RED)❌ VERSION is required. 
Usage: make tag-and-push VERSION=1.0.0$(NC)"; \ + exit 1; \ + fi + docker tag $(IMAGE_NAME):$(IMAGE_TAG) $(REGISTRY)/$(REGISTRY_USER)/$(IMAGE_NAME):$(VERSION) + docker push $(REGISTRY)/$(REGISTRY_USER)/$(IMAGE_NAME):$(VERSION) + @echo "$(GREEN)✅ Tagged and pushed version $(VERSION)$(NC)" + +# Make sure output directory exists +$(OUTPUT_DIR): + mkdir -p $(OUTPUT_DIR) diff --git a/README.md b/README.md index cc8ad4e8..41366d65 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,231 @@ +# Docker Usage Guide + +This guide explains how to build, push, and run the PocketFlow Tutorial application using Docker and the provided Makefile. + +## Quick Start + +1. **Build the Docker image:** + ```bash + make build + ``` + +2. **Run with a configuration file:** + ```bash + make run CONFIG_FILE=configs/example_config.yaml + ``` + +3. **Validate configuration without running:** + ```bash + make validate CONFIG_FILE=configs/my_config.yaml + ``` + +## Configuration + +The Makefile supports several configurable variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `IMAGE_NAME` | `pocketflow-tutorial` | Docker image name | +| `IMAGE_TAG` | `latest` | Docker image tag | +| `REGISTRY` | `docker.io` | Docker registry URL | +| `REGISTRY_USER` | `your-username` | Registry username | +| `CONFIG_FILE` | `configs/example_config.yaml` | Path to YAML config file | +| `OUTPUT_DIR` | `./output` | Local output directory | +| `MOUNT_DIR` | `./mount` | Local directory to mount for source code | + +## Available Commands + +### Build Commands +- `make build` - Build the Docker image +- `make build-dev` - Build with development target +- `make push` - Build and push to registry +- `make tag-and-push VERSION=1.0.0` - Tag with version and push + +### Run Commands +- `make run` - Run with config file +- `make run-local` - Run with local directory mounting +- `make run-debug` - Run in debug mode with interactive shell +- `make shell` - Open interactive shell in 
container + +### Utility Commands +- `make validate` - Validate configuration file +- `make clean` - Clean up Docker resources +- `make lint` - Run code quality checks +- `make test` - Run tests +- `make logs` - Show container logs +- `make help` - Show help message + +## Usage Examples + +### Basic Usage +```bash +# Build and run with default config +make build +make run + +# Use custom configuration +make run CONFIG_FILE=configs/corebanking.yaml + +# Validate configuration before running +make validate CONFIG_FILE=configs/my_config.yaml +``` + +### Working with Local Directories +```bash +# Mount a local source directory +make run-local CONFIG_FILE=configs/my_config.yaml MOUNT_DIR=/path/to/source/code + +# The mounted directory will be available at /app/mount inside the container +``` + +### Registry Operations +```bash +# Push to Docker Hub +make push REGISTRY_USER=myusername + +# Push to a different registry +make push REGISTRY=ghcr.io REGISTRY_USER=myusername + +# Tag and push a specific version +make tag-and-push VERSION=1.2.3 REGISTRY_USER=myusername +``` + +### Development Workflow +```bash +# Development setup +make dev + +# Debug a configuration issue +make run-debug CONFIG_FILE=configs/problematic.yaml + +# Check logs from the last run +make logs + +# Clean up when done +make clean +``` + +## File Mounting + +The Makefile automatically mounts several directories: + +1. **Configuration file**: Mounted as read-only at `/app/config.yaml` +2. **Output directory**: Mounted at `/app/output` for results +3. **Environment file**: `.env` mounted as read-only at `/app/.env` +4. 
**Source directory** (when using `run-local`): Mounted at `/app/mount` + +## Configuration File Format + +Your YAML configuration file should follow this structure: + +```yaml +source: + repo: "https://github.com/user/repo" # OR + local_dir: "/app/mount" # for local directories + +project: + name: "My Project" + output_dir: "output" + language: "english" + +file_processing: + max_file_size: 100000 + include_patterns: + - "*.py" + - "*.js" + - "*.md" + exclude_patterns: + - "*test*" + - "node_modules/*" + +analysis: + max_abstractions: 10 + abstractions_hints: + - "authentication" + - "data processing" + +llm: + use_cache: true + +github: + token: "${GITHUB_TOKEN}" # Will use environment variable +``` + +## Environment Variables + +Create a `.env` file in your project root: + +```bash +# .env +GITHUB_TOKEN=your_github_token_here +OPENAI_API_KEY=your_openai_api_key_here +# Add other required environment variables +``` + +## Troubleshooting + +### Common Issues + +1. **Configuration file not found** + ```bash + make run CONFIG_FILE=path/to/your/config.yaml + ``` + +2. **Permission denied for output directory** + ```bash + sudo chown -R $USER:$USER ./output + ``` + +3. **Docker build fails** + ```bash + make clean + make build DOCKER_BUILD_ARGS="--no-cache --pull" + ``` + +4. 
**Registry push fails** + ```bash + docker login + make push REGISTRY_USER=your_actual_username + ``` + +### Debug Mode + +Use debug mode to troubleshoot issues: + +```bash +make run-debug CONFIG_FILE=configs/my_config.yaml + +# Inside the container: +python main.py config.yaml --validate-only +python main.py config.yaml +``` + +## Advanced Usage + +### Custom Docker Build Args +```bash +make build DOCKER_BUILD_ARGS="--build-arg PYTHON_VERSION=3.11" +``` + +### Multiple Configurations +```bash +# Process multiple projects +for config in configs/*.yaml; do + echo "Processing $config" + make run CONFIG_FILE="$config" +done +``` + +### CI/CD Integration +```bash +# In your CI pipeline +make build +make test +make push REGISTRY_USER=$CI_REGISTRY_USER +``` + + +

Turns a Codebase into an Easy Tutorial with AI

![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg) diff --git a/WorkflowDocumentation.md b/WorkflowDocumentation.md new file mode 100644 index 00000000..2d5bb9a7 --- /dev/null +++ b/WorkflowDocumentation.md @@ -0,0 +1,109 @@ + # Documentation Generation Workflow + +This document explains the workflow of the documentation generation process, focusing on how the Large Language Model (LLM) prompts chain together to create a comprehensive tutorial for a given codebase. + +## Overview + +The process is a pipeline of six sequential steps. Four of these steps involve interactions with an LLM to analyze the code, structure the content, and write the documentation. + +```mermaid +flowchart TD + A[1. Fetch Repository] --> B[2. Identify Abstractions] + B --> C[3. Analyze Relationships] + C --> D[4. Order Chapters] + D --> E[5. Write Chapters] + E --> F[6. Combine Tutorial] +``` + +## Detailed Steps & Prompt Chaining + +Here’s a detailed breakdown of each step, focusing on the inputs and outputs of the LLM prompts. + +### 1. Fetch Repository (`FetchRepo`) +This initial step does not use an LLM. It's responsible for gathering the source code. + +- **Input**: A repository URL or a local directory path. +- **Output**: A list of all relevant code files and their content. This list becomes the foundational input for the entire workflow. +- **Chaining**: The raw codebase content is passed to the next step. + +--- + +### 2. Identify Abstractions (`IdentifyAbstractions`) +This is the first interaction with the LLM. Its goal is to identify the most important, high-level concepts in the codebase. + +- **Input to Prompt**: The entire codebase content fetched in the previous step. +- **LLM Prompt Goal**: The LLM is asked to analyze the code and identify the top 5-10 core abstractions. For each abstraction, it must provide: + 1. A concise `name`. + 2. A beginner-friendly `description` with an analogy. + 3. 
A list of relevant `file_indices` that implement or define the abstraction. +- **Output of Prompt**: The LLM returns a YAML-formatted string. + ```yaml + - name: | + Core Concept A + description: | + An explanation of what this concept does, like a central controller. + file_indices: + - 0 # path/to/file1.py + - 3 # path/to/file2.py + ``` +- **Chaining**: The validated list of abstractions (name, description, file indices) is passed to the next step. + +--- + +### 3. Analyze Relationships (`AnalyzeRelationships`) +The second LLM interaction focuses on understanding how the identified abstractions interact. + +- **Input to Prompt**: The list of abstractions (names and descriptions) and the code snippets from their relevant files. +- **LLM Prompt Goal**: The LLM is prompted to: + 1. Create a high-level `summary` of the project's purpose. + 2. Define the `relationships` between the abstractions, describing how they interact (e.g., "Manages", "Inherits from", "Uses"). +- **Output of Prompt**: A YAML object containing the summary and a list of relationships. + ```yaml + summary: | + A brief, simple explanation of the project's purpose. + relationships: + - from_abstraction: 0 # Core Concept A + to_abstraction: 1 # Core Concept B + label: "Manages" + ``` +- **Chaining**: The project summary and the list of relationships are passed to the next step. + +--- + +### 4. Order Chapters (`OrderChapters`) +The third LLM interaction determines the best pedagogical order to present the concepts. + +- **Input to Prompt**: The project summary, the list of abstractions, and their relationships. +- **LLM Prompt Goal**: The LLM is asked to determine the optimal sequence for a tutorial. It's instructed to start with foundational or user-facing concepts and then move to lower-level implementation details, respecting dependencies revealed in the relationships. +- **Output of Prompt**: A YAML list of the abstraction indices, sorted in the recommended chapter order. 
+ ```yaml + - 2 # FoundationalConcept + - 0 # CoreClassA + - 1 # CoreClassB (uses CoreClassA) + ``` +- **Chaining**: This ordered list of indices dictates the structure of the final tutorial and is passed to the chapter writing step. + +--- + +### 5. Write Chapters (`WriteChapters`) +This is the most intensive LLM step, where the actual tutorial content is generated for each abstraction, one by one. + +- **Input to Prompt (for each chapter)**: + - The specific abstraction's details (name, description). + - The full tutorial structure (for linking to other chapters). + - The content of previously written chapters (to ensure smooth transitions). + - Relevant code snippets for the current abstraction. +- **LLM Prompt Goal**: The LLM is given a detailed set of instructions to write a beginner-friendly Markdown chapter. This includes creating a heading, explaining the concept with analogies, providing simplified code examples (under 10 lines), using Mermaid diagrams for illustration, and writing transitions to the previous and next chapters. +- **Output of Prompt**: A Markdown-formatted string for each chapter. +- **Chaining**: The list of all generated Markdown chapter strings is passed to the final step. + +--- + +### 6. Combine Tutorial (`CombineTutorial`) +This final step does not use an LLM. It assembles all the generated pieces into the final documentation. + +- **Input**: The project summary, relationship graph, chapter order, and the content of all chapters. +- **Output**: A directory containing: + - `index.md`: An overview page with the project summary, a Mermaid diagram of the relationships, and a linked table of contents. + - `01_concept.md`, `02_another.md`, etc.: Individual chapter files in Markdown format. +- **Chaining**: This is the final step, and the output is the completed tutorial saved to the filesystem. 
\ No newline at end of file diff --git a/configs/corebanking.yaml b/configs/corebanking.yaml new file mode 100644 index 00000000..a93a1063 --- /dev/null +++ b/configs/corebanking.yaml @@ -0,0 +1,91 @@ +# Corebanking Project Analysis Configuration + +# Source configuration - specify either repo or local_dir +source: + local_dir: "/Users/louis-davidcoulombe/github/corebanking" + # repo: "https://github.com/example/corebanking" # Use this instead of local_dir for GitHub repos + +# Project settings +project: + name: "corebanking" + language: "english" + output_dir: "nesto/corebanking/analysis_output" + +# File processing settings +file_processing: + max_file_size: 150000 # Maximum file size in bytes + include_patterns: + - "*.py" + - "*.js" + - "*.jsx" + - "*.ts" + - "*.tsx" + - "*.go" + - "*.java" + - "*.pyi" + - "*.pyx" + - "*.c" + - "*.cc" + - "*.cpp" + - "*.h" + - "*.md" + - "*.rst" + - "*Dockerfile" + - "*Makefile" + - "*.yaml" + - "*.yml" + exclude_patterns: + - "assets/*" + - "data/*" + - "images/*" + - "public/*" + - "static/*" + - "temp/*" + - "*docs/*" + - "*venv/*" + - "*.venv/*" + - "*test*" + - "*tests/*" + - "*examples/*" + - "v1/*" + - "*dist/*" + - "*build/*" + - "*experimental/*" + - "*deprecated/*" + - "*misc/*" + - "*legacy/*" + - ".git/*" + - ".github/*" + - ".next/*" + - ".vscode/*" + - "*obj/*" + - "*bin/*" + - "*node_modules/*" + - "*.log" + +# Analysis settings +analysis: + max_abstractions: 13 # Based on number of abstraction hints provided + abstractions_hints: + - "Event" + - "Command" + - "Aggregate" + - "Repository" + - "API Handler" + - "Core Facade" + - "Service" + - "Consumer" + - "Product Engine" + - "Simulation Services and Repositories" + - "products" + - "parameters" + - "customers" + feedback_file: "nesto/corebanking/review.md" + +# LLM settings +llm: + use_cache: true # Set to false to disable caching + +# GitHub settings (only needed if using repo instead of local_dir) +github: + # token: "your_github_token_here" # Or set 
GITHUB_TOKEN environment variable diff --git a/configs/documents-extractor.yaml b/configs/documents-extractor.yaml new file mode 100644 index 00000000..3fafdcf5 --- /dev/null +++ b/configs/documents-extractor.yaml @@ -0,0 +1,91 @@ +# Corebanking Project Analysis Configuration + +# Source configuration - specify either repo or local_dir +source: + local_dir: "/Users/louis-davidcoulombe/src/github.com/nestoca/documents-extractor" + # repo: "https://github.com/example/corebanking" # Use this instead of local_dir for GitHub repos + +# Project settings +project: + name: "documents-extractor" + language: "english" + output_dir: "nesto/corebanking/documents-extractor" + +# File processing settings +file_processing: + max_file_size: 150000 # Maximum file size in bytes + include_patterns: + - "*.py" + - "*.js" + - "*.jsx" + - "*.ts" + - "*.tsx" + - "*.go" + - "*.java" + - "*.pyi" + - "*.pyx" + - "*.c" + - "*.cc" + - "*.cpp" + - "*.h" + - "*.md" + - "*.rst" + - "*Dockerfile" + - "*Makefile" + - "*.yaml" + - "*.yml" + exclude_patterns: + - "assets/*" + - "data/*" + - "images/*" + - "public/*" + - "static/*" + - "temp/*" + - "*docs/*" + - "*venv/*" + - "*.venv/*" + - "*test*" + - "*tests/*" + - "*examples/*" + - "v1/*" + - "*dist/*" + - "*build/*" + - "*experimental/*" + - "*deprecated/*" + - "*misc/*" + - "*legacy/*" + - ".git/*" + - ".github/*" + - ".next/*" + - ".vscode/*" + - "*obj/*" + - "*bin/*" + - "*node_modules/*" + - "*.log" + +# Analysis settings +# analysis: +# max_abstractions: 13 # Based on number of abstraction hints provided +# abstractions_hints: +# - "Event" +# - "Command" +# - "Aggregate" +# - "Repository" +# - "API Handler" +# - "Core Facade" +# - "Service" +# - "Consumer" +# - "Product Engine" +# - "Simulation Services and Repositories" +# - "products" +# - "parameters" +# - "customers" +# feedback_file: "nesto/corebanking/review.md" + +# LLM settings +llm: + use_cache: false # Set to false to disable caching + +# GitHub settings (only needed if using repo 
instead of local_dir) +github: + # token: "your_github_token_here" # Or set GITHUB_TOKEN environment variable diff --git a/configs/example_config.yaml b/configs/example_config.yaml new file mode 100644 index 00000000..e9c8d04c --- /dev/null +++ b/configs/example_config.yaml @@ -0,0 +1,72 @@ +# Example Configuration File for Tutorial Generation +# Copy this file and modify for your specific project + +# Source configuration - specify either repo or local_dir (not both) +source: + # For local directories: + local_dir: "/path/to/your/local/project" + + # For GitHub repositories (comment out local_dir if using this): + # repo: "https://github.com/username/repository" + +# Project settings +project: + name: "your_project_name" + language: "english" # or "french", "spanish", etc. + output_dir: "output/your_project" # where to save the generated tutorial + +# File processing settings +file_processing: + max_file_size: 100000 # Maximum file size in bytes (default: 100KB) + + # File patterns to include in analysis + include_patterns: + - "*.py" + - "*.js" + - "*.jsx" + - "*.ts" + - "*.tsx" + - "*.go" + - "*.java" + - "*.md" + - "*.rst" + - "*.yaml" + - "*.yml" + + # File patterns to exclude from analysis + exclude_patterns: + - "*test*" + - "*tests/*" + - "*node_modules/*" + - "*venv/*" + - "*.venv/*" + - "*dist/*" + - "*build/*" + - ".git/*" + - "*.log" + +# Analysis settings +analysis: + max_abstractions: 10 # Maximum number of abstractions to identify + + # Specific abstractions to look for (optional) + abstractions_hints: + - "Class" + - "Interface" + - "Service" + - "Controller" + - "Repository" + - "Model" + - "Component" + - "Module" + + # Path to feedback file from previous runs (optional) + # feedback_file: "path/to/feedback.md" + +# LLM settings +llm: + use_cache: true # Set to false to disable response caching + +# GitHub settings (only needed if using repo instead of local_dir) +github: + # token: "your_github_token_here" # Or set GITHUB_TOKEN environment 
variable diff --git a/main.py b/main.py index dbba523f..40c0b2cc 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ import dotenv import os import argparse +import yaml # Import the function that creates the flow from flow import create_tutorial_flow @@ -35,58 +36,108 @@ "*.log" } -# --- Main Function --- -def main(): - parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase or local directory.") - - # Create mutually exclusive group for source - source_group = parser.add_mutually_exclusive_group(required=True) - source_group.add_argument("--repo", help="URL of the public GitHub repository.") - source_group.add_argument("--dir", help="Path to local directory.") - - parser.add_argument("-n", "--name", help="Project name (optional, derived from repo/directory if omitted).") - parser.add_argument("-t", "--token", help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).") - parser.add_argument("-o", "--output", default="output", help="Base directory for output (default: ./output).") - parser.add_argument("-i", "--include", nargs="+", help="Include file patterns (e.g. '*.py' '*.js'). Defaults to common code files if not specified.") - parser.add_argument("-e", "--exclude", nargs="+", help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). 
Defaults to test/build directories if not specified.") - parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).") - # Add language parameter for multi-language support - parser.add_argument("--language", default="english", help="Language for the generated tutorial (default: english)") - # Add use_cache parameter to control LLM caching - parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)") - # Add max_abstraction_num parameter to control the number of abstractions - parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)") - - args = parser.parse_args() - - # Get GitHub token from argument or environment variable if using repo +def load_config(config_path): + """Load and validate YAML configuration file.""" + try: + with open(config_path, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + + # Validate required sections + required_sections = ['source', 'project'] + for section in required_sections: + if section not in config: + raise ValueError(f"Missing required section '{section}' in config file") + + # Validate source (must have either repo or local_dir) + source = config['source'] + if not ('repo' in source or 'local_dir' in source): + raise ValueError("Source section must contain either 'repo' or 'local_dir'") + + # Set defaults for optional sections + if 'file_processing' not in config: + config['file_processing'] = {} + if 'analysis' not in config: + config['analysis'] = {} + if 'llm' not in config: + config['llm'] = {} + if 'github' not in config: + config['github'] = {} + + return config + + except FileNotFoundError: + raise FileNotFoundError(f"Configuration file not found: {config_path}") + except yaml.YAMLError as e: + raise ValueError(f"Invalid YAML in configuration file: {e}") + except Exception as e: + raise Exception(f"Error loading 
configuration: {e}") + +def config_to_shared(config): + """Convert YAML config to shared dictionary format.""" + source = config['source'] + project = config['project'] + file_proc = config.get('file_processing', {}) + analysis = config.get('analysis', {}) + llm_config = config.get('llm', {}) + github_config = config.get('github', {}) + + # Get GitHub token from config or environment variable github_token = None - if args.repo: - github_token = args.token or os.environ.get('GITHUB_TOKEN') + if 'repo' in source: + github_token = github_config.get('token') or os.environ.get('GITHUB_TOKEN') if not github_token: print("Warning: No GitHub token provided. You might hit rate limits for public repositories.") - - # Initialize the shared dictionary with inputs + + # Load feedback from file if provided + feedback_content = None + feedback_file = analysis.get('feedback_file') + if feedback_file: + try: + with open(feedback_file, 'r', encoding='utf-8') as f: + feedback_content = f.read() + print(f"Loaded feedback from: {feedback_file}") + except FileNotFoundError: + print(f"Warning: Feedback file not found: {feedback_file}") + except Exception as e: + print(f"Warning: Could not read feedback file {feedback_file}: {e}") + + # Get abstractions hints and calculate max abstractions + abstractions_hints = analysis.get('abstractions_hints') + max_abstractions = analysis.get('max_abstractions', 10) + + # Ensure abstractions_hints is never None, default to empty list + if abstractions_hints is None: + abstractions_hints = [] + + # If specific abstractions are provided, use their count as max + if abstractions_hints: + max_abstractions = len(abstractions_hints) + print(f"✓ Using {len(abstractions_hints)} specific abstraction hints") + else: + print(f"✓ No specific abstraction hints provided, will identify up to {max_abstractions} abstractions") + shared = { - "repo_url": args.repo, - "local_dir": args.dir, - "project_name": args.name, # Can be None, FetchRepo will derive it + 
"repo_url": source.get('repo'), + "local_dir": source.get('local_dir'), + "project_name": project.get('name'), "github_token": github_token, - "output_dir": args.output, # Base directory for CombineTutorial output + "output_dir": project.get('output_dir', 'output'), - # Add include/exclude patterns and max file size - "include_patterns": set(args.include) if args.include else DEFAULT_INCLUDE_PATTERNS, - "exclude_patterns": set(args.exclude) if args.exclude else DEFAULT_EXCLUDE_PATTERNS, - "max_file_size": args.max_size, + # File processing settings + "include_patterns": set(file_proc.get('include_patterns', DEFAULT_INCLUDE_PATTERNS)), + "exclude_patterns": set(file_proc.get('exclude_patterns', DEFAULT_EXCLUDE_PATTERNS)), + "max_file_size": file_proc.get('max_file_size', 100000), - # Add language for multi-language support - "language": args.language, + # Project settings + "language": project.get('language', 'english'), - # Add use_cache flag (inverse of no-cache flag) - "use_cache": not args.no_cache, + # LLM settings + "use_cache": llm_config.get('use_cache', True), - # Add max_abstraction_num parameter - "max_abstraction_num": args.max_abstractions, + # Analysis settings + "abstractions_hints": abstractions_hints, + "max_abstraction_num": max_abstractions, + "feedback_content": feedback_content, # Outputs will be populated by the nodes "files": [], @@ -96,16 +147,54 @@ def main(): "chapters": [], "final_output_dir": None } + + return shared - # Display starting message with repository/directory and language - print(f"Starting tutorial generation for: {args.repo or args.dir} in {args.language.capitalize()} language") - print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}") - - # Create the flow instance - tutorial_flow = create_tutorial_flow() - - # Run the flow - tutorial_flow.run(shared) +# --- Main Function --- +def main(): + parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase or local directory using YAML 
configuration.") + parser.add_argument("config", help="Path to YAML configuration file") + parser.add_argument("--validate-only", action="store_true", help="Only validate the configuration file without running the analysis") + + args = parser.parse_args() + + try: + # Load and validate configuration + config = load_config(args.config) + print(f"✓ Configuration loaded successfully from: {args.config}") + + if args.validate_only: + print("✓ Configuration is valid!") + return + + # Convert config to shared dictionary + shared = config_to_shared(config) + + # Display starting message + source_info = shared.get('repo_url') or shared.get('local_dir') + language = shared.get('language', 'english') + print(f"🚀 Starting tutorial generation for: {source_info} in {language.capitalize()} language") + print(f"📊 Configuration:") + print(f" ├─ Project name: {shared.get('project_name', 'Auto-detected')}") + print(f" ├─ Output directory: {shared.get('output_dir')}") + print(f" ├─ Max file size: {shared.get('max_file_size'):,} bytes") + print(f" ├─ LLM caching: {'Enabled' if shared.get('use_cache') else 'Disabled'}") + print(f" ├─ Language: {language.capitalize()}") + print(f" └─ Feedback from previous run: {'Yes' if shared.get('feedback_content') else 'No'}") + + include_count = len(shared.get('include_patterns', [])) + exclude_count = len(shared.get('exclude_patterns', [])) + print(f"📁 File patterns: {include_count} include, {exclude_count} exclude") + + # Create the flow instance + tutorial_flow = create_tutorial_flow() + + # Run the flow + tutorial_flow.run(shared) + + except Exception as e: + print(f"Error: {e}") + exit(1) if __name__ == "__main__": main() diff --git a/nodes.py b/nodes.py index 0e3fa587..3eef25b0 100644 --- a/nodes.py +++ b/nodes.py @@ -71,10 +71,20 @@ def exec(self, prep_res): ) # Convert dict to list of tuples: [(path, content), ...] 
+ if result is None: + raise ValueError("Failed to fetch files - crawling function returned None") + files_list = list(result.get("files", {}).items()) if len(files_list) == 0: raise (ValueError("Failed to fetch files")) - print(f"Fetched {len(files_list)} files.") + print(f"✅ Fetched {len(files_list)} files successfully") + + # Show some stats about the files + total_size = sum(len(content) for _, content in files_list) + avg_size = total_size // len(files_list) if files_list else 0 + print(f" ├─ Total content size: {total_size:,} characters") + print(f" ├─ Average file size: {avg_size:,} characters") + print(f" └─ Largest file: {max((len(content) for _, content in files_list), default=0):,} characters") return files_list def post(self, shared, prep_res, exec_res): @@ -88,6 +98,8 @@ def prep(self, shared): language = shared.get("language", "english") # Get language use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True max_abstraction_num = shared.get("max_abstraction_num", 10) # Get max_abstraction_num, default to 10 + abstractions_hints = shared.get("abstractions_hints", []) # List of abstractions to include, if empty, all abstractions will be included + feedback_content = shared.get("feedback_content") # Get feedback from previous runs # Helper to create context from files, respecting limits (basic example) def create_llm_context(files_data): @@ -113,7 +125,9 @@ def create_llm_context(files_data): language, use_cache, max_abstraction_num, - ) # Return all parameters + abstractions_hints, + feedback_content, + ) # Return all parameters including feedback def exec(self, prep_res): ( @@ -124,13 +138,55 @@ def exec(self, prep_res): language, use_cache, max_abstraction_num, + abstractions_hints, + feedback_content, ) = prep_res # Unpack all parameters - print(f"Identifying abstractions using LLM...") + + print(f"🔍 Identifying abstractions using LLM...") + print(f" ├─ Project: {project_name}") + print(f" ├─ Files analyzed: {file_count}") + 
print(f" ├─ Language: {language}") + print(f" ├─ Max abstractions: {max_abstraction_num}") + print(f" ├─ Cache enabled: {use_cache}") + print(f" ├─ Feedback provided: {'Yes' if feedback_content else 'No'}") + print(f" └─ Abstraction hints: {len(abstractions_hints) if abstractions_hints else 0} provided") # Add language instruction and hints only if not English language_instruction = "" name_lang_hint = "" desc_lang_hint = "" + + # Safety check for abstractions_hints + if abstractions_hints is None: + abstractions_hints = [] + print("Warning: abstractions_hints was None, defaulting to empty list") + + abstractions_hints_str = "" + if len(abstractions_hints) > 0: + print(f"✓ Using specific abstraction hints: {abstractions_hints}") + abstractions_hints_str = f"IMPORTANT: Specific abstractions to include: {abstractions_hints}\n\n" + max_abstraction_num = len(abstractions_hints) + else: + print(f"✓ No specific abstraction hints, identifying up to {max_abstraction_num} abstractions") + + # Add feedback section if available + feedback_section = "" + if feedback_content: + print(f"📝 Including feedback from previous analysis run") + feedback_section = f""" +CRITICAL: Learn from Previous Analysis Feedback +The following feedback was provided from a previous analysis of this codebase. Please carefully consider these points to avoid repeating the same mistakes: + +{feedback_content} + +Based on this feedback, pay special attention to: +- Avoiding hallucinated commands or concepts that don't exist in the codebase +- Following proper naming conventions used in the code +- Ensuring accuracy in descriptions and relationships +- Including all important abstractions mentioned in the feedback + +""" + if language.lower() != "english": language_instruction = f"IMPORTANT: Generate the `name` and `description` for each abstraction in **{language.capitalize()}** language. 
Do NOT use English for these fields.\n\n" # Keep specific hints here as name/description are primary targets @@ -140,10 +196,10 @@ def exec(self, prep_res): prompt = f""" For the project `{project_name}`: -Codebase Context: +{feedback_section}Codebase Context: {context} -{language_instruction}Analyze the codebase context. +{abstractions_hints_str}{language_instruction}Analyze the codebase context. Identify the top 5-{max_abstraction_num} core most important abstractions to help those new to the codebase. For each abstraction, provide: @@ -173,9 +229,12 @@ def exec(self, prep_res): - 5 # path/to/another.js # ... up to {max_abstraction_num} abstractions ```""" + print(f"🤖 Calling LLM for abstraction identification (attempt {self.cur_retry + 1})") response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + print(f"✅ Received LLM response ({len(response)} characters)") # --- Validation --- + print(f"🔍 Parsing and validating LLM response...") yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() abstractions = yaml.safe_load(yaml_str) @@ -228,7 +287,10 @@ def exec(self, prep_res): } ) - print(f"Identified {len(validated_abstractions)} abstractions.") + print(f"✅ Successfully identified {len(validated_abstractions)} abstractions:") + for i, abstr in enumerate(validated_abstractions): + file_count = len(abstr.get('files', [])) + print(f" {i+1}. 
{abstr['name']} ({file_count} files)") return validated_abstractions def post(self, shared, prep_res, exec_res): @@ -246,6 +308,7 @@ def prep(self, shared): project_name = shared["project_name"] # Get project name language = shared.get("language", "english") # Get language use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + feedback_content = shared.get("feedback_content") # Get feedback from previous runs # Get the actual number of abstractions directly num_abstractions = len(abstractions) @@ -284,7 +347,8 @@ def prep(self, shared): project_name, language, use_cache, - ) # Return use_cache + feedback_content, + ) # Return feedback_content def exec(self, prep_res): ( @@ -294,7 +358,8 @@ def exec(self, prep_res): project_name, language, use_cache, - ) = prep_res # Unpack use_cache + feedback_content, + ) = prep_res # Unpack use_cache and feedback_content print(f"Analyzing relationships using LLM...") # Add language instruction and hints only if not English @@ -306,6 +371,24 @@ def exec(self, prep_res): lang_hint = f" (in {language.capitalize()})" list_lang_note = f" (Names might be in {language.capitalize()})" # Note for the input list + # Add feedback section if available + feedback_section = "" + if feedback_content: + feedback_section = f""" +CRITICAL: Learn from Previous Analysis Feedback +The following feedback was provided from a previous analysis of this codebase. 
Please carefully consider these points to avoid repeating the same mistakes: + +{feedback_content} + +Based on this feedback, pay special attention to: +- Avoiding hallucinated commands or concepts that don't exist in the codebase +- Ensuring relationships are accurate and based on actual code interactions +- Following proper naming conventions used in the code +- Being precise about how different components actually interact in the codebase +- Including all important relationships mentioned in the feedback + +""" + prompt = f""" Based on the following abstractions and relevant code snippets from the project `{project_name}`: @@ -315,7 +398,7 @@ def exec(self, prep_res): Context (Abstractions, Descriptions, Code): {context} -{language_instruction}Please provide: +{feedback_section}{language_instruction}Please provide: 1. A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences. Use markdown formatting with **bold** and *italic* text to highlight important concepts. 2. A list (`relationships`) describing the key interactions between these abstractions. 
For each relationship, specify: - `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`) @@ -544,6 +627,7 @@ def prep(self, shared): project_name = shared["project_name"] language = shared.get("language", "english") use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + feedback_content = shared.get("feedback_content") # Get feedback from previous runs # Get already written chapters to provide context # We store them temporarily during the batch run, not in shared memory yet @@ -616,6 +700,7 @@ def prep(self, shared): "next_chapter": next_chapter, # Add next chapter info (uses potentially translated name) "language": language, # Add language for multi-language support "use_cache": use_cache, # Pass use_cache flag + "feedback_content": feedback_content, # Add feedback content # previous_chapters_summary will be added dynamically in exec } ) @@ -639,6 +724,7 @@ def exec(self, item): project_name = item.get("project_name") language = item.get("language", "english") use_cache = item.get("use_cache", True) # Read use_cache from item + feedback_content = item.get("feedback_content") # Get feedback content print(f"Writing chapter {chapter_num} for: {abstraction_name} using LLM...") # Prepare file context string from the map @@ -675,6 +761,27 @@ def exec(self, item): ) tone_note = f" (appropriate for {lang_cap} readers)" + # Add feedback section if available + feedback_section = "" + if feedback_content: + feedback_section = f""" + +CRITICAL: Learn from Previous Analysis Feedback +The following feedback was provided from a previous analysis of this codebase. 
Please carefully consider these points to avoid repeating the same mistakes in this chapter: + +{feedback_content} + +Based on this feedback, pay special attention to: +- Avoiding hallucinated commands, classes, or concepts that don't exist in the codebase +- Following proper naming conventions used in the actual code (like `accountsSvc`, `accountsRepo`) +- Ensuring code examples are realistic and follow the patterns seen in the actual codebase +- Creating accurate diagrams that reflect the actual architecture, not invented layers +- Being precise about how components actually work based on the code, not assumptions +- Including accurate comments and struct descriptions +- Avoiding pseudo-code that might mislead readers about the actual implementation + +""" + prompt = f""" {language_instruction}Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}. @@ -691,7 +798,7 @@ def exec(self, item): Relevant Code Snippets (Code itself remains unchanged): {file_context_str if file_context_str else "No specific code snippets provided for this abstraction."} - +{feedback_section} Instructions for the chapter (Generate content in {language.capitalize()} unless specified otherwise): - Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`). Use the provided concept name. diff --git a/prompt.md b/prompt.md new file mode 100644 index 00000000..af69e582 --- /dev/null +++ b/prompt.md @@ -0,0 +1,177 @@ +Of course. I will extract all the LLM prompts from the `nodes.py` file and present them in a markdown format. First, I need to read the `nodes.py` file. +Read file: nodes.py +Okay, I have the content of `nodes.py`. Now I will extract the prompts and format them into a markdown file named `prompts.md`. +My apologies, it seems I tried to use a tool that isn't available to me. 
I will now output the extracted prompts directly into our conversation. + +# LLM Prompts from `nodes.py` + +This file contains all the LLM prompts used in the documentation generation workflow, extracted from the `nodes.py` file. + +## 1. `IdentifyAbstractions` Node + +This prompt analyzes the entire codebase to identify the most important core abstractions. + +```python +f""" +For the project `{project_name}`: + +Codebase Context: +{context} + +{language_instruction}Analyze the codebase context. +Identify the top 5-{max_abstraction_num} core most important abstractions to help those new to the codebase. + +For each abstraction, provide: +1. A concise `name`{name_lang_hint}. +2. A beginner-friendly `description` explaining what it is with a simple analogy, in around 100 words{desc_lang_hint}. +3. A list of relevant `file_indices` (integers) using the format `idx # path/comment`. + +List of file indices and paths present in the context: +{file_listing_for_prompt} + +Format the output as a YAML list of dictionaries: + +```yaml +- name: | + Query Processing{name_lang_hint} + description: | + Explains what the abstraction does. + It's like a central dispatcher routing requests.{desc_lang_hint} + file_indices: + - 0 # path/to/file1.py + - 3 # path/to/related.py +- name: | + Query Optimization{name_lang_hint} + description: | + Another core concept, similar to a blueprint for objects.{desc_lang_hint} + file_indices: + - 5 # path/to/another.js +# ... up to {max_abstraction_num} abstractions +```""" +``` + +## 2. `AnalyzeRelationships` Node + +This prompt takes the identified abstractions and analyzes their relationships to build a high-level overview of the project. + +```python +f""" +Based on the following abstractions and relevant code snippets from the project `{project_name}`: + +List of Abstraction Indices and Names{list_lang_note}: +{abstraction_listing} + +Context (Abstractions, Descriptions, Code): +{context} + +{language_instruction}Please provide: +1. 
A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences. Use markdown formatting with **bold** and *italic* text to highlight important concepts. +2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify: + - `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`) + - `to_abstraction`: Index of the target abstraction (e.g., `1 # AbstractionName2`) + - `label`: A brief label for the interaction **in just a few words** (e.g., "Manages", "Inherits", "Uses"). + Ideally the relationship should be backed by one abstraction calling or passing parameters to another. + Simplify the relationship and exclude those non-important ones. + +IMPORTANT: Make sure EVERY abstraction is involved in at least ONE relationship (either as source or target). Each abstraction index must appear at least once across all relationships. + +Format the output as YAML: + +```yaml +summary: | + A brief, simple explanation of the project. + Can span multiple lines with **bold** and *italic* for emphasis. +relationships: + - from_abstraction: 0 # AbstractionName1 + to_abstraction: 1 # AbstractionName2 + label: "Manages" + - from_abstraction: 2 # AbstractionName3 + to_abstraction: 0 # AbstractionName1 + label: "Provides config" + # ... other relationships +``` + + +## 3. `OrderChapters` Node + +This prompt determines the best pedagogical order to explain the identified abstractions to a newcomer. + +```python +f""" +Given the following project abstractions and their relationships for the project `{project_name}`: + +Abstractions (Index # Name){list_lang_note}: +{abstraction_listing} + +Context about relationships and project summary: +{context} + +If you are going to make a tutorial for ```` {project_name} ````, what is the best order to explain these abstractions, from first to last? 
+Ideally, first explain those that are the most important or foundational, perhaps user-facing concepts or entry points. Then move to more detailed, lower-level implementation details or supporting concepts. + +Output the ordered list of abstraction indices, including the name in a comment for clarity. Use the format `idx # AbstractionName`. + +```yaml +- 2 # FoundationalConcept +- 0 # CoreClassA +- 1 # CoreClassB (uses CoreClassA) +- ... +``` + + + + +## 4. `WriteChapters` Node + +This prompt is executed for each abstraction to generate a detailed, beginner-friendly tutorial chapter in Markdown. + +```python +f""" +{language_instruction}Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}. + +Concept Details{concept_details_note}: +- Name: {abstraction_name} +- Description: +{abstraction_description} + +Complete Tutorial Structure{structure_note}: +{item["full_chapter_listing"]} + +Context from previous chapters{prev_summary_note}: +{previous_chapters_summary if previous_chapters_summary else "This is the first chapter."} + +Relevant Code Snippets (Code itself remains unchanged): +{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."} + +Instructions for the chapter (Generate content in {language.capitalize()} unless specified otherwise): +- Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`). Use the provided concept name. + +- If this is not the first chapter, begin with a brief transition from the previous chapter{instruction_lang_note}, referencing it with a proper Markdown link using its name{link_lang_note}. + +- Begin with a high-level motivation explaining what problem this abstraction solves{instruction_lang_note}. Start with a central use case as a concrete example. The whole chapter should guide the reader to understand how to solve this use case. 
Make it very minimal and friendly to beginners. + +- If the abstraction is complex, break it down into key concepts. Explain each concept one-by-one in a very beginner-friendly way{instruction_lang_note}. + +- Explain how to use this abstraction to solve the use case{instruction_lang_note}. Give example inputs and outputs for code snippets (if the output isn't values, describe at a high level what will happen{instruction_lang_note}). + +- Each code block should be BELOW 10 lines! If longer code blocks are needed, break them down into smaller pieces and walk through them one-by-one. Aggressively simplify the code to make it minimal. Use comments{code_comment_note} to skip non-important implementation details. Each code block should have a beginner friendly explanation right after it{instruction_lang_note}. + +- Describe the internal implementation to help understand what's under the hood{instruction_lang_note}. First provide a non-code or code-light walkthrough on what happens step-by-step when the abstraction is called{instruction_lang_note}. It's recommended to use a simple sequenceDiagram with a dummy example - keep it minimal with at most 5 participants to ensure clarity. If participant name has space, use: `participant QP as Query Processing`. {mermaid_lang_note}. + +- Then dive deeper into code for the internal implementation with references to files. Provide example code blocks, but make them similarly simple and beginner-friendly. Explain{instruction_lang_note}. + +- IMPORTANT: When you need to refer to other core abstractions covered in other chapters, ALWAYS use proper Markdown links like this: [Chapter Title](filename.md). Use the Complete Tutorial Structure above to find the correct filename and the chapter title{link_lang_note}. Translate the surrounding text. + +- Use mermaid diagrams to illustrate complex concepts (```mermaid``` format). {mermaid_lang_note}. 
+ +- Heavily use analogies and examples throughout{instruction_lang_note} to help beginners understand. + +- End the chapter with a brief conclusion that summarizes what was learned{instruction_lang_note} and provides a transition to the next chapter{instruction_lang_note}. If there is a next chapter, use a proper Markdown link: [Next Chapter Title](next_chapter_filename){link_lang_note}. + +- Ensure the tone is welcoming and easy for a newcomer to understand{tone_note}. + +- Output *only* the Markdown content for this chapter. + +Now, directly provide a super beginner-friendly Markdown output (DON'T need ```markdown``` tags): +""" +``` \ No newline at end of file diff --git a/run_analysis.sh b/run_analysis.sh new file mode 100755 index 00000000..3e5b3b22 --- /dev/null +++ b/run_analysis.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Shell script to analyze the local corebanking repository +# using YAML configuration file + +echo "Starting corebanking tutorial generation..." +source .venv/bin/activate + +# Validate configuration before running +echo "Validating configuration..." +python main.py "$1" --validate-only + +if [ $? -eq 0 ]; then + echo "Configuration is valid. Running analysis..." + python main.py "$1" +else + echo "Configuration validation failed. Please check the config file." + exit 1 +fi + +echo "Done" \ No newline at end of file