diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..d9121c8b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,208 @@
+# Makefile for PocketFlow Tutorial Codebase Knowledge
+
+# Variables - can be overridden from command line
+IMAGE_NAME ?= documentation-generator
+IMAGE_TAG ?= latest
+REGISTRY ?= docker.io
+REGISTRY_USER ?= your-username
+FULL_IMAGE_NAME = $(REGISTRY)/$(REGISTRY_USER)/$(IMAGE_NAME):$(IMAGE_TAG)
+
+# Docker build arguments
+DOCKER_BUILD_ARGS ?= --no-cache
+
+# Default paths for local development
+CONFIG_FILE ?= configs/example_config.yaml
+OUTPUT_DIR ?= ./output
+MOUNT_DIR ?= ./mount
+
+# Colors for output (ANSI escapes; NC resets to "no color")
+GREEN = \033[0;32m
+YELLOW = \033[1;33m
+RED = \033[0;31m
+NC = \033[0m
+# Every command-style target is phony: none of them produces a file of the same name.
+.PHONY: help build push run run-local clean validate lint test all shell logs dev build-dev run-debug tag-and-push
+
+# Default target
+all: build
+
+## Help - Display available targets
+help:
+ @echo "$(GREEN)PocketFlow Tutorial Docker Management$(NC)"
+ @echo ""
+ @echo "$(YELLOW)Available targets:$(NC)"
+ @echo " $(GREEN)build$(NC) - Build the Docker image"
+ @echo " $(GREEN)push$(NC) - Push the Docker image to registry"
+ @echo " $(GREEN)run$(NC) - Run the Docker container with file input"
+ @echo " $(GREEN)run-local$(NC) - Run container with local directory mounting"
+ @echo " $(GREEN)validate$(NC) - Validate configuration file without running"
+ @echo " $(GREEN)clean$(NC) - Clean up Docker images and containers"
+ @echo " $(GREEN)lint$(NC) - Run linting and code quality checks"
+ @echo " $(GREEN)test$(NC) - Run tests"
+ @echo " $(GREEN)shell$(NC) - Open an interactive shell in the container"
+ @echo " $(GREEN)logs$(NC) - Show logs from the last container run"
+ @echo " $(GREEN)help$(NC) - Show this help message"
+ @echo ""
+ @echo "$(YELLOW)Configuration:$(NC)"
+ @echo " IMAGE_NAME=$(IMAGE_NAME)"
+ @echo " IMAGE_TAG=$(IMAGE_TAG)"
+ @echo " REGISTRY=$(REGISTRY)"
+ @echo " REGISTRY_USER=$(REGISTRY_USER)"
+ @echo " FULL_IMAGE_NAME=$(FULL_IMAGE_NAME)"
+ @echo ""
+ @echo "$(YELLOW)Usage examples:$(NC)"
+ @echo " make build"
+ @echo " make push REGISTRY_USER=myusername"
+ @echo " make run CONFIG_FILE=configs/my_config.yaml"
+ @echo " make run-local CONFIG_FILE=configs/my_config.yaml MOUNT_DIR=/path/to/source"
+
+## Build - Build the Docker image
+build:
+ @echo "$(GREEN)Building Docker image: $(FULL_IMAGE_NAME)$(NC)"
+ docker build $(DOCKER_BUILD_ARGS) -t $(IMAGE_NAME):$(IMAGE_TAG) -t $(FULL_IMAGE_NAME) .
+ @echo "$(GREEN)✅ Build completed successfully!$(NC)"
+
+## Push - Push the Docker image to registry
+push: build
+ @echo "$(GREEN)Pushing Docker image: $(FULL_IMAGE_NAME)$(NC)"
+ docker push $(FULL_IMAGE_NAME)
+ @echo "$(GREEN)✅ Push completed successfully!$(NC)"
+
+## Run - Run the Docker container with file input support (CONFIG_FILE/OUTPUT_DIR may be relative or absolute)
+run: build
+	@echo "$(GREEN)Running Docker container with config: $(CONFIG_FILE)$(NC)"
+	@if [ ! -f "$(CONFIG_FILE)" ]; then \
+		echo "$(RED)❌ Configuration file not found: $(CONFIG_FILE)$(NC)"; \
+		echo "$(YELLOW)Please specify a valid config file with: make run CONFIG_FILE=path/to/config.yaml$(NC)"; \
+		exit 1; \
+	fi
+	@mkdir -p "$(OUTPUT_DIR)"
+	docker run --rm \
+		-v "$(abspath $(CONFIG_FILE)):/app/config.yaml:ro" \
+		-v "$(abspath $(OUTPUT_DIR)):/app/output" \
+		-v "$(CURDIR)/.env:/app/.env:ro" \
+		--name $(IMAGE_NAME)-run \
+		$(FULL_IMAGE_NAME) config.yaml
+	@echo "$(GREEN)✅ Container run completed! Check output in: $(OUTPUT_DIR)$(NC)"
+
+## Run with local directory mounting (MOUNT_DIR may be relative or absolute; mounted read-only at /app/mount)
+run-local: build
+	@echo "$(GREEN)Running Docker container with local directory mounting$(NC)"
+	@if [ ! -f "$(CONFIG_FILE)" ]; then \
+		echo "$(RED)❌ Configuration file not found: $(CONFIG_FILE)$(NC)"; \
+		exit 1; \
+	fi
+	@mkdir -p "$(OUTPUT_DIR)"
+	@if [ -d "$(MOUNT_DIR)" ]; then \
+		echo "$(YELLOW)Mounting local directory: $(MOUNT_DIR)$(NC)"; \
+		docker run --rm \
+			-v "$(abspath $(CONFIG_FILE)):/app/config.yaml:ro" \
+			-v "$(abspath $(OUTPUT_DIR)):/app/output" \
+			-v "$(abspath $(MOUNT_DIR)):/app/mount:ro" \
+			-v "$(CURDIR)/.env:/app/.env:ro" \
+			--name $(IMAGE_NAME)-run \
+			$(FULL_IMAGE_NAME) config.yaml; \
+	else \
+		echo "$(YELLOW)Mount directory not found, running without local mount$(NC)"; \
+		$(MAKE) run CONFIG_FILE="$(CONFIG_FILE)"; \
+	fi
+
+## Validate - Validate configuration file without running analysis (CONFIG_FILE may be relative or absolute)
+validate: build
+	@echo "$(GREEN)Validating configuration: $(CONFIG_FILE)$(NC)"
+	@if [ ! -f "$(CONFIG_FILE)" ]; then \
+		echo "$(RED)❌ Configuration file not found: $(CONFIG_FILE)$(NC)"; \
+		exit 1; \
+	fi
+	docker run --rm \
+		-v "$(abspath $(CONFIG_FILE)):/app/config.yaml:ro" \
+		--name $(IMAGE_NAME)-validate \
+		$(FULL_IMAGE_NAME) config.yaml --validate-only
+	@echo "$(GREEN)✅ Configuration validation completed!$(NC)"
+
+## Shell - Open an interactive bash shell in the container (project dir read-only at /app/workspace)
+shell: build
+	@echo "$(GREEN)Opening interactive shell in container$(NC)"
+	docker run --rm -it \
+		-v "$(CURDIR):/app/workspace:ro" \
+		-v "$(abspath $(OUTPUT_DIR)):/app/output" \
+		--name $(IMAGE_NAME)-shell \
+		--entrypoint /bin/bash \
+		$(FULL_IMAGE_NAME)
+
+## Logs - Show logs from the last container run
+logs:
+ @echo "$(GREEN)Showing logs from last container run$(NC)"
+ docker logs $(IMAGE_NAME)-run 2>/dev/null || echo "$(YELLOW)No logs found for $(IMAGE_NAME)-run$(NC)"
+
+## Clean - Clean up Docker images and containers
+clean:
+ @echo "$(GREEN)Cleaning up Docker resources$(NC)"
+ @docker rm -f $(IMAGE_NAME)-run 2>/dev/null || true
+ @docker rm -f $(IMAGE_NAME)-validate 2>/dev/null || true
+ @docker rm -f $(IMAGE_NAME)-shell 2>/dev/null || true
+ @docker rmi $(IMAGE_NAME):$(IMAGE_TAG) 2>/dev/null || true
+ @docker rmi $(FULL_IMAGE_NAME) 2>/dev/null || true
+ @docker system prune -f
+ @echo "$(GREEN)✅ Cleanup completed!$(NC)"
+
+## Lint - Run linting and code quality checks (fails when an installed checker reports errors)
+lint:
+	@echo "$(GREEN)Running linting checks$(NC)"
+	@if command -v python3 >/dev/null 2>&1; then \
+		python3 -m py_compile main.py flow.py nodes.py utils/*.py && \
+		echo "$(GREEN)✅ Python syntax check passed$(NC)"; \
+	else \
+		echo "$(YELLOW)⚠️ Python3 not found, skipping syntax check$(NC)"; \
+	fi
+	@if command -v yamllint >/dev/null 2>&1; then \
+		find configs -name "*.yaml" -o -name "*.yml" | xargs yamllint && \
+		echo "$(GREEN)✅ YAML lint check passed$(NC)"; \
+	else \
+		echo "$(YELLOW)⚠️ yamllint not found, skipping YAML check$(NC)"; \
+	fi
+
+## Test - Smoke test: validate the bundled example config inside the freshly built image
+test: build
+	@echo "$(GREEN)Running tests$(NC)"
+	docker run --rm \
+		-v "$(CURDIR)/configs/example_config.yaml:/app/config.yaml:ro" \
+		--name $(IMAGE_NAME)-test \
+		$(FULL_IMAGE_NAME) config.yaml --validate-only
+	@echo "$(GREEN)✅ Tests completed!$(NC)"
+
+## Quick start for development
+dev: build validate
+ @echo "$(GREEN)Development environment ready!$(NC)"
+ @echo "$(YELLOW)Try: make run CONFIG_FILE=configs/example_config.yaml$(NC)"
+
+# Advanced targets
+
+## Build with custom args
+build-dev:
+ $(MAKE) build DOCKER_BUILD_ARGS="--target development"
+
+## Run with debug mode - same mounts as `run`, but drops into an interactive bash shell
+run-debug: build
+	@echo "$(GREEN)Running in debug mode$(NC)"
+	docker run --rm -it \
+		-v "$(abspath $(CONFIG_FILE)):/app/config.yaml:ro" \
+		-v "$(abspath $(OUTPUT_DIR)):/app/output" \
+		-v "$(CURDIR)/.env:/app/.env:ro" \
+		--name $(IMAGE_NAME)-debug \
+		--entrypoint /bin/bash \
+		$(FULL_IMAGE_NAME)
+
+## Tag and push with version
+tag-and-push:
+ @if [ -z "$(VERSION)" ]; then \
+ echo "$(RED)❌ VERSION is required. Usage: make tag-and-push VERSION=1.0.0$(NC)"; \
+ exit 1; \
+ fi
+ docker tag $(IMAGE_NAME):$(IMAGE_TAG) $(REGISTRY)/$(REGISTRY_USER)/$(IMAGE_NAME):$(VERSION)
+ docker push $(REGISTRY)/$(REGISTRY_USER)/$(IMAGE_NAME):$(VERSION)
+ @echo "$(GREEN)✅ Tagged and pushed version $(VERSION)$(NC)"
+
+# Make sure output directory exists
+$(OUTPUT_DIR):
+ mkdir -p $(OUTPUT_DIR)
diff --git a/README.md b/README.md
index cc8ad4e8..41366d65 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,231 @@
+# Docker Usage Guide
+
+This guide explains how to build, push, and run the PocketFlow Tutorial application using Docker and the provided Makefile.
+
+## Quick Start
+
+1. **Build the Docker image:**
+ ```bash
+ make build
+ ```
+
+2. **Run with a configuration file:**
+ ```bash
+ make run CONFIG_FILE=configs/example_config.yaml
+ ```
+
+3. **Validate configuration without running:**
+ ```bash
+ make validate CONFIG_FILE=configs/my_config.yaml
+ ```
+
+## Configuration
+
+The Makefile supports several configurable variables:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `IMAGE_NAME` | `documentation-generator` | Docker image name |
+| `IMAGE_TAG` | `latest` | Docker image tag |
+| `REGISTRY` | `docker.io` | Docker registry URL |
+| `REGISTRY_USER` | `your-username` | Registry username |
+| `CONFIG_FILE` | `configs/example_config.yaml` | Path to YAML config file |
+| `OUTPUT_DIR` | `./output` | Local output directory |
+| `MOUNT_DIR` | `./mount` | Local directory to mount for source code |
+
+## Available Commands
+
+### Build Commands
+- `make build` - Build the Docker image
+- `make build-dev` - Build with development target
+- `make push` - Build and push to registry
+- `make tag-and-push VERSION=1.0.0` - Tag with version and push
+
+### Run Commands
+- `make run` - Run with config file
+- `make run-local` - Run with local directory mounting
+- `make run-debug` - Run in debug mode with interactive shell
+- `make shell` - Open interactive shell in container
+
+### Utility Commands
+- `make validate` - Validate configuration file
+- `make clean` - Clean up Docker resources
+- `make lint` - Run code quality checks
+- `make test` - Run tests
+- `make logs` - Show container logs
+- `make help` - Show help message
+
+## Usage Examples
+
+### Basic Usage
+```bash
+# Build and run with default config
+make build
+make run
+
+# Use custom configuration
+make run CONFIG_FILE=configs/corebanking.yaml
+
+# Validate configuration before running
+make validate CONFIG_FILE=configs/my_config.yaml
+```
+
+### Working with Local Directories
+```bash
+# Mount a local source directory
+make run-local CONFIG_FILE=configs/my_config.yaml MOUNT_DIR=/path/to/source/code
+
+# The mounted directory will be available at /app/mount inside the container
+```
+
+### Registry Operations
+```bash
+# Push to Docker Hub
+make push REGISTRY_USER=myusername
+
+# Push to a different registry
+make push REGISTRY=ghcr.io REGISTRY_USER=myusername
+
+# Tag and push a specific version
+make tag-and-push VERSION=1.2.3 REGISTRY_USER=myusername
+```
+
+### Development Workflow
+```bash
+# Development setup
+make dev
+
+# Debug a configuration issue
+make run-debug CONFIG_FILE=configs/problematic.yaml
+
+# Check logs from the last run
+make logs
+
+# Clean up when done
+make clean
+```
+
+## File Mounting
+
+The Makefile automatically mounts several files and directories:
+
+1. **Configuration file**: Mounted as read-only at `/app/config.yaml`
+2. **Output directory**: Mounted at `/app/output` for results
+3. **Environment file**: `.env` mounted as read-only at `/app/.env`
+4. **Source directory** (when using `run-local`): Mounted at `/app/mount`
+
+## Configuration File Format
+
+Your YAML configuration file should follow this structure:
+
+```yaml
+source:
+ repo: "https://github.com/user/repo" # OR
+ local_dir: "/app/mount" # for local directories
+
+project:
+ name: "My Project"
+ output_dir: "output"
+ language: "english"
+
+file_processing:
+ max_file_size: 100000
+ include_patterns:
+ - "*.py"
+ - "*.js"
+ - "*.md"
+ exclude_patterns:
+ - "*test*"
+ - "node_modules/*"
+
+analysis:
+ max_abstractions: 10
+ abstractions_hints:
+ - "authentication"
+ - "data processing"
+
+llm:
+ use_cache: true
+
+github:
+ token: "${GITHUB_TOKEN}" # Will use environment variable
+```
+
+## Environment Variables
+
+Create a `.env` file in your project root:
+
+```bash
+# .env
+GITHUB_TOKEN=your_github_token_here
+OPENAI_API_KEY=your_openai_api_key_here
+# Add other required environment variables
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Configuration file not found**
+ ```bash
+ make run CONFIG_FILE=path/to/your/config.yaml
+ ```
+
+2. **Permission denied for output directory**
+ ```bash
+ sudo chown -R $USER:$USER ./output
+ ```
+
+3. **Docker build fails**
+ ```bash
+ make clean
+ make build DOCKER_BUILD_ARGS="--no-cache --pull"
+ ```
+
+4. **Registry push fails**
+ ```bash
+ docker login
+ make push REGISTRY_USER=your_actual_username
+ ```
+
+### Debug Mode
+
+Use debug mode to troubleshoot issues:
+
+```bash
+make run-debug CONFIG_FILE=configs/my_config.yaml
+
+# Inside the container:
+python main.py config.yaml --validate-only
+python main.py config.yaml
+```
+
+## Advanced Usage
+
+### Custom Docker Build Args
+```bash
+make build DOCKER_BUILD_ARGS="--build-arg PYTHON_VERSION=3.11"
+```
+
+### Multiple Configurations
+```bash
+# Process multiple projects
+for config in configs/*.yaml; do
+ echo "Processing $config"
+ make run CONFIG_FILE="$config"
+done
+```
+
+### CI/CD Integration
+```bash
+# In your CI pipeline
+make build
+make test
+make push REGISTRY_USER=$CI_REGISTRY_USER
+```
+
+
+
Turns Codebase into Easy Tutorial with AI

diff --git a/WorkflowDocumentation.md b/WorkflowDocumentation.md
new file mode 100644
index 00000000..2d5bb9a7
--- /dev/null
+++ b/WorkflowDocumentation.md
@@ -0,0 +1,109 @@
+# Documentation Generation Workflow
+
+This document explains the workflow of the documentation generation process, focusing on how the Large Language Model (LLM) prompts chain together to create a comprehensive tutorial for a given codebase.
+
+## Overview
+
+The process is a pipeline of six sequential steps. Four of these steps involve interactions with an LLM to analyze the code, structure the content, and write the documentation.
+
+```mermaid
+flowchart TD
+ A[1. Fetch Repository] --> B[2. Identify Abstractions]
+ B --> C[3. Analyze Relationships]
+ C --> D[4. Order Chapters]
+ D --> E[5. Write Chapters]
+ E --> F[6. Combine Tutorial]
+```
+
+## Detailed Steps & Prompt Chaining
+
+Here’s a detailed breakdown of each step, focusing on the inputs and outputs of the LLM prompts.
+
+### 1. Fetch Repository (`FetchRepo`)
+This initial step does not use an LLM. It's responsible for gathering the source code.
+
+- **Input**: A repository URL or a local directory path.
+- **Output**: A list of all relevant code files and their content. This list becomes the foundational input for the entire workflow.
+- **Chaining**: The raw codebase content is passed to the next step.
+
+---
+
+### 2. Identify Abstractions (`IdentifyAbstractions`)
+This is the first interaction with the LLM. Its goal is to identify the most important, high-level concepts in the codebase.
+
+- **Input to Prompt**: The entire codebase content fetched in the previous step.
+- **LLM Prompt Goal**: The LLM is asked to analyze the code and identify the top 5-10 core abstractions. For each abstraction, it must provide:
+ 1. A concise `name`.
+ 2. A beginner-friendly `description` with an analogy.
+ 3. A list of relevant `file_indices` that implement or define the abstraction.
+- **Output of Prompt**: The LLM returns a YAML-formatted string.
+ ```yaml
+ - name: |
+ Core Concept A
+ description: |
+ An explanation of what this concept does, like a central controller.
+ file_indices:
+ - 0 # path/to/file1.py
+ - 3 # path/to/file2.py
+ ```
+- **Chaining**: The validated list of abstractions (name, description, file indices) is passed to the next step.
+
+---
+
+### 3. Analyze Relationships (`AnalyzeRelationships`)
+The second LLM interaction focuses on understanding how the identified abstractions interact.
+
+- **Input to Prompt**: The list of abstractions (names and descriptions) and the code snippets from their relevant files.
+- **LLM Prompt Goal**: The LLM is prompted to:
+ 1. Create a high-level `summary` of the project's purpose.
+ 2. Define the `relationships` between the abstractions, describing how they interact (e.g., "Manages", "Inherits from", "Uses").
+- **Output of Prompt**: A YAML object containing the summary and a list of relationships.
+ ```yaml
+ summary: |
+ A brief, simple explanation of the project's purpose.
+ relationships:
+ - from_abstraction: 0 # Core Concept A
+ to_abstraction: 1 # Core Concept B
+ label: "Manages"
+ ```
+- **Chaining**: The project summary and the list of relationships are passed to the next step.
+
+---
+
+### 4. Order Chapters (`OrderChapters`)
+The third LLM interaction determines the best pedagogical order to present the concepts.
+
+- **Input to Prompt**: The project summary, the list of abstractions, and their relationships.
+- **LLM Prompt Goal**: The LLM is asked to determine the optimal sequence for a tutorial. It's instructed to start with foundational or user-facing concepts and then move to lower-level implementation details, respecting dependencies revealed in the relationships.
+- **Output of Prompt**: A YAML list of the abstraction indices, sorted in the recommended chapter order.
+ ```yaml
+ - 2 # FoundationalConcept
+ - 0 # CoreClassA
+ - 1 # CoreClassB (uses CoreClassA)
+ ```
+- **Chaining**: This ordered list of indices dictates the structure of the final tutorial and is passed to the chapter writing step.
+
+---
+
+### 5. Write Chapters (`WriteChapters`)
+This is the most intensive LLM step, where the actual tutorial content is generated for each abstraction, one by one.
+
+- **Input to Prompt (for each chapter)**:
+ - The specific abstraction's details (name, description).
+ - The full tutorial structure (for linking to other chapters).
+ - The content of previously written chapters (to ensure smooth transitions).
+ - Relevant code snippets for the current abstraction.
+- **LLM Prompt Goal**: The LLM is given a detailed set of instructions to write a beginner-friendly Markdown chapter. This includes creating a heading, explaining the concept with analogies, providing simplified code examples (under 10 lines), using Mermaid diagrams for illustration, and writing transitions to the previous and next chapters.
+- **Output of Prompt**: A Markdown-formatted string for each chapter.
+- **Chaining**: The list of all generated Markdown chapter strings is passed to the final step.
+
+---
+
+### 6. Combine Tutorial (`CombineTutorial`)
+This final step does not use an LLM. It assembles all the generated pieces into the final documentation.
+
+- **Input**: The project summary, relationship graph, chapter order, and the content of all chapters.
+- **Output**: A directory containing:
+ - `index.md`: An overview page with the project summary, a Mermaid diagram of the relationships, and a linked table of contents.
+ - `01_concept.md`, `02_another.md`, etc.: Individual chapter files in Markdown format.
+- **Chaining**: This is the final step, and the output is the completed tutorial saved to the filesystem.
\ No newline at end of file
diff --git a/configs/corebanking.yaml b/configs/corebanking.yaml
new file mode 100644
index 00000000..a93a1063
--- /dev/null
+++ b/configs/corebanking.yaml
@@ -0,0 +1,91 @@
+# Corebanking Project Analysis Configuration
+
+# Source configuration - specify either repo or local_dir
+source:
+ local_dir: "/Users/louis-davidcoulombe/github/corebanking"
+ # repo: "https://github.com/example/corebanking" # Use this instead of local_dir for GitHub repos
+
+# Project settings
+project:
+ name: "corebanking"
+ language: "english"
+ output_dir: "nesto/corebanking/analysis_output"
+
+# File processing settings
+file_processing:
+ max_file_size: 150000 # Maximum file size in bytes
+ include_patterns:
+ - "*.py"
+ - "*.js"
+ - "*.jsx"
+ - "*.ts"
+ - "*.tsx"
+ - "*.go"
+ - "*.java"
+ - "*.pyi"
+ - "*.pyx"
+ - "*.c"
+ - "*.cc"
+ - "*.cpp"
+ - "*.h"
+ - "*.md"
+ - "*.rst"
+ - "*Dockerfile"
+ - "*Makefile"
+ - "*.yaml"
+ - "*.yml"
+ exclude_patterns:
+ - "assets/*"
+ - "data/*"
+ - "images/*"
+ - "public/*"
+ - "static/*"
+ - "temp/*"
+ - "*docs/*"
+ - "*venv/*"
+ - "*.venv/*"
+ - "*test*"
+ - "*tests/*"
+ - "*examples/*"
+ - "v1/*"
+ - "*dist/*"
+ - "*build/*"
+ - "*experimental/*"
+ - "*deprecated/*"
+ - "*misc/*"
+ - "*legacy/*"
+ - ".git/*"
+ - ".github/*"
+ - ".next/*"
+ - ".vscode/*"
+ - "*obj/*"
+ - "*bin/*"
+ - "*node_modules/*"
+ - "*.log"
+
+# Analysis settings
+analysis:
+ max_abstractions: 13 # Based on number of abstraction hints provided
+ abstractions_hints:
+ - "Event"
+ - "Command"
+ - "Aggregate"
+ - "Repository"
+ - "API Handler"
+ - "Core Facade"
+ - "Service"
+ - "Consumer"
+ - "Product Engine"
+ - "Simulation Services and Repositories"
+ - "products"
+ - "parameters"
+ - "customers"
+ feedback_file: "nesto/corebanking/review.md"
+
+# LLM settings
+llm:
+ use_cache: true # Set to false to disable caching
+
+# GitHub settings (only needed if using repo instead of local_dir)
+github:
+ # token: "your_github_token_here" # Or set GITHUB_TOKEN environment variable
diff --git a/configs/documents-extractor.yaml b/configs/documents-extractor.yaml
new file mode 100644
index 00000000..3fafdcf5
--- /dev/null
+++ b/configs/documents-extractor.yaml
@@ -0,0 +1,91 @@
+# Documents Extractor Project Analysis Configuration
+
+# Source configuration - specify either repo or local_dir
+source:
+ local_dir: "/Users/louis-davidcoulombe/src/github.com/nestoca/documents-extractor"
+ # repo: "https://github.com/example/corebanking" # Use this instead of local_dir for GitHub repos
+
+# Project settings
+project:
+ name: "documents-extractor"
+ language: "english"
+ output_dir: "nesto/corebanking/documents-extractor"
+
+# File processing settings
+file_processing:
+ max_file_size: 150000 # Maximum file size in bytes
+ include_patterns:
+ - "*.py"
+ - "*.js"
+ - "*.jsx"
+ - "*.ts"
+ - "*.tsx"
+ - "*.go"
+ - "*.java"
+ - "*.pyi"
+ - "*.pyx"
+ - "*.c"
+ - "*.cc"
+ - "*.cpp"
+ - "*.h"
+ - "*.md"
+ - "*.rst"
+ - "*Dockerfile"
+ - "*Makefile"
+ - "*.yaml"
+ - "*.yml"
+ exclude_patterns:
+ - "assets/*"
+ - "data/*"
+ - "images/*"
+ - "public/*"
+ - "static/*"
+ - "temp/*"
+ - "*docs/*"
+ - "*venv/*"
+ - "*.venv/*"
+ - "*test*"
+ - "*tests/*"
+ - "*examples/*"
+ - "v1/*"
+ - "*dist/*"
+ - "*build/*"
+ - "*experimental/*"
+ - "*deprecated/*"
+ - "*misc/*"
+ - "*legacy/*"
+ - ".git/*"
+ - ".github/*"
+ - ".next/*"
+ - ".vscode/*"
+ - "*obj/*"
+ - "*bin/*"
+ - "*node_modules/*"
+ - "*.log"
+
+# Analysis settings
+# analysis:
+# max_abstractions: 13 # Based on number of abstraction hints provided
+# abstractions_hints:
+# - "Event"
+# - "Command"
+# - "Aggregate"
+# - "Repository"
+# - "API Handler"
+# - "Core Facade"
+# - "Service"
+# - "Consumer"
+# - "Product Engine"
+# - "Simulation Services and Repositories"
+# - "products"
+# - "parameters"
+# - "customers"
+# feedback_file: "nesto/corebanking/review.md"
+
+# LLM settings
+llm:
+ use_cache: false # Set to false to disable caching
+
+# GitHub settings (only needed if using repo instead of local_dir)
+github:
+ # token: "your_github_token_here" # Or set GITHUB_TOKEN environment variable
diff --git a/configs/example_config.yaml b/configs/example_config.yaml
new file mode 100644
index 00000000..e9c8d04c
--- /dev/null
+++ b/configs/example_config.yaml
@@ -0,0 +1,72 @@
+# Example Configuration File for Tutorial Generation
+# Copy this file and modify for your specific project
+
+# Source configuration - specify either repo or local_dir (not both)
+source:
+ # For local directories:
+ local_dir: "/path/to/your/local/project"
+
+ # For GitHub repositories (comment out local_dir if using this):
+ # repo: "https://github.com/username/repository"
+
+# Project settings
+project:
+ name: "your_project_name"
+ language: "english" # or "french", "spanish", etc.
+ output_dir: "output/your_project" # where to save the generated tutorial
+
+# File processing settings
+file_processing:
+ max_file_size: 100000 # Maximum file size in bytes (default: 100KB)
+
+ # File patterns to include in analysis
+ include_patterns:
+ - "*.py"
+ - "*.js"
+ - "*.jsx"
+ - "*.ts"
+ - "*.tsx"
+ - "*.go"
+ - "*.java"
+ - "*.md"
+ - "*.rst"
+ - "*.yaml"
+ - "*.yml"
+
+ # File patterns to exclude from analysis
+ exclude_patterns:
+ - "*test*"
+ - "*tests/*"
+ - "*node_modules/*"
+ - "*venv/*"
+ - "*.venv/*"
+ - "*dist/*"
+ - "*build/*"
+ - ".git/*"
+ - "*.log"
+
+# Analysis settings
+analysis:
+ max_abstractions: 10 # Maximum number of abstractions to identify
+
+ # Specific abstractions to look for (optional)
+ abstractions_hints:
+ - "Class"
+ - "Interface"
+ - "Service"
+ - "Controller"
+ - "Repository"
+ - "Model"
+ - "Component"
+ - "Module"
+
+ # Path to feedback file from previous runs (optional)
+ # feedback_file: "path/to/feedback.md"
+
+# LLM settings
+llm:
+ use_cache: true # Set to false to disable response caching
+
+# GitHub settings (only needed if using repo instead of local_dir)
+github:
+ # token: "your_github_token_here" # Or set GITHUB_TOKEN environment variable
diff --git a/main.py b/main.py
index dbba523f..40c0b2cc 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,7 @@
import dotenv
import os
import argparse
+import yaml
# Import the function that creates the flow
from flow import create_tutorial_flow
@@ -35,58 +36,108 @@
"*.log"
}
-# --- Main Function ---
-def main():
- parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase or local directory.")
-
- # Create mutually exclusive group for source
- source_group = parser.add_mutually_exclusive_group(required=True)
- source_group.add_argument("--repo", help="URL of the public GitHub repository.")
- source_group.add_argument("--dir", help="Path to local directory.")
-
- parser.add_argument("-n", "--name", help="Project name (optional, derived from repo/directory if omitted).")
- parser.add_argument("-t", "--token", help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).")
- parser.add_argument("-o", "--output", default="output", help="Base directory for output (default: ./output).")
- parser.add_argument("-i", "--include", nargs="+", help="Include file patterns (e.g. '*.py' '*.js'). Defaults to common code files if not specified.")
- parser.add_argument("-e", "--exclude", nargs="+", help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). Defaults to test/build directories if not specified.")
- parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")
- # Add language parameter for multi-language support
- parser.add_argument("--language", default="english", help="Language for the generated tutorial (default: english)")
- # Add use_cache parameter to control LLM caching
- parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
- # Add max_abstraction_num parameter to control the number of abstractions
- parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)")
-
- args = parser.parse_args()
-
- # Get GitHub token from argument or environment variable if using repo
+def load_config(config_path):
+    """Load and validate a YAML configuration file.
+
+    Returns the config dict with every optional section present as a dict.
+    Raises FileNotFoundError (missing file) or ValueError (bad YAML/content).
+    """
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            config = yaml.safe_load(f)
+    except FileNotFoundError:
+        raise FileNotFoundError(f"Configuration file not found: {config_path}")
+    except yaml.YAMLError as e:
+        raise ValueError(f"Invalid YAML in configuration file: {e}")
+    except Exception as e:
+        raise Exception(f"Error loading configuration: {e}")
+
+    # An empty file parses to None; a scalar or list document is not a mapping either.
+    if not isinstance(config, dict):
+        raise ValueError("Configuration file must contain a YAML mapping at the top level")
+
+    # Validate required sections (each must be present and be a mapping)
+    for section in ('source', 'project'):
+        if not isinstance(config.get(section), dict):
+            raise ValueError(f"Missing required section '{section}' in config file")
+
+    # Validate source (must have either repo or local_dir)
+    source = config['source']
+    if not ('repo' in source or 'local_dir' in source):
+        raise ValueError("Source section must contain either 'repo' or 'local_dir'")
+
+    # Default optional sections; a key with only comments under it parses to None.
+    for section in ('file_processing', 'analysis', 'llm', 'github'):
+        if config.get(section) is None:
+            config[section] = {}
+    return config
+
+def config_to_shared(config):
+ """Convert YAML config to shared dictionary format."""
+ source = config['source']
+ project = config['project']
+ file_proc = config.get('file_processing', {})
+ analysis = config.get('analysis', {})
+ llm_config = config.get('llm', {})
+ github_config = config.get('github', {})
+
+ # Get GitHub token from config or environment variable
github_token = None
- if args.repo:
- github_token = args.token or os.environ.get('GITHUB_TOKEN')
+ if 'repo' in source:
+ github_token = github_config.get('token') or os.environ.get('GITHUB_TOKEN')
if not github_token:
print("Warning: No GitHub token provided. You might hit rate limits for public repositories.")
-
- # Initialize the shared dictionary with inputs
+
+ # Load feedback from file if provided
+ feedback_content = None
+ feedback_file = analysis.get('feedback_file')
+ if feedback_file:
+ try:
+ with open(feedback_file, 'r', encoding='utf-8') as f:
+ feedback_content = f.read()
+ print(f"Loaded feedback from: {feedback_file}")
+ except FileNotFoundError:
+ print(f"Warning: Feedback file not found: {feedback_file}")
+ except Exception as e:
+ print(f"Warning: Could not read feedback file {feedback_file}: {e}")
+
+ # Get abstractions hints and calculate max abstractions
+ abstractions_hints = analysis.get('abstractions_hints')
+ max_abstractions = analysis.get('max_abstractions', 10)
+
+ # Ensure abstractions_hints is never None, default to empty list
+ if abstractions_hints is None:
+ abstractions_hints = []
+
+ # If specific abstractions are provided, use their count as max
+ if abstractions_hints:
+ max_abstractions = len(abstractions_hints)
+ print(f"✓ Using {len(abstractions_hints)} specific abstraction hints")
+ else:
+ print(f"✓ No specific abstraction hints provided, will identify up to {max_abstractions} abstractions")
+
shared = {
- "repo_url": args.repo,
- "local_dir": args.dir,
- "project_name": args.name, # Can be None, FetchRepo will derive it
+ "repo_url": source.get('repo'),
+ "local_dir": source.get('local_dir'),
+ "project_name": project.get('name'),
"github_token": github_token,
- "output_dir": args.output, # Base directory for CombineTutorial output
+ "output_dir": project.get('output_dir', 'output'),
- # Add include/exclude patterns and max file size
- "include_patterns": set(args.include) if args.include else DEFAULT_INCLUDE_PATTERNS,
- "exclude_patterns": set(args.exclude) if args.exclude else DEFAULT_EXCLUDE_PATTERNS,
- "max_file_size": args.max_size,
+ # File processing settings
+ "include_patterns": set(file_proc.get('include_patterns', DEFAULT_INCLUDE_PATTERNS)),
+ "exclude_patterns": set(file_proc.get('exclude_patterns', DEFAULT_EXCLUDE_PATTERNS)),
+ "max_file_size": file_proc.get('max_file_size', 100000),
- # Add language for multi-language support
- "language": args.language,
+ # Project settings
+ "language": project.get('language', 'english'),
- # Add use_cache flag (inverse of no-cache flag)
- "use_cache": not args.no_cache,
+ # LLM settings
+ "use_cache": llm_config.get('use_cache', True),
- # Add max_abstraction_num parameter
- "max_abstraction_num": args.max_abstractions,
+ # Analysis settings
+ "abstractions_hints": abstractions_hints,
+ "max_abstraction_num": max_abstractions,
+ "feedback_content": feedback_content,
# Outputs will be populated by the nodes
"files": [],
@@ -96,16 +147,54 @@ def main():
"chapters": [],
"final_output_dir": None
}
+
+ return shared
- # Display starting message with repository/directory and language
- print(f"Starting tutorial generation for: {args.repo or args.dir} in {args.language.capitalize()} language")
- print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}")
-
- # Create the flow instance
- tutorial_flow = create_tutorial_flow()
-
- # Run the flow
- tutorial_flow.run(shared)
+# --- Main Function ---
+def main():
+ parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase or local directory using YAML configuration.")
+ parser.add_argument("config", help="Path to YAML configuration file")
+ parser.add_argument("--validate-only", action="store_true", help="Only validate the configuration file without running the analysis")
+
+ args = parser.parse_args()
+
+ try:
+ # Load and validate configuration
+ config = load_config(args.config)
+ print(f"✓ Configuration loaded successfully from: {args.config}")
+
+ if args.validate_only:
+ print("✓ Configuration is valid!")
+ return
+
+ # Convert config to shared dictionary
+ shared = config_to_shared(config)
+
+ # Display starting message
+ source_info = shared.get('repo_url') or shared.get('local_dir')
+ language = shared.get('language', 'english')
+ print(f"🚀 Starting tutorial generation for: {source_info} in {language.capitalize()} language")
+ print(f"📊 Configuration:")
+ print(f" ├─ Project name: {shared.get('project_name', 'Auto-detected')}")
+ print(f" ├─ Output directory: {shared.get('output_dir')}")
+ print(f" ├─ Max file size: {shared.get('max_file_size'):,} bytes")
+ print(f" ├─ LLM caching: {'Enabled' if shared.get('use_cache') else 'Disabled'}")
+ print(f" ├─ Language: {language.capitalize()}")
+ print(f" └─ Feedback from previous run: {'Yes' if shared.get('feedback_content') else 'No'}")
+
+ include_count = len(shared.get('include_patterns', []))
+ exclude_count = len(shared.get('exclude_patterns', []))
+ print(f"📁 File patterns: {include_count} include, {exclude_count} exclude")
+
+ # Create the flow instance
+ tutorial_flow = create_tutorial_flow()
+
+ # Run the flow
+ tutorial_flow.run(shared)
+
+ except Exception as e:
+ print(f"Error: {e}")
+ exit(1)
if __name__ == "__main__":
main()
diff --git a/nodes.py b/nodes.py
index 0e3fa587..3eef25b0 100644
--- a/nodes.py
+++ b/nodes.py
@@ -71,10 +71,20 @@ def exec(self, prep_res):
)
# Convert dict to list of tuples: [(path, content), ...]
+ if result is None:
+ raise ValueError("Failed to fetch files - crawling function returned None")
+
files_list = list(result.get("files", {}).items())
if len(files_list) == 0:
raise (ValueError("Failed to fetch files"))
- print(f"Fetched {len(files_list)} files.")
+ print(f"✅ Fetched {len(files_list)} files successfully")
+
+ # Show some stats about the files
+ total_size = sum(len(content) for _, content in files_list)
+ avg_size = total_size // len(files_list) if files_list else 0
+ print(f" ├─ Total content size: {total_size:,} characters")
+ print(f" ├─ Average file size: {avg_size:,} characters")
+ print(f" └─ Largest file: {max((len(content) for _, content in files_list), default=0):,} characters")
return files_list
def post(self, shared, prep_res, exec_res):
@@ -88,6 +98,8 @@ def prep(self, shared):
language = shared.get("language", "english") # Get language
use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True
max_abstraction_num = shared.get("max_abstraction_num", 10) # Get max_abstraction_num, default to 10
+ abstractions_hints = shared.get("abstractions_hints", []) # List of abstractions to include, if empty, all abstractions will be included
+ feedback_content = shared.get("feedback_content") # Get feedback from previous runs
# Helper to create context from files, respecting limits (basic example)
def create_llm_context(files_data):
@@ -113,7 +125,9 @@ def create_llm_context(files_data):
language,
use_cache,
max_abstraction_num,
- ) # Return all parameters
+ abstractions_hints,
+ feedback_content,
+ ) # Return all parameters including feedback
def exec(self, prep_res):
(
@@ -124,13 +138,55 @@ def exec(self, prep_res):
language,
use_cache,
max_abstraction_num,
+ abstractions_hints,
+ feedback_content,
) = prep_res # Unpack all parameters
- print(f"Identifying abstractions using LLM...")
+
+ print(f"🔍 Identifying abstractions using LLM...")
+ print(f" ├─ Project: {project_name}")
+ print(f" ├─ Files analyzed: {file_count}")
+ print(f" ├─ Language: {language}")
+ print(f" ├─ Max abstractions: {max_abstraction_num}")
+ print(f" ├─ Cache enabled: {use_cache}")
+ print(f" ├─ Feedback provided: {'Yes' if feedback_content else 'No'}")
+ print(f" └─ Abstraction hints: {len(abstractions_hints) if abstractions_hints else 0} provided")
# Add language instruction and hints only if not English
language_instruction = ""
name_lang_hint = ""
desc_lang_hint = ""
+
+ # Safety check for abstractions_hints
+ if abstractions_hints is None:
+ abstractions_hints = []
+ print("Warning: abstractions_hints was None, defaulting to empty list")
+
+ abstractions_hints_str = ""
+ if len(abstractions_hints) > 0:
+ print(f"✓ Using specific abstraction hints: {abstractions_hints}")
+ abstractions_hints_str = f"IMPORTANT: Specific abstractions to include: {abstractions_hints}\n\n"
+ max_abstraction_num = len(abstractions_hints)
+ else:
+ print(f"✓ No specific abstraction hints, identifying up to {max_abstraction_num} abstractions")
+
+ # Add feedback section if available
+ feedback_section = ""
+ if feedback_content:
+ print(f"📝 Including feedback from previous analysis run")
+ feedback_section = f"""
+CRITICAL: Learn from Previous Analysis Feedback
+The following feedback was provided from a previous analysis of this codebase. Please carefully consider these points to avoid repeating the same mistakes:
+
+{feedback_content}
+
+Based on this feedback, pay special attention to:
+- Avoiding hallucinated commands or concepts that don't exist in the codebase
+- Following proper naming conventions used in the code
+- Ensuring accuracy in descriptions and relationships
+- Including all important abstractions mentioned in the feedback
+
+"""
+
if language.lower() != "english":
language_instruction = f"IMPORTANT: Generate the `name` and `description` for each abstraction in **{language.capitalize()}** language. Do NOT use English for these fields.\n\n"
# Keep specific hints here as name/description are primary targets
@@ -140,10 +196,10 @@ def exec(self, prep_res):
prompt = f"""
For the project `{project_name}`:
-Codebase Context:
+{feedback_section}Codebase Context:
{context}
-{language_instruction}Analyze the codebase context.
+{abstractions_hints_str}{language_instruction}Analyze the codebase context.
Identify the top 5-{max_abstraction_num} core most important abstractions to help those new to the codebase.
For each abstraction, provide:
@@ -173,9 +229,12 @@ def exec(self, prep_res):
- 5 # path/to/another.js
# ... up to {max_abstraction_num} abstractions
```"""
+ print(f"🤖 Calling LLM for abstraction identification (attempt {self.cur_retry + 1})")
response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying
+ print(f"✅ Received LLM response ({len(response)} characters)")
# --- Validation ---
+ print(f"🔍 Parsing and validating LLM response...")
yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
abstractions = yaml.safe_load(yaml_str)
@@ -228,7 +287,10 @@ def exec(self, prep_res):
}
)
- print(f"Identified {len(validated_abstractions)} abstractions.")
+ print(f"✅ Successfully identified {len(validated_abstractions)} abstractions:")
+ for i, abstr in enumerate(validated_abstractions):
+ file_count = len(abstr.get('files', []))
+ print(f" {i+1}. {abstr['name']} ({file_count} files)")
return validated_abstractions
def post(self, shared, prep_res, exec_res):
@@ -246,6 +308,7 @@ def prep(self, shared):
project_name = shared["project_name"] # Get project name
language = shared.get("language", "english") # Get language
use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True
+ feedback_content = shared.get("feedback_content") # Get feedback from previous runs
# Get the actual number of abstractions directly
num_abstractions = len(abstractions)
@@ -284,7 +347,8 @@ def prep(self, shared):
project_name,
language,
use_cache,
- ) # Return use_cache
+ feedback_content,
+ ) # Return feedback_content
def exec(self, prep_res):
(
@@ -294,7 +358,8 @@ def exec(self, prep_res):
project_name,
language,
use_cache,
- ) = prep_res # Unpack use_cache
+ feedback_content,
+ ) = prep_res # Unpack use_cache and feedback_content
print(f"Analyzing relationships using LLM...")
# Add language instruction and hints only if not English
@@ -306,6 +371,24 @@ def exec(self, prep_res):
lang_hint = f" (in {language.capitalize()})"
list_lang_note = f" (Names might be in {language.capitalize()})" # Note for the input list
+ # Add feedback section if available
+ feedback_section = ""
+ if feedback_content:
+ feedback_section = f"""
+CRITICAL: Learn from Previous Analysis Feedback
+The following feedback was provided from a previous analysis of this codebase. Please carefully consider these points to avoid repeating the same mistakes:
+
+{feedback_content}
+
+Based on this feedback, pay special attention to:
+- Avoiding hallucinated commands or concepts that don't exist in the codebase
+- Ensuring relationships are accurate and based on actual code interactions
+- Following proper naming conventions used in the code
+- Being precise about how different components actually interact in the codebase
+- Including all important relationships mentioned in the feedback
+
+"""
+
prompt = f"""
Based on the following abstractions and relevant code snippets from the project `{project_name}`:
@@ -315,7 +398,7 @@ def exec(self, prep_res):
Context (Abstractions, Descriptions, Code):
{context}
-{language_instruction}Please provide:
+{feedback_section}{language_instruction}Please provide:
1. A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences{lang_hint}. Use markdown formatting with **bold** and *italic* text to highlight important concepts.
2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify:
- `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`)
@@ -544,6 +627,7 @@ def prep(self, shared):
project_name = shared["project_name"]
language = shared.get("language", "english")
use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True
+ feedback_content = shared.get("feedback_content") # Get feedback from previous runs
# Get already written chapters to provide context
# We store them temporarily during the batch run, not in shared memory yet
@@ -616,6 +700,7 @@ def prep(self, shared):
"next_chapter": next_chapter, # Add next chapter info (uses potentially translated name)
"language": language, # Add language for multi-language support
"use_cache": use_cache, # Pass use_cache flag
+ "feedback_content": feedback_content, # Add feedback content
# previous_chapters_summary will be added dynamically in exec
}
)
@@ -639,6 +724,7 @@ def exec(self, item):
project_name = item.get("project_name")
language = item.get("language", "english")
use_cache = item.get("use_cache", True) # Read use_cache from item
+ feedback_content = item.get("feedback_content") # Get feedback content
print(f"Writing chapter {chapter_num} for: {abstraction_name} using LLM...")
# Prepare file context string from the map
@@ -675,6 +761,27 @@ def exec(self, item):
)
tone_note = f" (appropriate for {lang_cap} readers)"
+ # Add feedback section if available
+ feedback_section = ""
+ if feedback_content:
+ feedback_section = f"""
+
+CRITICAL: Learn from Previous Analysis Feedback
+The following feedback was provided from a previous analysis of this codebase. Please carefully consider these points to avoid repeating the same mistakes in this chapter:
+
+{feedback_content}
+
+Based on this feedback, pay special attention to:
+- Avoiding hallucinated commands, classes, or concepts that don't exist in the codebase
+- Following proper naming conventions used in the actual code (like `accountsSvc`, `accountsRepo`)
+- Ensuring code examples are realistic and follow the patterns seen in the actual codebase
+- Creating accurate diagrams that reflect the actual architecture, not invented layers
+- Being precise about how components actually work based on the code, not assumptions
+- Including accurate comments and struct descriptions
+- Avoiding pseudo-code that might mislead readers about the actual implementation
+
+"""
+
prompt = f"""
{language_instruction}Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}.
@@ -691,7 +798,7 @@ def exec(self, item):
Relevant Code Snippets (Code itself remains unchanged):
{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."}
-
+{feedback_section}
Instructions for the chapter (Generate content in {language.capitalize()} unless specified otherwise):
- Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`). Use the provided concept name.
diff --git a/prompt.md b/prompt.md
new file mode 100644
index 00000000..af69e582
--- /dev/null
+++ b/prompt.md
@@ -0,0 +1,177 @@
+# LLM Prompts from `nodes.py`
+
+This file contains all the LLM prompts used in the documentation generation workflow, extracted from the `nodes.py` file.
+
+## 1. `IdentifyAbstractions` Node
+
+This prompt analyzes the entire codebase to identify the most important core abstractions.
+
+```python
+f"""
+For the project `{project_name}`:
+
+Codebase Context:
+{context}
+
+{language_instruction}Analyze the codebase context.
+Identify the top 5-{max_abstraction_num} core most important abstractions to help those new to the codebase.
+
+For each abstraction, provide:
+1. A concise `name`{name_lang_hint}.
+2. A beginner-friendly `description` explaining what it is with a simple analogy, in around 100 words{desc_lang_hint}.
+3. A list of relevant `file_indices` (integers) using the format `idx # path/comment`.
+
+List of file indices and paths present in the context:
+{file_listing_for_prompt}
+
+Format the output as a YAML list of dictionaries:
+
+```yaml
+- name: |
+ Query Processing{name_lang_hint}
+ description: |
+ Explains what the abstraction does.
+ It's like a central dispatcher routing requests.{desc_lang_hint}
+ file_indices:
+ - 0 # path/to/file1.py
+ - 3 # path/to/related.py
+- name: |
+ Query Optimization{name_lang_hint}
+ description: |
+ Another core concept, similar to a blueprint for objects.{desc_lang_hint}
+ file_indices:
+ - 5 # path/to/another.js
+# ... up to {max_abstraction_num} abstractions
+```"""
+```
+
+## 2. `AnalyzeRelationships` Node
+
+This prompt takes the identified abstractions and analyzes their relationships to build a high-level overview of the project.
+
+```python
+f"""
+Based on the following abstractions and relevant code snippets from the project `{project_name}`:
+
+List of Abstraction Indices and Names{list_lang_note}:
+{abstraction_listing}
+
+Context (Abstractions, Descriptions, Code):
+{context}
+
+{language_instruction}Please provide:
+1. A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences{lang_hint}. Use markdown formatting with **bold** and *italic* text to highlight important concepts.
+2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify:
+ - `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`)
+ - `to_abstraction`: Index of the target abstraction (e.g., `1 # AbstractionName2`)
+ - `label`: A brief label for the interaction **in just a few words**{lang_hint} (e.g., "Manages", "Inherits", "Uses").
+ Ideally the relationship should be backed by one abstraction calling or passing parameters to another.
+ Simplify the relationship and exclude those non-important ones.
+
+IMPORTANT: Make sure EVERY abstraction is involved in at least ONE relationship (either as source or target). Each abstraction index must appear at least once across all relationships.
+
+Format the output as YAML:
+
+```yaml
+summary: |
+ A brief, simple explanation of the project{lang_hint}.
+ Can span multiple lines with **bold** and *italic* for emphasis.
+relationships:
+ - from_abstraction: 0 # AbstractionName1
+ to_abstraction: 1 # AbstractionName2
+ label: "Manages"{lang_hint}
+ - from_abstraction: 2 # AbstractionName3
+ to_abstraction: 0 # AbstractionName1
+ label: "Provides config"{lang_hint}
+ # ... other relationships
+```
+
+
+## 3. `OrderChapters` Node
+
+This prompt determines the best pedagogical order to explain the identified abstractions to a newcomer.
+
+```python
+f"""
+Given the following project abstractions and their relationships for the project `{project_name}`:
+
+Abstractions (Index # Name){list_lang_note}:
+{abstraction_listing}
+
+Context about relationships and project summary:
+{context}
+
+If you are going to make a tutorial for ```` {project_name} ````, what is the best order to explain these abstractions, from first to last?
+Ideally, first explain those that are the most important or foundational, perhaps user-facing concepts or entry points. Then move to more detailed, lower-level implementation details or supporting concepts.
+
+Output the ordered list of abstraction indices, including the name in a comment for clarity. Use the format `idx # AbstractionName`.
+
+```yaml
+- 2 # FoundationalConcept
+- 0 # CoreClassA
+- 1 # CoreClassB (uses CoreClassA)
+- ...
+```
+
+
+
+
+## 4. `WriteChapters` Node
+
+This prompt is executed for each abstraction to generate a detailed, beginner-friendly tutorial chapter in Markdown.
+
+```python
+f"""
+{language_instruction}Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}.
+
+Concept Details{concept_details_note}:
+- Name: {abstraction_name}
+- Description:
+{abstraction_description}
+
+Complete Tutorial Structure{structure_note}:
+{item["full_chapter_listing"]}
+
+Context from previous chapters{prev_summary_note}:
+{previous_chapters_summary if previous_chapters_summary else "This is the first chapter."}
+
+Relevant Code Snippets (Code itself remains unchanged):
+{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."}
+
+Instructions for the chapter (Generate content in {language.capitalize()} unless specified otherwise):
+- Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`). Use the provided concept name.
+
+- If this is not the first chapter, begin with a brief transition from the previous chapter{instruction_lang_note}, referencing it with a proper Markdown link using its name{link_lang_note}.
+
+- Begin with a high-level motivation explaining what problem this abstraction solves{instruction_lang_note}. Start with a central use case as a concrete example. The whole chapter should guide the reader to understand how to solve this use case. Make it very minimal and friendly to beginners.
+
+- If the abstraction is complex, break it down into key concepts. Explain each concept one-by-one in a very beginner-friendly way{instruction_lang_note}.
+
+- Explain how to use this abstraction to solve the use case{instruction_lang_note}. Give example inputs and outputs for code snippets (if the output isn't values, describe at a high level what will happen{instruction_lang_note}).
+
+- Each code block should be BELOW 10 lines! If longer code blocks are needed, break them down into smaller pieces and walk through them one-by-one. Aggresively simplify the code to make it minimal. Use comments{code_comment_note} to skip non-important implementation details. Each code block should have a beginner friendly explanation right after it{instruction_lang_note}.
+
+- Describe the internal implementation to help understand what's under the hood{instruction_lang_note}. First provide a non-code or code-light walkthrough on what happens step-by-step when the abstraction is called{instruction_lang_note}. It's recommended to use a simple sequenceDiagram with a dummy example - keep it minimal with at most 5 participants to ensure clarity. If participant name has space, use: `participant QP as Query Processing`. {mermaid_lang_note}.
+
+- Then dive deeper into code for the internal implementation with references to files. Provide example code blocks, but make them similarly simple and beginner-friendly. Explain{instruction_lang_note}.
+
+- IMPORTANT: When you need to refer to other core abstractions covered in other chapters, ALWAYS use proper Markdown links like this: [Chapter Title](filename.md). Use the Complete Tutorial Structure above to find the correct filename and the chapter title{link_lang_note}. Translate the surrounding text.
+
+- Use mermaid diagrams to illustrate complex concepts (```mermaid``` format). {mermaid_lang_note}.
+
+- Heavily use analogies and examples throughout{instruction_lang_note} to help beginners understand.
+
+- End the chapter with a brief conclusion that summarizes what was learned{instruction_lang_note} and provides a transition to the next chapter{instruction_lang_note}. If there is a next chapter, use a proper Markdown link: [Next Chapter Title](next_chapter_filename){link_lang_note}.
+
+- Ensure the tone is welcoming and easy for a newcomer to understand{tone_note}.
+
+- Output *only* the Markdown content for this chapter.
+
+Now, directly provide a super beginner-friendly Markdown output (DON'T need ```markdown``` tags):
+"""
+```
\ No newline at end of file
diff --git a/run_analysis.sh b/run_analysis.sh
new file mode 100755
index 00000000..3e5b3b22
--- /dev/null
+++ b/run_analysis.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Shell script to analyze the local corebanking repository
+# using YAML configuration file
+
+echo "Starting corebanking tutorial generation..."
+source .venv/bin/activate
+
+# Validate configuration before running
+echo "Validating configuration..."
+python main.py "$1" --validate-only
+
+if [ $? -eq 0 ]; then
+ echo "Configuration is valid. Running analysis..."
+ python main.py "$1"
+else
+ echo "Configuration validation failed. Please check the config file."
+ exit 1
+fi
+
+echo "Done"
\ No newline at end of file