diff --git a/.blackboxrules b/.blackboxrules deleted file mode 100644 index af4572e..0000000 --- a/.blackboxrules +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
- -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So If you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates an optional text field, since `required` defaults to false): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Review the available pipes below and adapt them to your needs. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - For choosing the next pipe based on an expression evaluated on stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (including Vision LLMs) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in Markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, PipeExtract outputs a `ListContent` of `Page`, one item per page. - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` holds the text and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole PDF page or image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can seen, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. 
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.cursor/rules/llms.mdc b/.cursor/rules/llms.mdc deleted file mode 100644 index 8babc76..0000000 --- a/.cursor/rules/llms.mdc +++ /dev/null @@ -1,85 +0,0 @@ ---- -alwaysApply: false -description: LLM configuration and usage guidelines -globs: -- '*.plx' -- '*.toml' ---- -# Rules to choose LLM models used in PipeLLMs. - -## LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -## LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. **An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -## Using an LLM Handle in a PipeLLM - -Here is an example of using an llm_handle to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." 
-output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -## LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_for_complex_reasoning = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. If needed, you can add llm presets. - - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. 
- diff --git a/.cursor/rules/run_pipelex.mdc b/.cursor/rules/run_pipelex.mdc deleted file mode 100644 index 7650051..0000000 --- a/.cursor/rules/run_pipelex.mdc +++ /dev/null @@ -1,231 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for running Pipelex pipelines -globs: -- examples/**/*.py ---- -# Guide to execute a pipeline and write example code - -## Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. - """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -# start Pipelex -Pipelex.make() -# run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -## Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -# start Pipelex -Pipelex.make() - -# run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -## Setting up the input 
memory - -### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can see, we made it so different ways can be used to define that stuff using structured content or data. - -### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -# Here we have a single input and it's a Text. -# If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -# Here we have a single input and it's a document. -# Because DocumentContent is a native concept, we can use it directly as a value, -# the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -# Here we have a single input and it's an Image. 
-# Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -# Here we have a single input, it's an image but -# it's actually a more specific concept gantt.GanttImage which refines Image, -# so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -# Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -## Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. -It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... 
- - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. - -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.cursor/rules/write_pipelex.mdc b/.cursor/rules/write_pipelex.mdc deleted file mode 100644 index 93422cc..0000000 --- a/.cursor/rules/write_pipelex.mdc +++ /dev/null @@ -1,997 +0,0 @@ ---- -alwaysApply: false -description: Guidelines for writing Pipelex pipelines -globs: -- '**/*.plx' -- '**/pipelines/**/*.py' ---- -# Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. 
For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -## Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -## Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. - -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. 
- -### Pipe Definitions - -## Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So If you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: - -### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -# Single item (default) -inputs = { document = "Text" } - -# Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -# Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -## Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -### Three Ways to Structure Concepts - -**1. 
No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates required text field): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -## Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends of the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (include Vision LLM) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -## PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -## PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -## PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is expected to be a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in Markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -## PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole PDF page/image. - -## PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -# Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -#### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -#### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -#### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -#### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -## PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -## PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -## Rules to choose LLM models used in PipeLLMs. - -### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index af4572e..0000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. 
Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. - -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). 
-So if you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'` -That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the Python class if you want structured output. - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates required text field): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the available pipes below and adapt them to your needs. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends on an expression evaluated on stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (including with Vision LLMs) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic Python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract is outputting a `ListContent` of `Page` - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more.
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can see, we made it so different ways can be used to define that stuff using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text.
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id.
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/.pipelex/inference/backends.toml b/.pipelex/inference/backends.toml index 46cbb79..8ffc6c6 100644 --- a/.pipelex/inference/backends.toml +++ b/.pipelex/inference/backends.toml @@ -18,7 +18,6 @@ api_key = "${PIPELEX_GATEWAY_API_KEY}" [anthropic] enabled = false api_key = "${ANTHROPIC_API_KEY}" -valued_constraints = { max_output_tokens_limit = 8192 } [azure_openai] display_name = "Azure OpenAI" @@ -87,7 +86,7 @@ api_key = "${SCALEWAY_API_KEY}" [vertexai] display_name = "Google Vertex AI" -enabled = false # This is the only one we disable beacuse setting it up requires internet access just to get credentials so it fails in CI sandboxes +enabled = false # This is the only one we disable because setting it up requires internet access just to get credentials so it fails in CI sandboxes gcp_project_id = "${GCP_PROJECT_ID}" gcp_location = "${GCP_LOCATION}" gcp_credentials_file_path = "${GCP_CREDENTIALS_FILE_PATH}" diff --git a/.pipelex/inference/backends/anthropic.toml b/.pipelex/inference/backends/anthropic.toml index 729e2d1..145ba19 100644 --- a/.pipelex/inference/backends/anthropic.toml +++ b/.pipelex/inference/backends/anthropic.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "anthropic" prompting_target = "anthropic" structure_method = "instructor/anthropic_tools" +thinking_mode = "manual" ################################################################################ # LANGUAGE MODELS @@ -38,20 +39,13 @@ inputs = ["text", "images"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 0.25, output = 1.25 } - -[claude-3-opus] -model_id = "claude-3-opus-20240229" -max_tokens = 4096 -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs 
= { input = 15.0, output = 75.0 } +thinking_mode = "none" # --- Claude 3.7 Series -------------------------------------------------------- ["claude-3.7-sonnet"] model_id = "claude-3-7-sonnet-20250219" max_tokens = 8192 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } @@ -60,7 +54,7 @@ costs = { input = 3.0, output = 15.0 } [claude-4-sonnet] model_id = "claude-sonnet-4-20250514" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } @@ -68,7 +62,7 @@ costs = { input = 3.0, output = 15.0 } [claude-4-opus] model_id = "claude-opus-4-20250514" max_tokens = 32000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } @@ -77,7 +71,7 @@ costs = { input = 3.0, output = 15.0 } ["claude-4.1-opus"] model_id = "claude-opus-4-1-20250805" max_tokens = 32000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } @@ -86,7 +80,7 @@ costs = { input = 3.0, output = 15.0 } ["claude-4.5-sonnet"] model_id = "claude-sonnet-4-5-20250929" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } @@ -94,7 +88,7 @@ costs = { input = 3.0, output = 15.0 } ["claude-4.5-haiku"] model_id = "claude-haiku-4-5-20251001" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 1.0, output = 5.0 } @@ -102,7 +96,16 @@ costs = { input = 1.0, output = 5.0 } ["claude-4.5-opus"] model_id = "claude-opus-4-5-20251101" max_tokens = 64000 -inputs = 
["text", "images"] +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } + +["claude-4.6-opus"] +model_id = "claude-opus-4-6" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 5.0, output = 25.0 } +thinking_mode = "adaptive" diff --git a/.pipelex/inference/backends/azure_openai.toml b/.pipelex/inference/backends/azure_openai.toml index 51464f2..c3020da 100644 --- a/.pipelex/inference/backends/azure_openai.toml +++ b/.pipelex/inference/backends/azure_openai.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "azure_openai_responses" prompting_target = "openai" structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS @@ -69,6 +70,7 @@ inputs = ["text"] outputs = ["text", "structured"] costs = { input = 3.0, output = 12.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [o1] model_id = "o1-2024-12-17" @@ -76,6 +78,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 15.0, output = 60.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [o3-mini] model_id = "o3-mini-2025-01-31" @@ -83,6 +86,7 @@ inputs = ["text"] outputs = ["text", "structured"] costs = { input = 1.1, output = 4.4 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [o3] model_id = "o3-2025-04-16" @@ -90,6 +94,7 @@ inputs = ["text"] outputs = ["text", "structured"] costs = { input = 2, output = 8 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" # --- GPT-5 Series ------------------------------------------------------------- [gpt-5-mini] @@ -98,6 +103,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 0.25, output = 2.0 } valued_constraints = { fixed_temperature = 1 } 
+thinking_mode = "manual" [gpt-5-nano] model_id = "gpt-5-nano-2025-08-07" @@ -105,6 +111,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 0.05, output = 0.4 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [gpt-5-chat] model_id = "gpt-5-chat-2025-08-07" @@ -112,6 +119,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [gpt-5] model_id = "gpt-5-2025-08-07" @@ -119,6 +127,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" # --- GPT-5.1 Series ------------------------------------------------------------- ["gpt-5.1"] @@ -127,6 +136,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" ["gpt-5.1-chat"] model_id = "gpt-5.1-chat-2025-11-13" @@ -134,6 +144,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" ["gpt-5.1-codex"] model_id = "gpt-5.1-codex-2025-11-13" @@ -141,6 +152,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" # --- GPT-5.2 Series ------------------------------------------------------------- ["gpt-5.2"] @@ -148,6 +160,7 @@ model_id = "gpt-5.2-2025-12-11" inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 1.75, output = 14.0 } +thinking_mode = "manual" ["gpt-5.2-chat"] model_id = "gpt-5.2-chat-2025-12-11" @@ -155,6 +168,7 @@ inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { 
fixed_temperature = 1 } +thinking_mode = "manual" ################################################################################ # IMAGE GENERATION MODELS @@ -170,6 +184,7 @@ outputs = ["image"] costs = { input = 10, output = 40 } [gpt-image-1.rules] +prompt = "positive_only" num_images = "gpt" aspect_ratio = "gpt" background = "gpt" @@ -186,6 +201,7 @@ outputs = ["image"] costs = { input = 2.5, output = 8 } [gpt-image-1-mini.rules] +prompt = "positive_only" num_images = "gpt" aspect_ratio = "gpt" background = "gpt" @@ -202,6 +218,7 @@ outputs = ["image"] costs = { input = 8, output = 32 } ["gpt-image-1.5".rules] +prompt = "positive_only" num_images = "gpt" aspect_ratio = "gpt" background = "gpt" diff --git a/.pipelex/inference/backends/bedrock.toml b/.pipelex/inference/backends/bedrock.toml index c4ab176..d2ecd23 100644 --- a/.pipelex/inference/backends/bedrock.toml +++ b/.pipelex/inference/backends/bedrock.toml @@ -24,6 +24,7 @@ model_type = "llm" sdk = "bedrock_aioboto3" prompting_target = "anthropic" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS @@ -60,61 +61,78 @@ costs = { input = 3.0, output = 15.0 } sdk = "bedrock_anthropic" model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0" max_tokens = 8192 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" [claude-4-sonnet] sdk = "bedrock_anthropic" model_id = "us.anthropic.claude-sonnet-4-20250514-v1:0" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" [claude-4-opus] sdk = "bedrock_anthropic" model_id = "us.anthropic.claude-opus-4-20250514-v1:0" max_tokens = 32000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", 
"structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" ["claude-4.1-opus"] sdk = "bedrock_anthropic" model_id = "us.anthropic.claude-opus-4-1-20250805-v1:0" max_tokens = 32000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" ["claude-4.5-sonnet"] sdk = "bedrock_anthropic" model_id = "us.anthropic.claude-sonnet-4-5-20250929-v1:0" -max_tokens = 8192 -inputs = ["text", "images"] +max_tokens = 64000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" ["claude-4.5-haiku"] sdk = "bedrock_anthropic" model_id = "us.anthropic.claude-haiku-4-5-20251001-v1:0" -max_tokens = 8192 -inputs = ["text", "images"] +max_tokens = 64000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 1.0, output = 5.0 } +thinking_mode = "manual" ["claude-4.5-opus"] sdk = "bedrock_anthropic" model_id = "global.anthropic.claude-opus-4-5-20251101-v1:0" -max_tokens = 8192 -inputs = ["text", "images"] +max_tokens = 64000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } +thinking_mode = "manual" + +["claude-4.6-opus"] +sdk = "bedrock_anthropic" +model_id = "global.anthropic.claude-opus-4-6-v1" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 5.0, output = 25.0 } +thinking_mode = "adaptive" diff --git a/.pipelex/inference/backends/blackboxai.toml b/.pipelex/inference/backends/blackboxai.toml index 9ee0433..dc1642d 100644 --- a/.pipelex/inference/backends/blackboxai.toml +++ b/.pipelex/inference/backends/blackboxai.toml @@ -8,7 +8,7 @@ # # Configuration structure: # - Each model is defined in its 
own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gpt-4.5-preview"]) +# - Headers with dots must be quoted (e.g., ["gpt-5.2"]) # - Model costs are in USD per million tokens (input/output) # # Documentation: https://docs.pipelex.com @@ -24,217 +24,142 @@ model_type = "llm" sdk = "openai" structure_method = "instructor/openai_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS ################################################################################ -# --- OpenAI Models ------------------------------------------------------------ -[gpt-4o-mini] -model_id = "blackboxai/openai/gpt-4o-mini" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.60 } - -[gpt-4o] -model_id = "blackboxai/openai/gpt-4o" +# --- Anthropic Claude Models -------------------------------------------------- +["claude-4.5-sonnet"] +model_id = "blackboxai/anthropic/claude-sonnet-4.5" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 2.50, output = 10.00 } - -[o1-mini] -model_id = "blackboxai/openai/o1-mini" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.10, output = 4.40 } - -[o4-mini] -model_id = "blackboxai/openai/o4-mini" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 1.10, output = 4.40 } +costs = { input = 3.00, output = 15.00 } +thinking_mode = "manual" -# --- Claude LLMs -------------------------------------------------------------- -["claude-3.5-haiku"] -model_id = "blackboxai/anthropic/claude-3.5-haiku" +["claude-4.5-haiku"] +model_id = "blackboxai/anthropic/claude-haiku-4.5" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 0.80, output = 4.00 } +costs = { input = 1.00, output = 5.00 } +thinking_mode = "manual" -["claude-3.5-sonnet"] -model_id = "blackboxai/anthropic/claude-3.5-sonnet" +[claude-4-sonnet] 
+model_id = "blackboxai/anthropic/claude-sonnet-4" inputs = ["text", "images"] outputs = ["text", "structured"] costs = { input = 3.00, output = 15.00 } +thinking_mode = "manual" -["claude-3.7-sonnet"] -model_id = "blackboxai/anthropic/claude-3.7-sonnet" +["claude-4.5-opus"] +model_id = "blackboxai/anthropic/claude-opus-4.5" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 3.00, output = 15.00 } +costs = { input = 5.00, output = 25.00 } +thinking_mode = "manual" -[claude-opus-4] -model_id = "blackboxai/anthropic/claude-opus-4" +["claude-4.6-opus"] +model_id = "blackboxai/anthropic/claude-opus-4.6" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 15.00, output = 75.00 } +costs = { input = 5.00, output = 25.00 } +thinking_mode = "adaptive" -[claude-4-sonnet] -model_id = "blackboxai/anthropic/claude-sonnet-4" +# --- OpenAI GPT-5 Models ------------------------------------------------------ +[gpt-5-mini] +model_id = "blackboxai/openai/gpt-5-mini" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 3.00, output = 15.00 } +costs = { input = 0.25, output = 2.00 } -["claude-4.5-sonnet"] -model_id = "blackboxai/anthropic/claude-sonnet-4.5" +[gpt-5] +model_id = "blackboxai/openai/gpt-5" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 0.28, output = 1.10 } +costs = { input = 1.25, output = 10.00 } -# --- Google Models ------------------------------------------------------------ -["gemini-2.5-flash"] -model_id = "blackboxai/google/gemini-2.5-flash" +[gpt-5-pro] +model_id = "blackboxai/openai/gpt-5-pro" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 0.30, output = 2.50 } +costs = { input = 15.00, output = 120.00 } -["gemini-2.5-pro"] -model_id = "blackboxai/google/gemini-2.5-pro" -inputs = ["text", "images"] +[gpt-5-codex] +model_id = "blackboxai/openai/gpt-5-codex" +inputs = ["text"] outputs = ["text", "structured"] costs = 
{ input = 1.25, output = 10.00 } -["gemini-flash-1.5-8b"] -model_id = "blackboxai/google/gemini-flash-1.5-8b" +["gpt-5.1"] +model_id = "blackboxai/openai/gpt-5.1" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 0.04, output = 0.15 } +costs = { input = 1.25, output = 10.00 } -# --- Mistral Models ----------------------------------------------------------- -[mistral-large] -model_id = "blackboxai/mistralai/mistral-large" +["gpt-5.1-codex"] +model_id = "blackboxai/openai/gpt-5.1-codex" inputs = ["text"] outputs = ["text", "structured"] -costs = { input = 2.00, output = 6.00 } +costs = { input = 1.25, output = 10.00 } -[pixtral-large-2411] -model_id = "blackboxai/mistralai/pixtral-large-2411" +["gpt-5.2"] +model_id = "blackboxai/openai/gpt-5.2" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 2.00, output = 6.00 } +costs = { input = 1.75, output = 14.00 } -# --- Meta Llama Models -------------------------------------------------------- -["llama-3.3-70b-instruct"] -model_id = "blackboxai/meta-llama/llama-3.3-70b-instruct" -inputs = ["text"] -outputs = ["text", "structured"] -costs = { input = 0.04, output = 0.12 } - -["llama-3.2-11b-vision-instruct"] -model_id = "blackboxai/meta-llama/llama-3.2-11b-vision-instruct" +["gpt-5.2-pro"] +model_id = "blackboxai/openai/gpt-5.2-pro" inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 0.05, output = 0.05 } +costs = { input = 2.00, output = 16.00 } -# --- Qwen Models -------------------------------------------------------------- -["qwen-2.5-72b-instruct"] -model_id = "blackboxai/qwen/qwen-2.5-72b-instruct" -inputs = ["text"] +# --- Google Gemini Models ----------------------------------------------------- +["gemini-2.5-pro"] +model_id = "blackboxai/google/gemini-2.5-pro" +inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 0.12, output = 0.39 } +costs = { input = 1.25, output = 10.00 } +thinking_mode = "none" 
-["qwen2.5-vl-72b-instruct"] -model_id = "blackboxai/qwen/qwen2.5-vl-72b-instruct" +["gemini-2.5-flash"] +model_id = "blackboxai/google/gemini-2.5-flash" inputs = ["text", "images"] -outputs = ["text"] -costs = { input = 0.25, output = 0.75 } - -# --- Amazon Nova Models ------------------------------------------------------- -[nova-micro-v1] -model_id = "blackboxai/amazon/nova-micro-v1" -inputs = ["text"] outputs = ["text", "structured"] -costs = { input = 0.04, output = 0.14 } +costs = { input = 0.30, output = 2.50 } +thinking_mode = "none" -[nova-lite-v1] -model_id = "blackboxai/amazon/nova-lite-v1" +# --- MiniMax Models ----------------------------------------------------------- +[minimax-m2] +model_id = "blackboxai/minimax/minimax-m2" inputs = ["text"] outputs = ["text", "structured"] -costs = { input = 0.06, output = 0.24 } - -################################################################################ -# FREE MODELS -################################################################################ +costs = { input = 0.27, output = 1.15 } -# --- DeepSeek Free Models ----------------------------------------------------- -[deepseek-chat] -model_id = "blackboxai/deepseek/deepseek-chat:free" +# --- Qwen Models -------------------------------------------------------------- +[qwen3-max] +model_id = "blackboxai/qwen/qwen3-max" inputs = ["text"] outputs = ["text", "structured"] -costs = { input = 0.00, output = 0.00 } +costs = { input = 2.00, output = 10.00 } -[deepseek-r1] -model_id = "blackboxai/deepseek/deepseek-r1:free" +[qwen3-coder] +model_id = "blackboxai/qwen/qwen3-coder" inputs = ["text"] outputs = ["text", "structured"] -costs = { input = 0.00, output = 0.00 } +costs = { input = 1.50, output = 8.00 } -# --- Meta Llama Free Models --------------------------------------------------- -["llama-3.3-70b-instruct-free"] -model_id = "blackboxai/meta-llama/llama-3.3-70b-instruct:free" -inputs = ["text"] +# --- Qwen VL Models 
------------------------------------------------- +[qwen3-vl-235b-a22b] +model_id = "blackboxai/qwen3-vl-235b-a22b" +inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 0.00, output = 0.00 } - - -################################################################################ -# IMAGE GENERATION MODELS -################################################################################ - -[flux-pro] -model_type = "img_gen" -sdk = "blackboxai_img_gen" -model_id = "blackboxai/black-forest-labs/flux-pro" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0, output = 0.04 } - -["flux-pro/v1.1"] -model_type = "img_gen" -sdk = "blackboxai_img_gen" -model_id = "blackboxai/black-forest-labs/flux-1.1-pro" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0, output = 0.04 } - -["flux-pro/v1.1-ultra"] -model_type = "img_gen" -sdk = "blackboxai_img_gen" -model_id = "blackboxai/black-forest-labs/flux-1.1-pro-ultra" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0, output = 0.06 } - -[fast-lightning-sdxl] -model_type = "img_gen" -sdk = "blackboxai_img_gen" -model_id = "blackboxai/bytedance/sdxl-lightning-4step" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0, output = 0.0014 } - -[nano-banana] -model_type = "img_gen" -sdk = "blackboxai_img_gen" -model_id = "blackboxai/google/nano-banana" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0, output = 0.039 } +costs = { input = 3.00, output = 15.00 } -[nano-banana-pro] -model_type = "img_gen" -sdk = "blackboxai_img_gen" -model_id = "blackboxai/google/nano-banana-pro" -inputs = ["text"] -outputs = ["image"] -costs = { input = 0.0, output = 0.039 } +[qwen3-vl-32b] +model_id = "blackboxai/qwen3-vl-32b" +inputs = ["text", "images"] +outputs = ["text", "structured"] +costs = { input = 1.00, output = 5.00 } +structure_method = "instructor/openrouter_structured_outputs" diff --git a/.pipelex/inference/backends/fal.toml 
b/.pipelex/inference/backends/fal.toml index 42e2f77..d79251e 100644 --- a/.pipelex/inference/backends/fal.toml +++ b/.pipelex/inference/backends/fal.toml @@ -24,6 +24,7 @@ model_type = "img_gen" sdk = "fal" prompting_target = "fal" +thinking_mode = "none" ################################################################################ # IMAGE GENERATION MODELS @@ -37,6 +38,7 @@ outputs = ["image"] costs = { input = 0.05, output = 0.0 } [flux-pro.rules] +prompt = "positive_only" num_images = "fal" aspect_ratio = "flux" inference = "flux" @@ -51,6 +53,7 @@ outputs = ["image"] costs = { input = 0.05, output = 0.0 } ["flux-pro/v1.1".rules] +prompt = "positive_only" num_images = "fal" aspect_ratio = "flux" inference = "flux" @@ -65,6 +68,7 @@ outputs = ["image"] costs = { input = 0.06, output = 0.0 } ["flux-pro/v1.1-ultra".rules] +prompt = "positive_only" num_images = "fal" aspect_ratio = "flux_11_ultra" inference = "flux_11_ultra" @@ -79,6 +83,7 @@ outputs = ["image"] costs = { input = 0.05, output = 0.0 } [flux-2.rules] +prompt = "positive_only" num_images = "fal" aspect_ratio = "flux" inference = "flux" @@ -94,6 +99,7 @@ outputs = ["image"] costs = { input = 0.0003, output = 0.0 } [fast-lightning-sdxl.rules] +prompt = "positive_only" num_images = "fal" aspect_ratio = "flux" inference = "sdxl_lightning" diff --git a/.pipelex/inference/backends/google.toml b/.pipelex/inference/backends/google.toml index 36e19c2..fdc63d5 100644 --- a/.pipelex/inference/backends/google.toml +++ b/.pipelex/inference/backends/google.toml @@ -8,7 +8,7 @@ # # Configuration structure: # - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gemini-2.0-flash"]) +# - Headers with dots must be quoted (e.g., ["gemini-3.0-pro"]) # - Model costs are in USD per million tokens (input/output) # # Documentation: https://docs.pipelex.com @@ -25,37 +25,30 @@ model_type = "llm" sdk = "google" prompting_target = "gemini" structure_method 
= "instructor/genai_tools" +thinking_mode = "manual" ################################################################################ # LANGUAGE MODELS ################################################################################ -# --- Gemini 2.0 Series ---------------------------------------- -["gemini-2.0-flash"] -model_id = "gemini-2.0-flash" -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 0.10, output = 0.40 } - # --- Gemini 2.5 Series ---------------------------------------- ["gemini-2.5-pro"] model_id = "gemini-2.5-pro" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 3000 costs = { input = 1.25, output = 10.0 } ["gemini-2.5-flash"] model_id = "gemini-2.5-flash" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 3000 costs = { input = 0.30, output = 2.50 } ["gemini-2.5-flash-lite"] model_id = "gemini-2.5-flash-lite" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 3000 costs = { input = 0.10, output = 0.40 } @@ -63,17 +56,19 @@ costs = { input = 0.10, output = 0.40 } # --- Gemini 3.0 Series ---------------------------------------- ["gemini-3.0-pro"] model_id = "gemini-3-pro-preview" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 3000 costs = { input = 2, output = 12.0 } +thinking_mode = "adaptive" ["gemini-3.0-flash-preview"] model_id = "gemini-3-flash-preview" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 3000 costs = { input = 0.5, output = 3.0 } +thinking_mode = "adaptive" ################################################################################ # IMAGE GENERATION MODELS (Nano Banana) @@ -84,6 +79,7 @@ model_type = "img_gen" model_id = 
"gemini-2.5-flash-image" inputs = ["text"] outputs = ["image"] +thinking_mode = "none" costs = { input = 0.0, output = 0.039 } [nano-banana-pro] @@ -91,4 +87,5 @@ model_type = "img_gen" model_id = "gemini-3-pro-image-preview" inputs = ["text"] outputs = ["image"] +thinking_mode = "none" costs = { input = 0.0, output = 0.039 } diff --git a/.pipelex/inference/backends/groq.toml b/.pipelex/inference/backends/groq.toml index 72bdae3..cb48094 100644 --- a/.pipelex/inference/backends/groq.toml +++ b/.pipelex/inference/backends/groq.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "openai" structure_method = "instructor/json" +thinking_mode = "none" ################################################################################ # PRODUCTION TEXT MODELS diff --git a/.pipelex/inference/backends/huggingface.toml b/.pipelex/inference/backends/huggingface.toml index 8916f9b..2fd0faf 100644 --- a/.pipelex/inference/backends/huggingface.toml +++ b/.pipelex/inference/backends/huggingface.toml @@ -23,6 +23,7 @@ [defaults] model_type = "img_gen" sdk = "huggingface_img_gen" +thinking_mode = "none" ################################################################################ # IMAGE GENERATION MODELS @@ -38,5 +39,6 @@ variant = "fal-ai" # variant = "replicate" [qwen-image.rules] +prompt = "with_negative" aspect_ratio = "qwen_image" inference = "qwen_image" diff --git a/.pipelex/inference/backends/internal.toml b/.pipelex/inference/backends/internal.toml index 2d30b7f..8fcc38d 100644 --- a/.pipelex/inference/backends/internal.toml +++ b/.pipelex/inference/backends/internal.toml @@ -14,12 +14,19 @@ # ################################################################################ +################################################################################ +# MODEL DEFAULTS +################################################################################ + +[defaults] +thinking_mode = "none" + ################################################################################ # 
TEXT EXTRACTION MODELS ################################################################################ # --- PyPDFium2 Text Extractor ------------------------------------------------- -[pypdfium2-extract-text] +[pypdfium2-extract-pdf] model_type = "text_extractor" sdk = "pypdfium2" model_id = "extract-text" diff --git a/.pipelex/inference/backends/mistral.toml b/.pipelex/inference/backends/mistral.toml index c8c4b05..7ca2615 100644 --- a/.pipelex/inference/backends/mistral.toml +++ b/.pipelex/inference/backends/mistral.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "mistral" prompting_target = "mistral" structure_method = "instructor/mistral_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS @@ -69,65 +70,119 @@ inputs = ["text"] outputs = ["text"] costs = { input = 1.0, output = 3.0 } -# --- Mistral Large Series ----------------------------------------------------- -[mistral-large-2402] -model_id = "mistral-large-2402" -max_tokens = 32768 -inputs = ["text"] +# --- Pixtral Series ----------------------------------------------------------- +[pixtral-12b] +model_id = "pixtral-12b-latest" +max_tokens = 131072 +inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 4.0, output = 12.0 } +costs = { input = 0.15, output = 0.15 } -[mistral-large] -model_id = "mistral-large-latest" +[pixtral-large] +model_id = "pixtral-large-latest" max_tokens = 131072 -inputs = ["text"] +inputs = ["text", "images"] outputs = ["text", "structured"] -costs = { input = 4.0, output = 12.0 } +costs = { input = 2.0, output = 6.0 } # --- Mistral Small Series ----------------------------------------------------- -[mistral-small-2402] -model_id = "mistral-small-2402" -max_tokens = 32768 +[mistral-small-2506] +model_id = "mistral-small-2506" +max_tokens = 128000 inputs = ["text"] outputs = ["text", "structured"] -costs = { input = 1.0, output = 3.0 } +costs = { input = 0.1, output = 0.3 } + 
+["mistral-small-3.2"] +model_id = "mistral-small-2506" +max_tokens = 128000 +inputs = ["text"] +outputs = ["text", "structured"] +costs = { input = 0.1, output = 0.3 } [mistral-small] model_id = "mistral-small-latest" -max_tokens = 32768 +max_tokens = 128000 inputs = ["text"] outputs = ["text", "structured"] -costs = { input = 1.0, output = 3.0 } +costs = { input = 0.1, output = 0.3 } -# --- Pixtral Series ----------------------------------------------------------- -[pixtral-12b] -model_id = "pixtral-12b-latest" -max_tokens = 131072 -inputs = ["text", "images"] +# --- Mistral Medium Series ---------------------------------------------------- +[mistral-medium-2508] +model_id = "mistral-medium-2508" +max_tokens = 128000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] -costs = { input = 0.15, output = 0.15 } +costs = { input = 0.4, output = 2.0 } -[pixtral-large] -model_id = "pixtral-large-latest" -max_tokens = 131072 -inputs = ["text", "images"] +["mistral-medium-3.1"] +model_id = "mistral-medium-2508" +max_tokens = 128000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] -costs = { input = 2.0, output = 6.0 } +costs = { input = 0.4, output = 2.0 } -# --- Mistral Medium Series ---------------------------------------------------- [mistral-medium] model_id = "mistral-medium-latest" max_tokens = 128000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.4, output = 2.0 } -[mistral-medium-2508] -model_id = "mistral-medium-2508" -max_tokens = 128000 -inputs = ["text", "images"] +# --- Mistral Large Series ----------------------------------------------------- +[mistral-large-2512] +model_id = "mistral-large-2512" +max_tokens = 256000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] -costs = { input = 0.4, output = 2.0 } +costs = { input = 0.5, output = 1.5 } + +[mistral-large-3] +model_id = "mistral-large-2512" +max_tokens = 256000 +inputs = 
["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.5, output = 1.5 } + +[mistral-large] +model_id = "mistral-large-latest" +max_tokens = 256000 +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +costs = { input = 0.5, output = 1.5 } + +# --- Magistral Series (reasoning models) ------------------------------------ +[magistral-small-2509] +model_id = "magistral-small-2509" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 0.5, output = 1.5 } +thinking_mode = "manual" + +[magistral-small] +model_id = "magistral-small-latest" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 0.5, output = 1.5 } +thinking_mode = "manual" + +[magistral-medium-2509] +model_id = "magistral-medium-2509" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 2, output = 5 } +thinking_mode = "manual" + +[magistral-medium] +model_id = "magistral-medium-latest" +max_tokens = 128000 +inputs = ["text", "pdf"] +outputs = ["text"] +costs = { input = 2, output = 5 } +thinking_mode = "manual" ################################################################################ # EXTRACTION MODELS @@ -135,7 +190,6 @@ costs = { input = 0.4, output = 2.0 } # TODO: add support to pricing per page -# --- OCR Models --------------------------------------------------------------- [mistral-ocr-2503] model_type = "text_extractor" model_id = "mistral-ocr-2503" diff --git a/.pipelex/inference/backends/ollama.toml b/.pipelex/inference/backends/ollama.toml index 397e9ac..3e020a8 100644 --- a/.pipelex/inference/backends/ollama.toml +++ b/.pipelex/inference/backends/ollama.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "openai" prompting_target = "anthropic" structure_method = "instructor/openai_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS diff --git 
a/.pipelex/inference/backends/openai.toml b/.pipelex/inference/backends/openai.toml index e61d52e..3c2af8c 100644 --- a/.pipelex/inference/backends/openai.toml +++ b/.pipelex/inference/backends/openai.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "openai_responses" prompting_target = "openai" structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS @@ -50,135 +51,150 @@ costs = { input = 10.0, output = 30.0 } # --- GPT-4o Series ------------------------------------------------------------ [gpt-4o-2024-11-20] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 2.5, output = 10.0 } [gpt-4o] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 2.5, output = 10.0 } [gpt-4o-mini-2024-07-18] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.15, output = 0.6 } [gpt-4o-mini] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.15, output = 0.6 } # --- GPT-4.1 Series ----------------------------------------------------------- ["gpt-4.1"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 2, output = 8 } ["gpt-4.1-mini"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.4, output = 1.6 } ["gpt-4.1-nano"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.1, output = 0.4 } # --- o Series ---------------------------------------------------------------- [o1] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 15.0, output = 60.0 } 
valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [o3-mini] inputs = ["text"] outputs = ["text", "structured"] costs = { input = 1.1, output = 4.4 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [o3] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 10.0, output = 40.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [o4-mini] inputs = ["text"] outputs = ["text", "structured"] costs = { input = 1.1, output = 4.4 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" # --- GPT-5 Series ------------------------------------------------------------- [gpt-5] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [gpt-5-mini] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.25, output = 2.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [gpt-5-nano] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.05, output = 0.4 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [gpt-5-chat] model_id = "gpt-5-chat-latest" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" [gpt-5-codex] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" # --- GPT-5.1 Series ------------------------------------------------------------- ["gpt-5.1"] -inputs = ["text", "images"] +inputs = ["text", "images", 
"pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } +thinking_mode = "manual" ["gpt-5.1-chat"] model_id = "gpt-5.1-chat-latest" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" ["gpt-5.1-codex"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" ["gpt-5.1-codex-max"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" # --- GPT-5.2 Series ------------------------------------------------------------- ["gpt-5.2"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.75, output = 14.0 } +thinking_mode = "manual" ["gpt-5.2-chat"] model_id = "gpt-5.2-chat-latest" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.75, output = 14.0 } valued_constraints = { fixed_temperature = 1 } +thinking_mode = "manual" ################################################################################ # IMAGE GENERATION MODELS diff --git a/.pipelex/inference/backends/pipelex_inference.toml b/.pipelex/inference/backends/pipelex_inference.toml index 751c570..abb5fcf 100644 --- a/.pipelex/inference/backends/pipelex_inference.toml +++ b/.pipelex/inference/backends/pipelex_inference.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "openai" prompting_target = "anthropic" structure_method = "instructor/openai_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS @@ -151,12 +152,6 @@ outputs = 
["text", "structured"] costs = { input = 5, output = 25 } # --- Gemini LLMs -------------------------------------------------------------- -["gemini-2.0-flash"] -model_id = "pipelex/gemini-2.0-flash" -inputs = ["text", "images"] -outputs = ["text", "structured"] -costs = { input = 0.10, output = 0.40 } - ["gemini-2.5-pro"] model_id = "pipelex/gemini-2.5-pro" inputs = ["text", "images"] diff --git a/.pipelex/inference/backends/portkey.toml b/.pipelex/inference/backends/portkey.toml index 2080d0e..d91a1da 100644 --- a/.pipelex/inference/backends/portkey.toml +++ b/.pipelex/inference/backends/portkey.toml @@ -24,6 +24,7 @@ model_type = "llm" sdk = "portkey_completions" structure_method = "instructor/openai_tools" prompting_target = "anthropic" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS @@ -31,52 +32,58 @@ prompting_target = "anthropic" # --- OpenAI LLMs -------------------------------------------------------------- [gpt-4o-mini] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.15, output = 0.6 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" x-portkey-provider = "@openai" [gpt-4o] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 2.5, output = 10.0 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" x-portkey-provider = "@openai" ["gpt-4.1-nano"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.1, output = 0.4 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" x-portkey-provider = "@openai" ["gpt-4.1-mini"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 
0.4, output = 1.6 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" x-portkey-provider = "@openai" ["gpt-4.1"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 2, output = 8 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "none" x-portkey-provider = "@openai" [o1] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 15.0, output = 60.0 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" [o3-mini] @@ -86,69 +93,77 @@ costs = { input = 1.1, output = 4.4 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" [o3] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 2, output = 8 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" [o4-mini] -inputs = ["text", "images"] +inputs = ["text"] outputs = ["text", "structured"] costs = { input = 1.1, output = 4.4 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" [gpt-5-nano] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.05, output = 0.4 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" 
[gpt-5-mini] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.25, output = 2.0 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" [gpt-5] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" ["gpt-5.1"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" ["gpt-5.1-codex"] -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 1.25, output = 10.0 } valued_constraints = { fixed_temperature = 1 } sdk = "portkey_responses" structure_method = "instructor/openai_responses_tools" +thinking_mode = "manual" x-portkey-provider = "@openai" # --- Claude LLMs -------------------------------------------------------------- @@ -159,114 +174,134 @@ inputs = ["text", "images"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 0.25, output = 1.25 } -x-portkey-provider = "@anthropic" - -[claude-3-opus] -model_id = "claude-3-opus-20240229" -max_tokens = 4096 -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 100 -costs = { input = 15.0, output = 75.0 } +thinking_mode = "none" x-portkey-provider = "@anthropic" ["claude-3.7-sonnet"] model_id = "claude-3-7-sonnet-20250219" max_tokens = 8192 -inputs = ["text", "images"] +inputs = ["text", 
"images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" x-portkey-provider = "@anthropic" [claude-4-sonnet] model_id = "claude-sonnet-4-20250514" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" x-portkey-provider = "@anthropic" [claude-4-opus] model_id = "claude-opus-4-20250514" max_tokens = 32000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" x-portkey-provider = "@anthropic" ["claude-4.1-opus"] model_id = "claude-opus-4-1-20250805" max_tokens = 32000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" x-portkey-provider = "@anthropic" ["claude-4.5-sonnet"] model_id = "claude-sonnet-4-5-20250929" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 3.0, output = 15.0 } +thinking_mode = "manual" x-portkey-provider = "@anthropic" ["claude-4.5-haiku"] model_id = "claude-haiku-4-5-20251001" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 1.0, output = 5.0 } +thinking_mode = "manual" x-portkey-provider = "@anthropic" ["claude-4.5-opus"] model_id = "claude-opus-4-5-20251101" max_tokens = 64000 -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 100 costs = { input = 5.0, output = 25.0 } +thinking_mode = "manual" x-portkey-provider = "@anthropic" -# --- Gemini LLMs 
-------------------------------------------------------------- -["gemini-2.0-flash"] -model_id = "gemini-2.0-flash" -inputs = ["text", "images"] +["claude-4.6-opus"] +model_id = "claude-opus-4-6" +max_tokens = 64000 +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] -costs = { input = 0.10, output = 0.40 } -x-portkey-provider = "@google" +max_prompt_images = 100 +costs = { input = 5.0, output = 25.0 } +thinking_mode = "adaptive" +x-portkey-provider = "@anthropic" +# --- Gemini LLMs -------------------------------------------------------------- ["gemini-2.5-pro"] model_id = "gemini-2.5-pro" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 3000 costs = { input = 1.25, output = 10.0 } +thinking_mode = "manual" +prompting_target = "gemini" x-portkey-provider = "@google" ["gemini-2.5-flash"] model_id = "gemini-2.5-flash" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.30, output = 2.50 } +thinking_mode = "manual" +prompting_target = "gemini" x-portkey-provider = "@google" ["gemini-2.5-flash-lite"] model_id = "gemini-2.5-flash-lite" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] costs = { input = 0.10, output = 0.40 } +thinking_mode = "manual" +prompting_target = "gemini" x-portkey-provider = "@google" ["gemini-3.0-pro"] model_id = "gemini-3-pro-preview" -inputs = ["text", "images"] +inputs = ["text", "images", "pdf"] outputs = ["text", "structured"] max_prompt_images = 3000 costs = { input = 2, output = 12.0 } +thinking_mode = "adaptive" +prompting_target = "gemini" +x-portkey-provider = "@google" + +["gemini-3.0-flash-preview"] +model_id = "gemini-3-flash-preview" +inputs = ["text", "images", "pdf"] +outputs = ["text", "structured"] +max_prompt_images = 3000 +costs = { input = 0.5, output = 3.0 } +thinking_mode = "adaptive" +prompting_target = "gemini" 
x-portkey-provider = "@google" diff --git a/.pipelex/inference/backends/scaleway.toml b/.pipelex/inference/backends/scaleway.toml index 20fe792..75d6a05 100644 --- a/.pipelex/inference/backends/scaleway.toml +++ b/.pipelex/inference/backends/scaleway.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "openai" structure_method = "instructor/json" +thinking_mode = "none" # --- DeepSeek Models ---------------------------------------------------------- [deepseek-r1-distill-llama-70b] diff --git a/.pipelex/inference/backends/vertexai.toml b/.pipelex/inference/backends/vertexai.toml index 1ebab79..fe89dc8 100644 --- a/.pipelex/inference/backends/vertexai.toml +++ b/.pipelex/inference/backends/vertexai.toml @@ -8,7 +8,7 @@ # # Configuration structure: # - Each model is defined in its own section with the model name as the header -# - Headers with dots must be quoted (e.g., ["gemini-2.0-flash"]) +# - Headers with dots must be quoted (e.g., ["gemini-2.5-pro"]) # - Model costs are in USD per million tokens (input/output) # # Documentation: https://docs.pipelex.com @@ -25,19 +25,12 @@ model_type = "llm" sdk = "openai" prompting_target = "gemini" structure_method = "instructor/vertexai_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS ################################################################################ -# --- Gemini 2.0 Series -------------------------------------------------------- -["gemini-2.0-flash"] -model_id = "google/gemini-2.0-flash" -inputs = ["text", "images"] -outputs = ["text", "structured"] -max_prompt_images = 3000 -costs = { input = 0.1, output = 0.4 } - # --- Gemini 2.5 Series -------------------------------------------------------- ["gemini-2.5-pro"] model_id = "google/gemini-2.5-pro" diff --git a/.pipelex/inference/backends/xai.toml b/.pipelex/inference/backends/xai.toml index 3045344..a6348f2 100644 --- a/.pipelex/inference/backends/xai.toml +++ 
b/.pipelex/inference/backends/xai.toml @@ -25,6 +25,7 @@ model_type = "llm" sdk = "openai" prompting_target = "anthropic" structure_method = "instructor/openai_tools" +thinking_mode = "none" ################################################################################ # LANGUAGE MODELS diff --git a/.pipelex/inference/deck/1_llm_deck.toml b/.pipelex/inference/deck/1_llm_deck.toml new file mode 100644 index 0000000..649576b --- /dev/null +++ b/.pipelex/inference/deck/1_llm_deck.toml @@ -0,0 +1,87 @@ +#################################################################################################### +# Pipelex Model Deck - LLM Configuration +#################################################################################################### +# +# This file defines model defaults, aliases, and presets for LLMs +# +# Model Reference Syntax: +# - Preset: $preset_name or preset:preset_name +# - Alias: @alias_name or alias:alias_name +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# LLM Default Choices +#################################################################################################### + +[llm.choice_defaults] +default_temperature = 0.5 +for_text = "@default-general" +for_object = "@default-general" + +#################################################################################################### +# LLM Aliases +#################################################################################################### + +[llm.aliases] +best-gpt = "gpt-5.2" +best-claude = "claude-4.5-opus" +best-gemini = "gemini-3.0-pro" +best-mistral = "mistral-large" + +# Default aliases (first choice from waterfalls) +default-general = "claude-4.5-sonnet" +default-premium = "claude-4.6-opus" +default-premium-vision = 
"claude-4.6-opus" +default-premium-structured = "claude-4.6-opus" +default-large-context-code = "gemini-3.0-pro" +default-large-context-text = "gemini-2.5-flash" +default-small = "gpt-4o-mini" +default-small-structured = "gpt-4o-mini" +default-small-vision = "gemini-2.5-flash-lite" +default-small-creative = "gemini-2.5-flash-lite" + +#################################################################################################### +# LLM Presets +#################################################################################################### + +[llm.presets] + +# Writing +writing-factual = { model = "@default-premium", temperature = 0.1, description = "Factual writing with high accuracy" } +writing-creative = { model = "@default-premium", temperature = 0.9, description = "Creative writing with high variability" } + +# Retrieval +retrieval = { model = "@default-large-context-text", temperature = 0.1, description = "Data retrieval from large text corpora" } + +# Engineering +engineering-structured = { model = "@default-premium-structured", temperature = 0.2, description = "Structured engineering output (JSON, schemas)" } +engineering-code = { model = "@default-premium", temperature = 0.1, description = "Code generation and analysis" } +engineering-codebase-analysis = { model = "@best-gemini", temperature = 0.1, description = "Large codebase analysis" } + +# Vision +vision = { model = "@default-premium-vision", temperature = 0.5, description = "Vision language model for understanding images" } +vision-cheap = { model = "@default-small-vision", temperature = 0.5, description = "Budget vision model for simple image tasks" } +vision-diagram = { model = "@default-premium-vision", temperature = 0.3, description = "Diagram and chart interpretation" } +vision-table = { model = "@default-premium-vision", temperature = 0.3, description = "Table extraction from images" } + +# Image generation prompting +img-gen-prompting = { model = "@default-premium", temperature = 0.5, 
description = "Crafting image generation prompts" } +img-gen-prompting-cheap = { model = "@default-small", temperature = 0.5, description = "Budget image prompt generation" } + +# Reasoning +deep-analysis = { model = "@default-premium", temperature = 0.1, reasoning_effort = "high", description = "Deep reasoning and analysis" } +quick-reasoning = { model = "@default-premium", temperature = 0.3, reasoning_effort = "low", description = "Quick reasoning for simple tasks" } + +# Builder (isolated presets for the pipeline builder) +pipe-builder-engineering = { model = "claude-4.6-opus", temperature = 0.2, description = "Builder: structured engineering output" } +pipe-builder-img-gen-prompting = { model = "claude-4.5-sonnet", temperature = 0.7, description = "Builder: crafting image generation prompts" } + +# Testing +testing-text = { model = "@default-small", temperature = 0.5, description = "Testing preset for text generation" } +testing-structured = { model = "@default-small-structured", temperature = 0.1, description = "Testing preset for structured output" } +testing-vision = { model = "@default-small-vision", temperature = 0.5, description = "Testing preset for vision tasks" } +testing-vision-structured = { model = "@default-small-vision", temperature = 0.5, description = "Testing preset for structured vision output" } diff --git a/.pipelex/inference/deck/2_img_gen_deck.toml b/.pipelex/inference/deck/2_img_gen_deck.toml new file mode 100644 index 0000000..400b492 --- /dev/null +++ b/.pipelex/inference/deck/2_img_gen_deck.toml @@ -0,0 +1,53 @@ +#################################################################################################### +# Pipelex Model Deck - Image Generation Configuration +#################################################################################################### +# +# This file defines model aliases and presets for image generation models +# +# Model Reference Syntax: +# - Preset: $preset_name or preset:preset_name +# - Alias: 
@alias_name or alias:alias_name +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# Image Generation Default Choices +#################################################################################################### + +[img_gen] +default_quality = "medium" +choice_default = "$gen-image" + +#################################################################################################### +# Image Generation Aliases +#################################################################################################### + +[img_gen.aliases] +best-gpt = "gpt-image-1.5" +best-gemini = "nano-banana-pro" +best-blackforestlabs = "flux-2-pro" + +default-general = "flux-2-pro" +default-premium = "nano-banana-pro" +default-small = "gpt-image-1-mini" + +#################################################################################################### +# Image Generation Presets +#################################################################################################### + +[img_gen.presets] + +# General purpose +gen-image = { model = "@default-general", quality = "medium", description = "Standard image generation" } +gen-image-fast = { model = "@default-small", quality = "low", description = "Fast image generation with lower quality" } +gen-image-high-quality = { model = "@default-premium", quality = "high", description = "High-quality image generation" } + +# Testing +gen-image-testing = { model = "@default-small", quality = "low", description = "Testing preset for image generation" } +gen-image-testing-img2img = { model = "nano-banana-pro", description = "Testing preset for image-to-image" } +synthesize-photo = { model = "@default-small", quality = "low", description = "Synthesize realistic photos for testing" } 
+synthesize-ui = { model = "nano-banana-pro", description = "Synthesize UI screenshots for testing" } +synthesize-chart = { model = "nano-banana-pro", description = "Synthesize charts and graphs for testing" } diff --git a/.pipelex/inference/deck/3_extract_deck.toml b/.pipelex/inference/deck/3_extract_deck.toml new file mode 100644 index 0000000..2e5e5af --- /dev/null +++ b/.pipelex/inference/deck/3_extract_deck.toml @@ -0,0 +1,42 @@ +#################################################################################################### +# Pipelex Model Deck - Base Configuration +#################################################################################################### +# +# This file defines model aliases and presets for Document extraction models, including +# extraction of text and images from documents and OCR and text extraction from images. +# +# Model Reference Syntax: +# - Preset: $preset_name or preset:preset_name +# - Alias: @alias_name or alias:alias_name +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# Document Extraction Default Choices +#################################################################################################### + +[extract] +choice_default = "@default-extract-document" + +#################################################################################################### +# Aliases +#################################################################################################### + +[extract.aliases] +default-premium = "azure-document-intelligence" +default-extract-document = "mistral-document-ai-2505" +default-extract-image = "mistral-document-ai-2505" +default-text-from-pdf = "pypdfium2-extract-pdf" +default-no-inference = "pypdfium2-extract-pdf" + 
+#################################################################################################### +# Extract Presets +#################################################################################################### + +[extract.presets] + +# Testing +extract-testing = { model = "@default-extract-document", max_nb_images = 5, image_min_size = 50, description = "Testing preset for document extraction" } diff --git a/.pipelex/inference/deck/base_deck.toml b/.pipelex/inference/deck/base_deck.toml deleted file mode 100644 index bb28c3a..0000000 --- a/.pipelex/inference/deck/base_deck.toml +++ /dev/null @@ -1,202 +0,0 @@ -#################################################################################################### -# Pipelex Model Deck - Base Configuration -#################################################################################################### -# -# This file defines model aliases and presets for: -# - LLMs (language models for text generation and structured output) -# - Image generation models (for creating images from text prompts) -# - Document extraction models (OCR and text extraction from PDFs/images) -# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -#################################################################################################### -# Aliases -#################################################################################################### - -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-4o" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -base-groq = "llama-3.3-70b-versatile" -base-grok = "grok-4-fast-non-reasoning" - -best-gpt = "gpt-5.1" -best-claude = "claude-4.5-opus" -best-gemini = "gemini-3.0-pro" -best-mistral = "mistral-medium" - -# Groq-specific aliases -fast-groq = "llama-3.1-8b-instant" -vision-groq = 
"llama-4-scout-17b-16e-instruct" - -# Image generation aliases -base-img-gen = "flux-pro/v1.1" -best-img-gen = "flux-2" -fast-img-gen = "fast-lightning-sdxl" - -#################################################################################################### -# Waterfalls -#################################################################################################### - -[waterfalls] - -# --- Waterfalls for LLMs --------------------------------------------------------------------- -smart_llm = [ - "claude-4.5-opus", - "claude-4.5-sonnet", - "gemini-3.0-pro", - "gpt-5.1", - "claude-4.1-opus", - "gemini-2.5-pro", - "claude-4-sonnet", - "grok-4", -] -smart_llm_with_vision = [ - "claude-4.5-opus", - "claude-4.5-sonnet", - "gemini-3.0-pro", - "gpt-5.1", - "claude-4.1-opus", - "gemini-2.5-pro", - "claude-4-sonnet", - "grok-4", -] -smart_llm_for_structured = [ - "claude-4.5-opus", - "claude-4.5-sonnet", - "gemini-3.0-pro", - "gpt-5.1", - "claude-4.1-opus", - "claude-4-sonnet", -] -llm_for_creativity = [ - "claude-4.5-opus", - "claude-4.1-opus", - "gemini-2.5-pro", - "gpt-5.1", -] -llm_for_large_codebase = [ - "gemini-2.5-pro", - "claude-4.5-sonnet", - "gemini-3.0-pro", - "gpt-5.1", - "gemini-2.5-flash", - "grok-4", -] -cheap_llm = [ - "gpt-4o-mini", - "gemini-2.5-flash-lite", - "mistral-small", - "claude-3-haiku", - "grok-3-mini", -] -cheap_llm_for_vision = [ - "gemini-2.5-flash-lite", - "gpt-4o-mini", - "claude-3-haiku", -] -cheap_llm_for_structured = ["gpt-4o-mini", "mistral-small", "claude-3-haiku"] -cheap_llm_for_creativity = [ - "gemini-2.5-flash", - "grok-3-mini", - "gpt-4o-mini", - "claude-4.5-haiku", -] - -# --- Waterfalls for Extracts --------------------------------------------------------------------- -pdf_text_extractor = [ - "azure-document-intelligence", - "mistral-ocr", - "pypdfium2-extract-text", -] -image_text_extractor = ["mistral-ocr"] - -#################################################################################################### -# LLM 
Presets -#################################################################################################### - -[llm.presets] - -# LLM Presets — Specific skills ------------------------------------------------------------- - -# Generation skills -llm_for_factual_writing = { model = "base-gpt", temperature = 0.1 } -llm_for_creative_writing = { model = "base-gpt", temperature = 0.9 } -llm_for_writing_cheap = { model = "gpt-4o-mini", temperature = 0.3 } - -# Retrieve and answer questions skills -llm_to_answer_questions_cheap = { model = "gpt-4o-mini", temperature = 0.3 } -llm_to_answer_questions = { model = "base-claude", temperature = 0.3 } -llm_to_retrieve = { model = "base-claude", temperature = 0.1 } - -# Engineering skills -llm_to_engineer = { model = "smart_llm_for_structured", temperature = 0.2 } -llm_to_code = { model = "base-claude", temperature = 0.1 } -llm_to_analyze_large_codebase = { model = "base-claude", temperature = 0.1 } - -# Vision skills -llm_for_img_to_text_cheap = { model = "gpt-4o-mini", temperature = 0.1 } -llm_for_img_to_text = { model = "base-claude", temperature = 0.1 } -llm_for_diagram_to_text = { model = "best-claude", temperature = 0.3 } -llm_for_table_to_text = { model = "base-claude", temperature = 0.3 } - -# Image generation prompting skills -llm_to_prompt_img_gen = { model = "base-claude", temperature = 0.2 } -llm_to_prompt_img_gen_cheap = { model = "gpt-4o-mini", temperature = 0.5 } - -# Groq-specific presets (fast inference, low cost) -llm_groq_fast_text = { model = "fast-groq", temperature = 0.7 } -llm_groq_balanced = { model = "base-groq", temperature = 0.5 } -llm_groq_vision = { model = "vision-groq", temperature = 0.3 } - -# LLM Presets — For Testing --------------------------------------------------------------------- - -llm_for_testing_gen_text = { model = "cheap_llm", temperature = 0.5 } -llm_for_testing_gen_object = { model = "cheap_llm_for_structured", temperature = 0.1 } -llm_for_testing_vision = { model = 
"cheap_llm_for_vision", temperature = 0.5 } -llm_for_testing_vision_structured = { model = "cheap_llm_for_vision", temperature = 0.5 } - -#################################################################################################### -# LLM Choices -#################################################################################################### - -[llm.choice_defaults] -for_text = "cheap_llm" -for_object = "cheap_llm_for_structured" - -#################################################################################################### -# Extract Presets -#################################################################################################### - -[extract] -choice_default = "extract_text_from_visuals" - -[extract.presets] -extract_text_from_visuals = { model = "azure-document-intelligence", max_nb_images = 100, image_min_size = 50 } -extract_text_from_pdf = { model = "pypdfium2-extract-text", max_nb_images = 100, image_min_size = 50 } - -#################################################################################################### -# Image Generation Presets -#################################################################################################### - -[img_gen] -choice_default = "gen_image_basic" - -[img_gen.presets] - -# General purpose -gen_image_basic = { model = "base-img-gen", quality = "medium", guidance_scale = 7.5, is_moderated = true, safety_tolerance = 3 } -gen_image_fast = { model = "fast-img-gen", nb_steps = 4, guidance_scale = 5.0, is_moderated = true, safety_tolerance = 3 } -gen_image_high_quality = { model = "best-img-gen", quality = "high", guidance_scale = 8.0, is_moderated = true, safety_tolerance = 3 } -gen_image_openai_low_quality = { model = "gpt-image-1", quality = "low" } - -# Specific skills -img_gen_for_art = { model = "best-img-gen", quality = "high", guidance_scale = 9.0, is_moderated = false, safety_tolerance = 5 } -img_gen_for_diagram = { model = "base-img-gen", quality = "medium", guidance_scale = 
7.0, is_moderated = true, safety_tolerance = 2 } -img_gen_for_mockup = { model = "base-img-gen", quality = "medium", guidance_scale = 6.5, is_moderated = true, safety_tolerance = 3 } -img_gen_for_product = { model = "best-img-gen", quality = "high", guidance_scale = 8.5, is_moderated = true, safety_tolerance = 2 } -img_gen_for_testing = { model = "fast-img-gen", nb_steps = 4, guidance_scale = 4.0, is_moderated = true, safety_tolerance = 4 } diff --git a/.pipelex/inference/deck/cocode_deck.toml b/.pipelex/inference/deck/cocode_deck.toml deleted file mode 100644 index 19de968..0000000 --- a/.pipelex/inference/deck/cocode_deck.toml +++ /dev/null @@ -1,8 +0,0 @@ -#################################################################################################### -# LLM Presets -#################################################################################################### - -[llm.presets] - -llm_for_swe = { model = "smart_llm", temperature = 0.1 } -llm_for_git_diff = { model = "llm_for_large_codebase", temperature = 0.1 } diff --git a/.pipelex/inference/deck/overrides.toml b/.pipelex/inference/deck/overrides.toml deleted file mode 100644 index 08814db..0000000 --- a/.pipelex/inference/deck/overrides.toml +++ /dev/null @@ -1,19 +0,0 @@ -#################################################################################################### -# Pipelex Model Deck - Overrides -#################################################################################################### -# -# This file allows you to override the default model choices defined in base_deck.toml. -# You can customize presets for LLMs, image generation, and document extraction models. 
-# -# Documentation: https://docs.pipelex.com -# Support: https://go.pipelex.com/discord -# -#################################################################################################### - -#################################################################################################### -# LLM Deck overrides -#################################################################################################### - -[llm.choice_overrides] -for_text = "disabled" -for_object = "disabled" diff --git a/.pipelex/inference/deck/x_custom_extract_deck.toml b/.pipelex/inference/deck/x_custom_extract_deck.toml new file mode 100644 index 0000000..24fc260 --- /dev/null +++ b/.pipelex/inference/deck/x_custom_extract_deck.toml @@ -0,0 +1,40 @@ +#################################################################################################### +# Pipelex Model Deck - Custom Configurations for Document Extraction Models +#################################################################################################### +# +# This file allows you to override or complete the base model decks. +# +# ADVANCED USERS ONLY: This file is for users who bring their own API keys and connect directly +# to AI providers (Azure, Mistral, etc.) without using the Pipelex Gateway. +# +# If you're using the standard Pipelex Gateway setup, you don't need to modify this file. +# The Gateway handles model routing automatically and supports all available models. +# +# Waterfalls are useful when using multiple backends directly - they define ordered lists +# of models that are resolved at configuration time based on which backends are available. +# This enables defining pipelines that work across different environments with varying +# backend configurations. 
+# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + + +#################################################################################################### +# Waterfalls — ordered lists of models resolved at configuration time by backend availability +##################################################################################################### + +# Example (uncomment to use): +# [extract.waterfalls] +# document_extractor = ["azure-document-intelligence", "mistral-document-ai-2505"] +# pdf_text_extractor = [ +# "azure-document-intelligence", +# "mistral-document-ai-2505", +# "pypdfium2-extract-pdf", +# ] +# image_text_extractor = [ +# "azure-document-intelligence", +# "mistral-document-ai-2505", +# ] +#################################################################################################### diff --git a/.pipelex/inference/deck/x_custom_llm_deck.toml b/.pipelex/inference/deck/x_custom_llm_deck.toml new file mode 100644 index 0000000..0b964ef --- /dev/null +++ b/.pipelex/inference/deck/x_custom_llm_deck.toml @@ -0,0 +1,72 @@ +#################################################################################################### +# Pipelex Model Deck - Custom Configurations for LLMs +#################################################################################################### +# +# This file allows you to override or complete the base model decks. +# +# ADVANCED USERS ONLY: This file is for users who bring their own API keys and connect directly +# to AI providers (OpenAI, Anthropic, Google, etc.) without using the Pipelex Gateway. +# +# If you're using the standard Pipelex Gateway setup, you don't need to modify this file. +# The Gateway handles model routing automatically and supports all available models. 
+# +# Waterfalls are useful when using multiple backends directly - they define ordered lists +# of models that are resolved at configuration time based on which backends are available. +# This enables defining pipelines that work across different environments with varying +# backend configurations. +# +# Documentation: https://docs.pipelex.com +# Support: https://go.pipelex.com/discord +# +#################################################################################################### + +#################################################################################################### +# LLM Deck overrides +#################################################################################################### + +[llm.choice_overrides] +for_text = "disabled" +for_object = "disabled" + + +#################################################################################################### +# Waterfalls — ordered lists of models resolved at configuration time by backend availability +##################################################################################################### + +# Example (uncomment to use): +# [llm.waterfalls] +# premium-llm = ["claude-4.5-opus", "gemini-3.0-pro", "gpt-5.2", "grok-4"] +# premium-llm-vision = [ +# "claude-4.5-opus", +# "gemini-3.0-pro", +# "gpt-5.2", +# "grok-4-fast-reasoning", +# ] +# premium-llm-structured = [ +# "claude-4.5-opus", +# "gemini-3.0-pro", +# "gpt-5.2", +# "grok-4", +# ] +# large-context-llm-code = [ +# "gemini-3.0-pro", +# "claude-4.5-opus", +# "gpt-5.2", +# "grok-4-fast-reasoning", +# ] +# large-context-llm-text = ["gemini-2.5-flash", "claude-4.5-sonnet"] +# small-llm = [ +# "gemini-2.5-flash-lite", +# "gpt-4o-mini", +# "claude-3-haiku", +# "phi-4", +# "grok-3-mini", +# ] +# small-llm-structured = [ +# "gemini-2.5-flash-lite", +# "gpt-4o-mini", +# "claude-3-haiku", +# ] +# small-llm-vision = ["gemini-2.5-flash-lite", "gpt-4o-mini", "claude-3-haiku"] +# small-llm-creative = ["gemini-2.5-flash-lite", 
"gpt-4o-mini", "claude-3-haiku"] +#################################################################################################### diff --git a/.pipelex/inference/routing_profiles.toml b/.pipelex/inference/routing_profiles.toml index d27e659..eb9aae0 100644 --- a/.pipelex/inference/routing_profiles.toml +++ b/.pipelex/inference/routing_profiles.toml @@ -8,51 +8,22 @@ # Support: https://go.pipelex.com/discord # ========================================================================================= +# Note: The internal backend (software-only models) is always available regardless of +# which routing profile you select. See the documentation for details. + # Which profile to use (change this to switch routing) -# TODO: TBD -active = "pipelex_gateway_first" # Change to "pipelex_gateway_first" after enabling gateway +active = "all_pipelex_gateway" -# We recommend using the "pipelex_gateway_first" profile to get a head start with all models. +# We recommend using the "all_pipelex_gateway" profile to get a head start with all models. # To use the Pipelex Gateway backend: -# 1. Join our Discord community to get your free API key (no credit card required): -# Visit https://go.pipelex.com/discord and request your key in the appropriate channel -# 2. Set the environment variable (or add it to your .env file): -# - Linux/macOS: export PIPELEX_GATEWAY_API_KEY="your-api-key" -# - Windows CMD: set PIPELEX_GATEWAY_API_KEY=your-api-key -# - Windows PowerShell: $env:PIPELEX_GATEWAY_API_KEY="your-api-key" -# 3. The .pipelex/inference/backends.toml is already configured with api_key = "${PIPELEX_GATEWAY_API_KEY}" -# which will get the key from the environment variable. +# 1. Get your API key at https://app.pipelex.com (free credits included) +# 2. Add it to your .env file: PIPELEX_GATEWAY_API_KEY=your-key-here +# 3. 
Run `pipelex init` and accept the Gateway terms of service # ========================================================================================= # Routing Profiles # ========================================================================================= -[profiles.pipelex_gateway_first] -description = "Use Pipelex Gateway backend for all its supported models" -default = "pipelex_gateway" -fallback_order = [ - "pipelex_gateway", - "azure_openai", - "bedrock", - "google", - "blackboxai", - "mistral", - "fal", -] - -[profiles.pipelex_gateway_first.routes] -# Pattern matching: "model-pattern" = "backend-name" - -[profiles.pipelex_gateway_first.optional_routes] # Each optional route is considered only if its backend is available -"gpt-*" = "pipelex_gateway" -"gpt-image-1" = "openai" -"claude-*" = "pipelex_gateway" -"grok-*" = "pipelex_gateway" -"gemini-*" = "pipelex_gateway" -"*-sdxl" = "fal" -"flux-*" = "fal" -"mistral-ocr" = "mistral" - [profiles.all_pipelex_gateway] description = "Use Pipelex Gateway for all its supported models" default = "pipelex_gateway" @@ -109,6 +80,10 @@ default = "portkey" description = "Use Scaleway backend for all its supported models" default = "scaleway" +[profiles.all_vertexai] +description = "Use Vertex AI backend for all its supported models" +default = "vertexai" + [profiles.all_xai] description = "Use xAI backend for all its supported models" default = "xai" @@ -138,7 +113,7 @@ default = "internal" # ========================================================================================= [profiles.example_routing_using_patterns] description = "Example routing profile using patterns" -default = "pipelex_inference" +default = "pipelex_gateway" [profiles.example_routing_using_patterns.routes] # Pattern matching: "model-pattern" = "backend-name" @@ -158,17 +133,17 @@ default = "pipelex_inference" description = "Example routing profile using specific models" [profiles.example_routing_using_specific_models.routes] -"gpt-5-nano" = 
"pipelex_inference" +"gpt-5-nano" = "pipelex_gateway" "gpt-4o-mini" = "blackboxai" "gpt-5-mini" = "openai" "gpt-5-chat" = "azure_openai" -"claude-4-sonnet" = "pipelex_inference" +"claude-4-sonnet" = "pipelex_gateway" "claude-3.7-sonnet" = "blackboxai" -"gemini-2.5-flash-lite" = "pipelex_inference" +"gemini-2.5-flash-lite" = "pipelex_gateway" "gemini-2.5-flash" = "blackboxai" "gemini-2.5-pro" = "vertexai" -"grok-3" = "pipelex_inference" +"grok-3" = "pipelex_gateway" "grok-3-mini" = "xai" diff --git a/.pipelex/mthds_schema.json b/.pipelex/mthds_schema.json new file mode 100644 index 0000000..95c11f9 --- /dev/null +++ b/.pipelex/mthds_schema.json @@ -0,0 +1,1712 @@ +{ + "additionalProperties": false, + "properties": { + "domain": { + "title": "Domain", + "type": "string" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "System Prompt" + }, + "main_pipe": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Main Pipe" + }, + "concept": { + "anyOf": [ + { + "additionalProperties": { + "anyOf": [ + { + "$ref": "#/definitions/ConceptBlueprint" + }, + { + "type": "string" + } + ] + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Concept" + }, + "pipe": { + "anyOf": [ + { + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/definitions/PipeFuncBlueprint" + }, + { + "$ref": "#/definitions/PipeImgGenBlueprint" + }, + { + "$ref": "#/definitions/PipeComposeBlueprint" + }, + { + "$ref": "#/definitions/PipeLLMBlueprint" + }, + { + "$ref": "#/definitions/PipeExtractBlueprint" + }, + { + "$ref": "#/definitions/PipeBatchBlueprint" + }, + { + "$ref": "#/definitions/PipeConditionBlueprint" + }, + { + "$ref": "#/definitions/PipeParallelBlueprint" + }, + { + "$ref": 
"#/definitions/PipeSequenceBlueprint" + } + ] + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Pipe" + } + }, + "required": [ + "domain" + ], + "title": "MTHDS File Schema", + "type": "object", + "definitions": { + "AspectRatio": { + "enum": [ + "square", + "landscape_4_3", + "landscape_3_2", + "landscape_16_9", + "landscape_21_9", + "portrait_3_4", + "portrait_2_3", + "portrait_9_16", + "portrait_9_21" + ], + "title": "AspectRatio", + "type": "string" + }, + "Background": { + "enum": [ + "transparent", + "opaque", + "auto" + ], + "title": "Background", + "type": "string" + }, + "ConceptBlueprint": { + "additionalProperties": false, + "properties": { + "description": { + "title": "Description", + "type": "string" + }, + "structure": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/ConceptStructureBlueprint" + } + ] + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Structure" + }, + "refines": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Refines" + } + }, + "required": [ + "description" + ], + "title": "ConceptBlueprint", + "type": "object" + }, + "ConceptStructureBlueprint": { + "properties": { + "description": { + "title": "Description", + "type": "string" + }, + "type": { + "anyOf": [ + { + "$ref": "#/definitions/ConceptStructureBlueprintFieldType" + }, + { + "type": "null" + } + ], + "default": null + }, + "key_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Key Type" + }, + "value_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Value Type" + }, + "item_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Item Type" + }, + "concept_ref": { + "anyOf": [ + { + "type": 
"string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Concept Ref" + }, + "item_concept_ref": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Item Concept Ref" + }, + "choices": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Choices" + }, + "default_value": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "default": null, + "title": "Default Value" + }, + "required": { + "default": false, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "description" + ], + "title": "ConceptStructureBlueprint", + "type": "object" + }, + "ConceptStructureBlueprintFieldType": { + "enum": [ + "text", + "list", + "dict", + "integer", + "boolean", + "number", + "date", + "concept" + ], + "title": "ConceptStructureBlueprintFieldType", + "type": "string" + }, + "ConstructBlueprint": { + "title": "ConstructBlueprint", + "description": "Construct section defining how to compose a StructuredContent from working memory fields.", + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "type": "string", + "description": "Fixed string value" + }, + { + "type": "number", + "description": "Fixed numeric value" + }, + { + "type": "boolean", + "description": "Fixed boolean value" + }, + { + "type": "array", + "description": "Fixed array value" + }, + { + "type": "object", + "description": "Variable reference from working memory", + "properties": { + "from": { + "type": "string", + "description": "Path to variable in working memory" + }, + "list_to_dict_keyed_by": { + "type": "string", + "description": "Convert list to dict keyed by this attribute" + } + }, + "required": [ + "from" + ], + "additionalProperties": false + }, + { + "type": "object", + "description": "Jinja2 template string", + "properties": { + "template": { + "type": "string", + "description": "Jinja2 template string (with $ 
preprocessing)" + } + }, + "required": [ + "template" + ], + "additionalProperties": false + }, + { + "type": "object", + "description": "Nested construct", + "additionalProperties": { + "$ref": "#/definitions/ConstructFieldBlueprint" + }, + "minProperties": 1 + } + ] + }, + "minProperties": 1 + }, + "ConstructFieldBlueprint": { + "title": "ConstructFieldBlueprint", + "anyOf": [ + { + "type": "string", + "description": "Fixed string value" + }, + { + "type": "number", + "description": "Fixed numeric value" + }, + { + "type": "boolean", + "description": "Fixed boolean value" + }, + { + "type": "array", + "description": "Fixed array value" + }, + { + "type": "object", + "description": "Variable reference from working memory", + "properties": { + "from": { + "type": "string", + "description": "Path to variable in working memory" + }, + "list_to_dict_keyed_by": { + "type": "string", + "description": "Convert list to dict keyed by this attribute" + } + }, + "required": [ + "from" + ], + "additionalProperties": false + }, + { + "type": "object", + "description": "Jinja2 template string", + "properties": { + "template": { + "type": "string", + "description": "Jinja2 template string (with $ preprocessing)" + } + }, + "required": [ + "template" + ], + "additionalProperties": false + }, + { + "type": "object", + "description": "Nested construct", + "additionalProperties": { + "$ref": "#/definitions/ConstructFieldBlueprint" + }, + "minProperties": 1 + } + ] + }, + "ConstructFieldMethod": { + "description": "Method used to compose a field value.", + "enum": [ + "fixed", + "from_var", + "template", + "nested" + ], + "title": "ConstructFieldMethod", + "type": "string" + }, + "ExtractSetting": { + "additionalProperties": false, + "properties": { + "model": { + "title": "Model", + "type": "string" + }, + "max_nb_images": { + "anyOf": [ + { + "minimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Nb Images" + }, + "image_min_size": { 
+ "anyOf": [ + { + "minimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Image Min Size" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + } + }, + "required": [ + "model" + ], + "title": "ExtractSetting", + "type": "object" + }, + "ImageFormat": { + "enum": [ + "png", + "jpeg", + "webp" + ], + "title": "ImageFormat", + "type": "string" + }, + "ImgGenSetting": { + "additionalProperties": false, + "properties": { + "model": { + "title": "Model", + "type": "string" + }, + "quality": { + "anyOf": [ + { + "$ref": "#/definitions/Quality" + }, + { + "type": "null" + } + ], + "default": null + }, + "nb_steps": { + "anyOf": [ + { + "exclusiveMinimum": true, + "type": "integer", + "minimum": 0 + }, + { + "type": "null" + } + ], + "default": null, + "title": "Nb Steps" + }, + "guidance_scale": { + "anyOf": [ + { + "exclusiveMinimum": true, + "type": "number", + "minimum": 0 + }, + { + "type": "null" + } + ], + "default": null, + "title": "Guidance Scale" + }, + "is_moderated": { + "default": false, + "title": "Is Moderated", + "type": "boolean" + }, + "safety_tolerance": { + "anyOf": [ + { + "maximum": 6, + "minimum": 1, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Safety Tolerance" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + } + }, + "required": [ + "model" + ], + "title": "ImgGenSetting", + "type": "object" + }, + "LLMSetting": { + "additionalProperties": false, + "properties": { + "model": { + "title": "Model", + "type": "string" + }, + "temperature": { + "maximum": 1, + "minimum": 0, + "title": "Temperature", + "type": "number" + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "enum": [ + "auto" + ] + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Tokens" + }, 
+ "image_detail": { + "anyOf": [ + { + "$ref": "#/definitions/PromptImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, + "prompting_target": { + "anyOf": [ + { + "$ref": "#/definitions/PromptingTarget" + }, + { + "type": "null" + } + ], + "default": null + }, + "reasoning_effort": { + "anyOf": [ + { + "$ref": "#/definitions/ReasoningEffort" + }, + { + "type": "null" + } + ], + "default": null + }, + "reasoning_budget": { + "anyOf": [ + { + "exclusiveMinimum": true, + "type": "integer", + "minimum": 0 + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning Budget" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + } + }, + "required": [ + "model", + "temperature" + ], + "title": "LLMSetting", + "type": "object" + }, + "ModelReference": { + "description": "A parsed model reference with explicit kind and name.\n\nArgs:\n kind: The type of reference (preset, alias, waterfall, or handle)\n name: The actual name of the model/preset/alias/waterfall (without prefix)\n raw: The original input string (for error messages)", + "properties": { + "kind": { + "$ref": "#/definitions/ModelReferenceKind" + }, + "name": { + "title": "Name", + "type": "string" + }, + "raw": { + "title": "Raw", + "type": "string" + } + }, + "required": [ + "kind", + "name", + "raw" + ], + "title": "ModelReference", + "type": "object" + }, + "ModelReferenceKind": { + "description": "The kind of model reference.", + "enum": [ + "preset", + "alias", + "waterfall", + "handle" + ], + "title": "ModelReferenceKind", + "type": "string" + }, + "PipeBatchBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeBatch", + "title": "Type", + "type": "string", + "enum": [ + "PipeBatch" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + 
"type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "branch_pipe_code": { + "title": "Branch Pipe Code", + "type": "string" + }, + "input_list_name": { + "title": "Input List Name", + "type": "string" + }, + "input_item_name": { + "title": "Input Item Name", + "type": "string" + } + }, + "required": [ + "description", + "output", + "branch_pipe_code", + "input_list_name", + "input_item_name" + ], + "title": "PipeBatchBlueprint", + "type": "object" + }, + "PipeComposeBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeCompose", + "title": "Type", + "type": "string", + "enum": [ + "PipeCompose" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "template": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/TemplateBlueprint" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Template" + }, + "construct": { + "anyOf": [ + { + "$ref": "#/definitions/ConstructBlueprint" + }, + { + "type": "null" + } + ], + "default": null + } + }, + "required": [ + "description", + "output" + ], + "title": "PipeComposeBlueprint", + "type": "object" + }, + "PipeConditionBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeCondition", + "title": "Type", + "type": "string", + "enum": [ + "PipeCondition" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": 
"string" + }, + "expression_template": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Expression Template" + }, + "expression": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Expression" + }, + "outcomes": { + "additionalProperties": { + "type": "string" + }, + "title": "Outcomes", + "type": "object" + }, + "default_outcome": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/definitions/SpecialOutcome" + } + ], + "title": "Default Outcome" + }, + "add_alias_from_expression_to": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Add Alias From Expression To" + } + }, + "required": [ + "description", + "output", + "default_outcome" + ], + "title": "PipeConditionBlueprint", + "type": "object" + }, + "PipeExtractBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeExtract", + "title": "Type", + "type": "string", + "enum": [ + "PipeExtract" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "model": { + "anyOf": [ + { + "$ref": "#/definitions/ExtractSetting" + }, + { + "type": "string" + }, + { + "$ref": "#/definitions/ModelReference" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model" + }, + "max_page_images": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Page Images" + }, + "page_image_captions": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Page Image Captions" + }, + "page_views": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + 
} + ], + "default": null, + "title": "Page Views" + }, + "page_views_dpi": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Page Views Dpi" + } + }, + "required": [ + "description", + "output" + ], + "title": "PipeExtractBlueprint", + "type": "object" + }, + "PipeFuncBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeFunc", + "title": "Type", + "type": "string", + "enum": [ + "PipeFunc" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "function_name": { + "description": "The name of the function to call.", + "title": "Function Name", + "type": "string" + } + }, + "required": [ + "description", + "output", + "function_name" + ], + "title": "PipeFuncBlueprint", + "type": "object" + }, + "PipeImgGenBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeImgGen", + "title": "Type", + "type": "string", + "enum": [ + "PipeImgGen" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "prompt": { + "title": "Prompt", + "type": "string" + }, + "negative_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Negative Prompt" + }, + "model": { + "anyOf": [ + { + "$ref": "#/definitions/ImgGenSetting" + }, + { + "type": "string" + }, + { + "$ref": "#/definitions/ModelReference" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model" + }, + 
"aspect_ratio": { + "anyOf": [ + { + "$ref": "#/definitions/AspectRatio" + }, + { + "type": "null" + } + ], + "default": null + }, + "is_raw": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Is Raw" + }, + "seed": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string", + "enum": [ + "auto" + ] + }, + { + "type": "null" + } + ], + "default": null, + "title": "Seed" + }, + "background": { + "anyOf": [ + { + "$ref": "#/definitions/Background" + }, + { + "type": "null" + } + ], + "default": null + }, + "output_format": { + "anyOf": [ + { + "$ref": "#/definitions/ImageFormat" + }, + { + "type": "null" + } + ], + "default": null + } + }, + "required": [ + "description", + "output", + "prompt" + ], + "title": "PipeImgGenBlueprint", + "type": "object" + }, + "PipeLLMBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeLLM", + "title": "Type", + "type": "string", + "enum": [ + "PipeLLM" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "model": { + "anyOf": [ + { + "$ref": "#/definitions/LLMSetting" + }, + { + "type": "string" + }, + { + "$ref": "#/definitions/ModelReference" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model" + }, + "model_to_structure": { + "anyOf": [ + { + "$ref": "#/definitions/LLMSetting" + }, + { + "type": "string" + }, + { + "$ref": "#/definitions/ModelReference" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model To Structure" + }, + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "System Prompt" + }, + "prompt": { + "anyOf": [ + { + "type": "string" + }, + { + 
"type": "null" + } + ], + "default": null, + "title": "Prompt" + }, + "structuring_method": { + "anyOf": [ + { + "$ref": "#/definitions/StructuringMethod" + }, + { + "type": "null" + } + ], + "default": null + } + }, + "required": [ + "description", + "output" + ], + "title": "PipeLLMBlueprint", + "type": "object" + }, + "PipeParallelBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeParallel", + "title": "Type", + "type": "string", + "enum": [ + "PipeParallel" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "branches": { + "items": { + "$ref": "#/definitions/SubPipeBlueprint" + }, + "title": "Branches", + "type": "array" + }, + "add_each_output": { + "default": false, + "title": "Add Each Output", + "type": "boolean" + }, + "combined_output": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Combined Output" + } + }, + "required": [ + "description", + "output", + "branches" + ], + "title": "PipeParallelBlueprint", + "type": "object" + }, + "PipeSequenceBlueprint": { + "additionalProperties": false, + "properties": { + "type": { + "default": "PipeSequence", + "title": "Type", + "type": "string", + "enum": [ + "PipeSequence" + ] + }, + "description": { + "title": "Description", + "type": "string" + }, + "inputs": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Inputs" + }, + "output": { + "title": "Output", + "type": "string" + }, + "steps": { + "items": { + "$ref": "#/definitions/SubPipeBlueprint" + }, + "title": "Steps", + "type": "array" + } + }, + "required": [ + "description", + "output", + 
"steps" + ], + "title": "PipeSequenceBlueprint", + "type": "object" + }, + "PromptImageDetail": { + "enum": [ + "high", + "low", + "auto" + ], + "title": "PromptImageDetail", + "type": "string" + }, + "PromptingTarget": { + "enum": [ + "openai", + "anthropic", + "mistral", + "gemini", + "fal" + ], + "title": "PromptingTarget", + "type": "string" + }, + "Quality": { + "enum": [ + "low", + "medium", + "high" + ], + "title": "Quality", + "type": "string" + }, + "ReasoningEffort": { + "enum": [ + "none", + "minimal", + "low", + "medium", + "high", + "max" + ], + "title": "ReasoningEffort", + "type": "string" + }, + "SpecialOutcome": { + "enum": [ + "fail", + "continue" + ], + "title": "SpecialOutcome", + "type": "string" + }, + "StructuringMethod": { + "enum": [ + "direct", + "preliminary_text" + ], + "title": "StructuringMethod", + "type": "string" + }, + "SubPipeBlueprint": { + "additionalProperties": false, + "properties": { + "pipe": { + "title": "Pipe", + "type": "string" + }, + "result": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Result" + }, + "nb_output": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Nb Output" + }, + "multiple_output": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Multiple Output" + }, + "batch_over": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Batch Over" + }, + "batch_as": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Batch As" + } + }, + "required": [ + "pipe" + ], + "title": "SubPipeBlueprint", + "type": "object" + }, + "TagStyle": { + "enum": [ + "no_tag", + "ticks", + "xml", + "square_brackets" + ], + "title": "TagStyle", + "type": "string" + }, + "TemplateBlueprint": { + "properties": { + "template": { + "description": "Raw template source", + 
"title": "Template", + "type": "string" + }, + "templating_style": { + "anyOf": [ + { + "$ref": "#/definitions/TemplatingStyle" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Style of prompting to use (typically for different LLMs)" + }, + "category": { + "$ref": "#/definitions/TemplateCategory", + "description": "Category of the template (could also be HTML, MARKDOWN, MERMAID, etc.), influences template rendering rules" + }, + "extra_context": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Additional context variables for template rendering", + "title": "Extra Context" + } + }, + "required": [ + "template", + "category" + ], + "title": "TemplateBlueprint", + "type": "object" + }, + "TemplateCategory": { + "enum": [ + "basic", + "expression", + "html", + "markdown", + "mermaid", + "llm_prompt", + "img_gen_prompt" + ], + "title": "TemplateCategory", + "type": "string" + }, + "TemplatingStyle": { + "properties": { + "tag_style": { + "$ref": "#/definitions/TagStyle" + }, + "text_format": { + "$ref": "#/definitions/TextFormat", + "default": "plain" + } + }, + "required": [ + "tag_style" + ], + "title": "TemplatingStyle", + "type": "object" + }, + "TextFormat": { + "enum": [ + "plain", + "markdown", + "html", + "json" + ], + "title": "TextFormat", + "type": "string" + } + }, + "$schema": "http://json-schema.org/draft-04/schema#", + "$comment": "Generated from PipelexBundleBlueprint v0.18.0b3. 
Do not edit manually.", + "x-taplo": { + "initKeys": [ + "domain" + ] + } +} diff --git a/.pipelex/pipelex.toml b/.pipelex/pipelex.toml index 2983ada..12516db 100644 --- a/.pipelex/pipelex.toml +++ b/.pipelex/pipelex.toml @@ -6,19 +6,19 @@ # `pipelex init config` # # Purpose: -# - This file allows to override Pipelex's default settings for specific projects -# - Feel free to modify any settings below to suit your needs -# - You can add any configuration sections that exist in the main pipelex.toml +# - This file allows you to override Pipelex's default settings for specific projects +# - All values below are set to their defaults - modify them as needed +# - The values here will override the defaults from the Pipelex package # # Finding Available Settings: # - See the full default configuration in: pipelex/pipelex.toml (in the Pipelex package) # - See the configuration structure classes in: pipelex/config.py and pipelex/cogt/config_cogt.py # -# Common customizations are proposed below, such as: +# Common customizations include: # - Logging levels and behavior # - Excluded directories for scanning # - LLM prompt dumping for debugging -# - Feature flags for tracking and reporting +# - Feature flags # - Observer and reporting output directories # # Documentation: https://docs.pipelex.com @@ -26,89 +26,166 @@ # #################################################################################################### +#################################################################################################### +# Pipeline Execution Config +#################################################################################################### + +[pipelex.pipeline_execution_config] +# Set to false to disable conversion of incoming data URLs to pipelex-storage:// URIs +is_normalize_data_urls_to_storage = true +# Set to false to disable generation of execution graphs +is_generate_graph = true + +[pipelex.pipeline_execution_config.graph_config.data_inclusion] +# Control what 
data is included in graph outputs +stuff_json_content = true +stuff_text_content = true +stuff_html_content = true +error_stack_traces = true + +[pipelex.pipeline_execution_config.graph_config.graphs_inclusion] +# Control which graph outputs are generated +graphspec_json = true +mermaidflow_mmd = true +mermaidflow_html = true +reactflow_viewspec = true +reactflow_html = true + +[pipelex.pipeline_execution_config.graph_config.reactflow_config] +# Customize ReactFlow graph rendering +edge_type = "bezier" # Options: "bezier", "smoothstep", "step", "straight" +nodesep = 50 # Horizontal spacing between nodes +ranksep = 30 # Vertical spacing between ranks/levels +initial_zoom = 1.0 # Initial zoom level (1.0 = 100%) +pan_to_top = true # Pan to show top of graph on load + +#################################################################################################### +# Storage Config +#################################################################################################### [pipelex.storage_config] -# Uncomment to change the storage method: "local" (default) or "in_memory" -# is_fetch_remote_content_enabled = true -# uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" -# method = "local" -# local_storage_path = ".pipelex/storage" +# Storage method: "local", "in_memory" (default), "s3", or "gcp" +method = "in_memory" +# Whether to fetch remote HTTP URLs and store them locally +is_fetch_remote_content_enabled = true +# Whether to upload local file paths to storage and replace with pipelex-storage:// URIs +is_upload_local_content_enabled = true + +[pipelex.storage_config.local] +# Local storage settings +uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" +local_storage_path = ".pipelex/storage" + +[pipelex.storage_config.in_memory] +# In-memory storage settings +uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" + +[pipelex.storage_config.s3] +# AWS S3 storage settings (requires boto3: `pip install pipelex[s3]`) +uri_format = 
"{primary_id}/{secondary_id}/{hash}.{extension}" +bucket_name = "" +region = "" +signed_urls_lifespan_seconds = 3600 # Set to "disabled" for public URLs + +[pipelex.storage_config.gcp] +# Google Cloud Storage settings (requires google-cloud-storage: `pip install pipelex[gcp-storage]`) +uri_format = "{primary_id}/{secondary_id}/{hash}.{extension}" +bucket_name = "" +project_id = "" +signed_urls_lifespan_seconds = 3600 # Set to "disabled" for public URLs + +#################################################################################################### +# Scan Config +#################################################################################################### [pipelex.scan_config] -# Uncomment to customize the excluded directories for scanning -# excluded_dirs = [ -# ".venv", -# "venv", -# "env", -# ".env", -# "virtualenv", -# ".virtualenv", -# ".git", -# "__pycache__", -# ".pytest_cache", -# ".mypy_cache", -# ".ruff_cache", -# "node_modules", -# "results", -# ] +# Directories to exclude when scanning for pipeline files +excluded_dirs = [ + ".venv", + "venv", + "env", + ".env", + "virtualenv", + ".virtualenv", + ".git", + "__pycache__", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + "node_modules", + "results", +] + +#################################################################################################### +# Builder Config +#################################################################################################### [pipelex.builder_config] -# Uncomment to change where the generated pipelines are saved: -# default_output_dir = "." -# default_bundle_file_name = "bundle" -# default_directory_base_name = "pipeline" +# Settings for generated pipelines +default_output_dir = "." 
+default_bundle_file_name = "bundle" +default_directory_base_name = "pipeline" -[pipelex.log_config] -# Uncomment to change the default log level: -# default_log_level = "INFO" +#################################################################################################### +# Log Config +#################################################################################################### -# Uncomment to log to stderr instead of stdout -# console_log_target = "stderr" -# console_print_target = "stderr" +[pipelex.log_config] +# Default logging level: "DEBUG", "INFO", "WARNING", "ERROR" +default_log_level = "INFO" +# Log output target: "stdout" or "stderr" +console_log_target = "stdout" +console_print_target = "stdout" [pipelex.log_config.package_log_levels] -# Uncomment to change the log level for specific packages: -# pipelex = "INFO" +# Log levels for specific packages (use "-" instead of "." in package names) +pipelex = "INFO" + +#################################################################################################### +# Feature Config +#################################################################################################### + +[pipelex.feature_config] +# WIP/Experimental feature flags +is_reporting_enabled = true + +#################################################################################################### +# Reporting Config +#################################################################################################### + +[pipelex.reporting_config] +# Cost reporting settings +is_log_costs_to_console = false +is_generate_cost_report_file_enabled = false +cost_report_dir_path = "reports" +cost_report_base_name = "cost_report" +cost_report_extension = "csv" +cost_report_unit_scale = 1.0 + +#################################################################################################### +# Cogt (Cognitive Tools) Config +#################################################################################################### 
-[cogt] [cogt.model_deck_config] -# Uncomment to disable model fallback: it will raise errors instead of using secondary model options: -# is_model_fallback_enabled = false -# Uncomment to change the reaction to missing presets: "raise" (default), "log" or "none" -# missing_presets_reaction = "raise" +# Model fallback behavior: if true, uses secondary model options when primary fails +is_model_fallback_enabled = true +# Reaction to missing presets: "raise", "log", or "none" +missing_presets_reaction = "log" [cogt.tenacity_config] -# Uncomment to change those values as needed: -# max_retries = 50 # Maximum number of retry attempts before giving up -# wait_multiplier = 0.2 # Multiplier applied to the wait time between retries (in seconds) -# wait_max = 20 # Maximum wait time between retries (in seconds) -# wait_exp_base = 1.3 # Base for exponential backoff calculation +# Retry behavior for API calls +max_retries = 50 # Maximum number of retry attempts before giving up +wait_multiplier = 0.2 # Multiplier applied to the wait time between retries (in seconds) +wait_max = 20 # Maximum wait time between retries (in seconds) +wait_exp_base = 1.3 # Base for exponential backoff calculation [cogt.llm_config] -# Uncomment any of these to enable dumping the inputs or outputs of text-generation with an LLM: -# is_dump_text_prompts_enabled = true -# is_dump_response_text_enabled = true +# Enable dumping of LLM inputs/outputs for debugging +is_dump_text_prompts_enabled = false +is_dump_response_text_enabled = false [cogt.llm_config.instructor_config] -# Uncomment any of these to enable dumping the kwargs, response or errors when generating structured content: -# is_dump_kwargs_enabled = true -# is_dump_response_enabled = true -# is_dump_error_enabled = true - -[pipelex.observer_config] -# Uncomment to change the directory where the observer will save its results: -# observer_dir = "results/observer" - -[pipelex.feature_config] -# WIP/Experimental feature flags: -# 
is_pipeline_tracking_enabled = false -# is_reporting_enabled = true - -[pipelex.reporting_config] -# Uncomment to customize the reporting configuration: -# is_log_costs_to_console = false -# is_generate_cost_report_file_enabled = false -# cost_report_dir_path = "reports" -# cost_report_base_name = "cost_report" -# cost_report_extension = "csv" -# cost_report_unit_scale = 1.0 +# Enable dumping of structured content generation details for debugging +is_dump_kwargs_enabled = false +is_dump_response_enabled = false +is_dump_error_enabled = false diff --git a/.pipelex/plxt.toml b/.pipelex/plxt.toml new file mode 100644 index 0000000..f1c52f7 --- /dev/null +++ b/.pipelex/plxt.toml @@ -0,0 +1,127 @@ +# ============================================================================= +# Pipelex TOML Configuration for pipelex-demo +# ============================================================================= +# Configures MTHDS/TOML formatting and linting behaviour for this project. +# Powered by the Pipelex extension (plxt / taplo engine). +# +# Docs: https://taplo.tamasfe.dev/configuration/ +# ============================================================================= + +# --------------------------------------------------------------------------- +# File discovery +# --------------------------------------------------------------------------- + +# Glob patterns for files to process. +include = ["**/*.toml", "**/*.mthds", "**/*.plx"] + +exclude = [ + ".venv/**", + ".mypy_cache/**", + ".ruff_cache/**", + ".pytest_cache/**", + "__pycache__/**", + "target/**", + "node_modules/**", + ".git/**", + "*.lock", +] # Glob patterns for files to ignore. +# These are evaluated relative to the config file location. 
+ +# ============================================================================= +# Global formatting defaults +# ============================================================================= +# These apply to every file matched by `include` unless overridden by a +# [[rule]].formatting section below. Every option is listed explicitly, at +# its built-in default unless noted, so you can tune any of them in one place. + +[formatting] +align_entries = false # line up "=" signs across consecutive entries +align_comments = true # align end-of-line comments on consecutive lines +align_single_comments = true # also align lone comments (requires align_comments) +array_trailing_comma = true +array_auto_expand = true # go multiline when array exceeds column_width +array_auto_collapse = false # don't re-collapse multiline arrays that fit (built-in default: true) +inline_table_expand = true # expand inline tables exceeding column_width +compact_arrays = true # [1, 2] not [ 1, 2 ] +compact_inline_tables = false # keep spaces inside braces: { a = 1 } +compact_entries = false # keep spaces around "=": key = value +column_width = 80 +indent_tables = false +indent_entries = false +indent_string = " " +trailing_newline = true +reorder_keys = false +reorder_arrays = false +reorder_inline_tables = false +allowed_blank_lines = 2 +crlf = false + +# ============================================================================= +# Per-file-type rules +# ============================================================================= +# Each [[rule]] can narrow its scope with `include` / `exclude` globs and +# provide its own [rule.formatting] overrides. Options not listed here fall +# back to the global [formatting] section above. + + +# --------------------------------------------------------------------------- +# Rule: TOML files +# --------------------------------------------------------------------------- +[[rule]] +# Which files this rule applies to (relative globs). 
+include = ["**/*.toml"] + +# Per-rule formatting overrides — commented out at their built-in defaults; +# uncomment any of them to tweak .toml files independently of .mthds files. +[rule.formatting] +# align_entries = false +# align_comments = true +# align_single_comments = true +# array_trailing_comma = true +# array_auto_expand = true +# array_auto_collapse = true +# inline_table_expand = true +# compact_arrays = true +# compact_inline_tables = false +# compact_entries = false +# column_width = 80 +# indent_tables = false +# indent_entries = false +# indent_string = " " +# trailing_newline = true +# allowed_blank_lines = 2 + + +# --------------------------------------------------------------------------- +# Rule: MTHDS files (Pipelex pipeline definitions) +# --------------------------------------------------------------------------- +[[rule]] +# Which files this rule applies to (relative globs). +include = ["**/*.mthds", "**/*.plx"] + +[rule.schema] +sources = [ + ".pipelex/mthds_schema.json", + "~/.pipelex/mthds_schema.json", + "https://pipelex-config.s3.eu-west-3.amazonaws.com/mthds_schema_1.json", +] + +# Per-rule formatting overrides — align_entries is enabled for pipeline files; +# uncomment any other option to tweak it independently of .toml files. 
+[rule.formatting] +align_entries = true +# align_comments = true +# align_single_comments = true +# array_trailing_comma = true +# array_auto_expand = true +# array_auto_collapse = true +# inline_table_expand = true +# compact_arrays = true +# compact_inline_tables = false +# compact_entries = false +# column_width = 80 +# indent_tables = false +# indent_entries = false +# indent_string = " " +# trailing_newline = true +# allowed_blank_lines = 2 diff --git a/.vscode/settings.json b/.vscode/settings.json index e908f73..3cc0b99 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -22,6 +22,18 @@ "python.testing.pytestEnabled": true, "djlint.showInstallError": false, "files.associations": { - "*.plx": "plx" + "*.plx": "mthds" + }, + "editor.formatOnSave": true, + "[html]": { + "editor.formatOnSave": false + }, + "[toml]": { + "editor.defaultFormatter": "Pipelex.pipelex", + "editor.formatOnSave": true + }, + "[mthds]": { + "editor.defaultFormatter": "Pipelex.pipelex", + "editor.formatOnSave": true } } \ No newline at end of file diff --git a/.windsurfrules.md b/.windsurfrules.md deleted file mode 100644 index af4572e..0000000 --- a/.windsurfrules.md +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) 
- -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. - -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. 
- -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary where each key is the variable name used in the prompts and each value is the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So if you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'` -That means that the pipe validate_expense is missing the input `invoice` because one of its sub-pipes needs it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when the operation requires an exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. 
No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates an optional text field): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the available pipes and adapt them to your needs. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - For choosing the next pipe based on an expression evaluated over stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (including with Vision LLMs) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is expected to be long, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a Markdown document. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, PipeExtract outputs a `ListContent` of `Page`. - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` holds the text and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole pdf page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM inline, you specify the model together with its temperature (float between 0 and 1); max_tokens (either an int or the string "auto") can be set the same way. - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The benefit is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can see, we made it so that the stuff can be defined in different ways, using either structured content or plain data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. 
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## its actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index af4572e..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,1219 +0,0 @@ - -# Pipelex Coding Rules - -## Guide to write or edit pipelines using the Pipelex language in .plx files - -- Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` - - For all pipelines: `pipelex validate all` - - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. -- Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) - -### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) -- Files must be `.py` for code defining the data structures -- Use descriptive names in `snake_case` - -### Pipeline File Outline -A pipeline file has three main sections: -1. Domain statement -2. Concept definitions -3. Pipe definitions - -#### Domain Statement -```plx -domain = "domain_code" -description = "Description of the domain" # Optional -``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. - -#### Concept Definitions - -Concepts represent ideas and semantic entities in your pipeline. They define what something *is*, not how it's structured. 
- -```plx -[concept] -ConceptName = "Description of the concept" -``` - -**Naming Rules:** -- Use PascalCase for concept names -- Never use plurals (no "Stories", use "Story") - lists are handled implicitly by Pipelex -- Avoid circumstantial adjectives (no "LargeText", use "Text") - focus on the essence of what the concept represents -- Don't redefine native concepts (Text, Image, PDF, TextAndImages, Number, Page, JSON) - -**Native Concepts:** -Pipelex provides built-in native concepts: `Text`, `Image`, `PDF`, `TextAndImages`, `Number`, `Page`, `JSON`. Use these directly or refine them when appropriate. - -**Refining Native Concepts:** -To create a concept that specializes a native concept without adding fields: - -```plx -[concept.Landscape] -description = "A scenic outdoor photograph" -refines = "Image" -``` - -For details on how to structure concepts with fields, see the "Structuring Models" section below. - -#### Pipe Definitions - -### Pipe Base Definition - -```plx -[pipe.your_pipe_code] -type = "PipeLLM" -description = "A description of what your pipe does" -inputs = { input_1 = "ConceptName1", input_2 = "ConceptName2" } -output = "ConceptName" -``` - -The pipes will all have at least this base definition. -- `inputs`: Dictionary of key being the variable used in the prompts, and the value being the ConceptName. It should ALSO LIST THE INPUTS OF THE INTERMEDIATE STEPS (if PipeSequence) or of the conditional pipes (if PipeCondition). -So If you have this error: -`PipeValidationError: missing_input_variable • domain='expense_validator' • pipe='validate_expense' • -variable='['invoice']'`` -That means that the pipe validate_expense is missing the input `invoice` because one of the subpipe is needing it. - -NEVER WRITE THE INPUTS BY BREAKING THE LINE LIKE THIS: - -```plx -inputs = { - input_1 = "ConceptName1", - input_2 = "ConceptName2" -} -``` - - -- `output`: The name of the concept to output. 
The `ConceptName` should have the same name as the python class if you want structured output: - -#### Input Multiplicity - -By default, inputs expect a single item. Use bracket notation to specify multiple items: - -```plx -## Single item (default) -inputs = { document = "Text" } - -## Variable list - indeterminate number of items -inputs = { documents = "Text[]" } - -## Fixed count - exactly N items -inputs = { comparison_items = "Image[2]" } -``` - -**Key points:** -- No brackets = single item (default behavior) -- Use `[]` for lists of unknown length -- Use `[N]` (where N is an integer) when operation requires exact count (e.g., comparing 2 items) - -### Structuring Models - -Once you've defined your concepts semantically (see "Concept Definitions" above), you need to specify their structure if they have fields. - -#### Three Ways to Structure Concepts - -**1. No Structure Needed** - -If a concept only refines a native concept without adding fields, use the TOML table syntax shown in "Concept Definitions" above. No structure section is needed. - -**2. 
Inline Structure Definition (RECOMMENDED for most cases)** - -For concepts with structured fields, define them inline using TOML syntax: - -```plx -[concept.Invoice] -description = "A commercial document issued by a seller to a buyer" - -[concept.Invoice.structure] -invoice_number = "The unique invoice identifier" # This will be optional by default -issue_date = { type = "date", description = "The date the invoice was issued", required = true } -total_amount = { type = "number", description = "The total invoice amount", required = true } -vendor_name = "The name of the vendor" # This will be optional by default -line_items = { type = "list", item_type = "text", description = "List of items" } -``` - -**Supported inline field types:** `text`, `integer`, `boolean`, `number`, `date`, `list`, `dict` - -**Field properties:** `type`, `description`, `required` (default: false), `default_value`, `choices`, `item_type` (for lists), `key_type` and `value_type` (for dicts) - -**Simple syntax** (creates an optional text field): -```plx -field_name = "Field description" -``` - -**Detailed syntax** (with explicit properties): -```plx -field_name = { type = "text", description = "Field description", default_value = "default" } -``` - -**3. 
Python StructuredContent Class (For Advanced Features)** - -Create a Python class when you need: -- Custom validation logic (@field_validator, @model_validator) -- Computed properties (@property methods) -- Custom methods or class methods -- Complex cross-field validation -- Reusable structures across multiple domains - -```python -from pipelex.core.stuffs.structured_content import StructuredContent -from pydantic import Field, field_validator - -class Invoice(StructuredContent): - """A commercial invoice with validation.""" - - invoice_number: str = Field(description="The unique invoice identifier") - total_amount: float = Field(ge=0, description="The total invoice amount") - tax_amount: float = Field(ge=0, description="Tax amount") - - @field_validator('tax_amount') - @classmethod - def validate_tax(cls, v, info): - """Ensure tax doesn't exceed total.""" - total = info.data.get('total_amount', 0) - if v > total: - raise ValueError('Tax amount cannot exceed total amount') - return v -``` - -**Location:** Create models in `my_project/some_domain/some_domain_struct.py`. Classes inheriting from `StructuredContent` are automatically discovered. - -#### Decision Rules for Agents - -**If concept already exists:** -- If it's already inline → KEEP IT INLINE unless user explicitly asks to convert or features require Python class -- If it's already a Python class → KEEP IT as Python class - -**If creating new concept:** -1. Does it only refine a native concept without adding fields? → Use concept-only declaration -2. Does it need custom validation, computed properties, or methods? → Use Python class -3. 
Otherwise → Use inline structure (fastest and simplest) - -**When to suggest conversion to Python class:** -- User needs validation logic beyond type checking -- User needs computed properties or custom methods -- Structure needs to be reused across multiple domains -- Complex type relationships or inheritance required - -#### Inline Structure Limitations - -Inline structures: -- ✅ Support all common field types (text, number, date, list, dict, etc.) -- ✅ Support required/optional fields, defaults, choices -- ✅ Generate full Pydantic models with validation -- ❌ Cannot have custom validators or complex validation logic -- ❌ Cannot have computed properties or custom methods -- ❌ Cannot refine custom (non-native) concepts -- ❌ Limited IDE autocomplete compared to explicit Python classes - - -### Pipe Controllers and Pipe Operators - -Look at the Pipes we have in order to adapt it. Pipes are organized in two categories: - -1. **Controllers** - For flow control: - - `PipeSequence` - For creating a sequence of multiple steps - - `PipeCondition` - If the next pipe depends on the expression of a stuff in the working memory - - `PipeParallel` - For parallelizing pipes - -2. **Operators** - For specific tasks: - - `PipeLLM` - Generate Text and Objects (includes Vision LLMs) - - `PipeExtract` - Extract text and images from an image or a PDF - - `PipeCompose` - For composing text using Jinja2 templates: supports html, markdown, mermaid, etc. - - `PipeImgGen` - Generate Images - - `PipeFunc` - For running classic python scripts - -### PipeSequence controller - -Purpose: PipeSequence executes multiple pipes in a defined order, where each step can use results from original inputs or from previous steps. 
- -#### Basic Definition -```plx -[pipe.your_sequence_name] -type = "PipeSequence" -description = "Description of what this sequence does" -inputs = { input_name = "InputType" } # All the inputs of the sub pipes, except the ones generated by intermediate steps -output = "OutputType" -steps = [ - { pipe = "first_pipe", result = "first_result" }, - { pipe = "second_pipe", result = "second_result" }, - { pipe = "final_pipe", result = "final_result" } -] -``` - -#### Key Components - -1. **Steps Array**: List of pipes to execute in sequence - - `pipe`: Name of the pipe to execute - - `result`: Name to assign to the pipe's output that will be in the working memory - -#### Using PipeBatch in Steps - -You can use PipeBatch functionality within steps using `batch_over` and `batch_as`: - -```plx -steps = [ - { pipe = "process_items", batch_over = "input_list", batch_as = "current_item", result = "processed_items" - } -] -``` - -1. **batch_over**: Specifies a `ListContent` field to iterate over. Each item in the list will be processed individually and IN PARALLEL by the pipe. - - Must be a `ListContent` type containing the items to process - - Can reference inputs or results from previous steps - -2. **batch_as**: Defines the name that will be used to reference the current item being processed - - This name can be used in the pipe's input mappings - - Makes each item from the batch available as a single element - -The result of a batched step will be a `ListContent` containing the outputs from processing each item. - -### PipeCondition controller - -The PipeCondition controller allows you to implement conditional logic in your pipeline, choosing which pipe to execute based on an evaluated expression. It supports both direct expressions and expression templates. - -#### Basic usage - -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." 
-inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression = "input_data.category" -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` -or -```plx -[pipe.conditional_operation] -type = "PipeCondition" -description = "A conditional pipe to decide whether..." -inputs = { input_data = "CategoryInput" } -output = "native.Text" -expression_template = "{{ input_data.category }}" # Jinja2 code -default_outcome = "process_medium" - -[pipe.conditional_operation.outcomes] -small = "process_small" -medium = "process_medium" -large = "process_large" -``` - -#### Key Parameters - -- `expression`: Direct boolean or string expression (mutually exclusive with expression_template) -- `expression_template`: Jinja2 template for more complex conditional logic (mutually exclusive with expression) -- `outcomes`: Dictionary mapping expression results to pipe codes: - 1. The key on the left (`small`, `medium`) is the result of `expression` or `expression_template` - 2. The value on the right (`process_small`, `process_medium`, etc.) is the name of the pipe to trigger -- `default_outcome`: **Required** - The pipe to execute if the expression doesn't match any key in outcomes. Use `"fail"` if you want the pipeline to fail when no match is found - -Example with fail as default: -```plx -[pipe.strict_validation] -type = "PipeCondition" -description = "Validate with strict matching" -inputs = { status = "Status" } -output = "Text" -expression = "status.value" -default_outcome = "fail" - -[pipe.strict_validation.outcomes] -approved = "process_approved" -rejected = "process_rejected" -``` - -### PipeLLM operator - -PipeLLM is used to: -1. Generate text or objects with LLMs -2. 
Process images with Vision LLMs - -#### Basic Usage - -Simple Text Generation: -```plx -[pipe.write_story] -type = "PipeLLM" -description = "Write a short story" -output = "Text" -prompt = """ -Write a short story about a programmer. -""" -``` - -Structured Data Extraction: -```plx -[pipe.extract_info] -type = "PipeLLM" -description = "Extract information" -inputs = { text = "Text" } -output = "PersonInfo" -prompt = """ -Extract person information from this text: -@text -""" -``` - -Supports system instructions: -```plx -[pipe.expert_analysis] -type = "PipeLLM" -description = "Expert analysis" -output = "Analysis" -system_prompt = "You are a data analysis expert" -prompt = "Analyze this data" -``` - -#### Multiple Outputs - -Generate multiple outputs (fixed number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[3]" # Generate exactly 3 ideas -prompt = "Generate 3 ideas" -``` - -Generate multiple outputs (variable number) - use bracket notation: -```plx -[pipe.generate_ideas] -type = "PipeLLM" -description = "Generate ideas" -output = "Idea[]" # Let the LLM decide how many to generate -prompt = "Generate ideas" -``` - -#### Vision - -Process images with VLMs (image inputs must be tagged in the prompt): -```plx -[pipe.analyze_image] -type = "PipeLLM" -description = "Analyze image" -inputs = { image = "Image" } -output = "ImageAnalysis" -prompt = """ -Describe what you see in this image: - -$image -""" -``` - -You can also reference images inline in meaningful sentences to guide the Visual LLM: -```plx -[pipe.compare_images] -type = "PipeLLM" -description = "Compare two images" -inputs = { photo = "Image", painting = "Image" } -output = "Analysis" -prompt = "Analyze the colors in $photo and the shapes in $painting." 
-``` - -#### Writing prompts for PipeLLM - -**Insert stuff inside a tagged block** - -If the inserted text is supposedly a long text, made of several lines or paragraphs, you want it inserted inside a block, possibly a block tagged and delimited with proper syntax as one would do in a markdown documentation. To include stuff as a block, use the "@" prefix. - -Example template: -```plx -prompt = """ -Match the expense with its corresponding invoice: - -@expense - -@invoices -""" -``` -In the example above, the expense data and the invoices data are obviously made of several lines each, that's why it makes sense to use the "@" prefix in order to have them delimited inside a block. Note that our preprocessor will automatically include the block's title, so it doesn't need to be explicitly written in the prompt. - -DO NOT write things like "Here is the expense: @expense". -DO write simply "@expense" alone in an isolated line. - -**Insert stuff inline** - -If the inserted text is short text and it makes sense to have it inserted directly into a sentence, you want it inserted inline. To insert stuff inline, use the "$" prefix. This will insert the stuff without delimiters and the content will be rendered as plain text. - -Example template: -```plx -prompt = """ -Your goal is to summarize everything related to $topic in the provided text: - -@text - -Please provide only the summary, with no additional text or explanations. -Your summary should not be longer than 2 sentences. -""" -``` - -In the example above, $topic will be inserted inline, whereas @text will be a delimited block. -Be sure to make the proper choice of prefix for each insertion. - -DO NOT write "$topic" alone in an isolated line. -DO write things like "Write an essay about $topic" to include text into an actual sentence. 
- - -### PipeExtract operator - -The PipeExtract operator is used to extract text and images from an image or a PDF. - -#### Simple Text Extraction -```plx -[pipe.extract_info] -type = "PipeExtract" -description = "extract the information" -inputs = { document = "Document" } # or { image = "Image" } if it's an image. This is the only input. -output = "Page" -``` - -Using Extract Model Settings: -```plx -[pipe.extract_with_model] -type = "PipeExtract" -description = "Extract with specific model" -inputs = { document = "Document" } -output = "Page" -model = "base_extract_mistral" # Use predefined extract preset or model alias -``` - -Only one input is allowed and it must either be an `Image` or a `PDF`. The input can be named anything. - -The output concept `Page` is a native concept, with the structure `PageContent`: -It corresponds to 1 page. Therefore, the PipeExtract outputs a `ListContent` of `Page`. - -```python -class TextAndImagesContent(StuffContent): - text: TextContent | None - images: list[ImageContent] | None - -class PageContent(StructuredContent): # CONCEPT IS "Page" - text_and_images: TextAndImagesContent - page_view: ImageContent | None = None -``` -- `text_and_images` are the text, and the related images found in the input image or PDF. -- `page_view` is the screenshot of the whole PDF page/image. - -### PipeCompose operator - -The PipeCompose operator is used to compose text using Jinja2 templates. It supports various output formats including HTML, Markdown, Mermaid diagrams, and more. 
- -#### Basic Usage - -Simple Template Composition: -```plx -[pipe.compose_report] -type = "PipeCompose" -description = "Compose a report using template" -inputs = { data = "ReportData" } -output = "Text" -template = """ -## Report Summary - -Based on the analysis: -$data - -Generated on: {{ current_date }} -""" -``` - -Using Named Templates: -```plx -[pipe.use_template] -type = "PipeCompose" -description = "Use a predefined template" -inputs = { content = "Text" } -output = "Text" -template_name = "standard_report_template" -``` - -Using Nested Template Section (for more control): -```plx -[pipe.advanced_template] -type = "PipeCompose" -description = "Use advanced template settings" -inputs = { data = "ReportData" } -output = "Text" - -[pipe.advanced_template.template] -template = "Report: $data" -category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -``` - -CRM Email Template: -```plx -[pipe.compose_follow_up_email] -type = "PipeCompose" -description = "Compose a personalized follow-up email for CRM" -inputs = { customer = "Customer", deal = "Deal", sales_rep = "SalesRep" } -output = "Text" -template_category = "html" -templating_style = { tag_style = "square_brackets", text_format = "html" } -template = """ -Subject: Following up on our $deal.product_name discussion - -Hi $customer.first_name, - -I hope this email finds you well! I wanted to follow up on our conversation about $deal.product_name from $deal.last_contact_date. - -Based on our discussion, I understand that your key requirements are: $deal.customer_requirements - -I'm excited to let you know that we can definitely help you achieve your goals. Here's what I'd like to propose: - -**Next Steps:** -- Schedule a demo tailored to your specific needs -- Provide you with a customized quote based on your requirements -- Connect you with our implementation team - -Would you be available for a 30-minute call this week? 
I have openings on: -{% for slot in available_slots %} -- {{ slot }} -{% endfor %} - -Looking forward to moving this forward together! - -Best regards, -$sales_rep.name -$sales_rep.title -$sales_rep.phone | $sales_rep.email -""" -``` - -#### Key Parameters (Template Mode) - -- `template`: Inline template string (mutually exclusive with template_name and construct) -- `template_name`: Name of a predefined template (mutually exclusive with template) -- `template_category`: Template type ("llm_prompt", "html", "markdown", "mermaid", etc.) -- `templating_style`: Styling options for template rendering -- `extra_context`: Additional context variables for template - -For more control, you can use a nested `template` section instead of the `template` field: - -- `template.template`: The template string -- `template.category`: Template type -- `template.templating_style`: Styling options - -#### Template Variables - -Use the same variable insertion rules as PipeLLM: - -- `@variable` for block insertion (multi-line content) -- `$variable` for inline insertion (short text) - -#### Construct Mode (for StructuredContent Output) - -PipeCompose can also generate `StructuredContent` objects using the `construct` section. This mode composes field values from fixed values, variable references, templates, or nested structures. 
- -**When to use construct mode:** - -- You need to output a structured object (not just Text) -- You want to deterministically compose fields from existing data -- No LLM is needed - just data composition and templating - -##### Basic Construct Usage - -```plx -[concept.SalesSummary] -description = "A structured sales summary" - -[concept.SalesSummary.structure] -report_title = { type = "text", description = "Title of the report" } -customer_name = { type = "text", description = "Customer name" } -deal_value = { type = "number", description = "Deal value" } -summary_text = { type = "text", description = "Generated summary text" } - -[pipe.compose_summary] -type = "PipeCompose" -description = "Compose a sales summary from deal data" -inputs = { deal = "Deal" } -output = "SalesSummary" - -[pipe.compose_summary.construct] -report_title = "Monthly Sales Report" -customer_name = { from = "deal.customer_name" } -deal_value = { from = "deal.amount" } -summary_text = { template = "Deal worth $deal.amount with $deal.customer_name" } -``` - -##### Field Composition Methods - -There are four ways to define field values in a construct: - -**1. Fixed Value (literal)** - -Use a literal value directly: - -```plx -[pipe.compose_report.construct] -report_title = "Annual Report" -report_year = 2024 -is_draft = false -``` - -**2. Variable Reference (`from`)** - -Get a value from working memory using a dotted path: - -```plx -[pipe.compose_report.construct] -customer_name = { from = "deal.customer_name" } -total_amount = { from = "order.total" } -street_address = { from = "customer.address.street" } -``` - -**3. Template (`template`)** - -Render a Jinja2 template with variable substitution: - -```plx -[pipe.compose_report.construct] -invoice_number = { template = "INV-$order.id" } -summary = { template = "Deal worth $deal.amount with $deal.customer_name on {{ current_date }}" } -``` - -**4. 
Nested Construct** - -For nested structures, use a TOML subsection: - -```plx -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Complete Construct Example - -```plx -domain = "invoicing" - -[concept.Address] -description = "A postal address" - -[concept.Address.structure] -street = { type = "text", description = "Street address" } -city = { type = "text", description = "City name" } -country = { type = "text", description = "Country name" } - -[concept.Invoice] -description = "An invoice document" - -[concept.Invoice.structure] -invoice_number = { type = "text", description = "Invoice number" } -total = { type = "number", description = "Total amount" } - -[pipe.compose_invoice] -type = "PipeCompose" -description = "Compose an invoice from order and customer data" -inputs = { order = "Order", customer = "Customer" } -output = "Invoice" - -[pipe.compose_invoice.construct] -invoice_number = { template = "INV-$order.id" } -total = { from = "order.total_amount" } - -[pipe.compose_invoice.construct.billing_address] -street = { from = "customer.address.street" } -city = { from = "customer.address.city" } -country = "France" -``` - -##### Key Parameters (Construct Mode) - -- `construct`: Dictionary mapping field names to their composition rules -- Each field can be: - - A literal value (string, number, boolean) - - A dict with `from` key for variable reference - - A dict with `template` key for template rendering - - A nested dict for nested structures - -**Note:** You must use either `template` or `construct`, not both. They are mutually exclusive. - -### PipeImgGen operator - -The PipeImgGen operator is used to generate images using AI image generation models. 
- -#### Basic Usage - -Simple Image Generation: -```plx -[pipe.generate_image] -type = "PipeImgGen" -description = "Generate an image from prompt" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -``` - -Using Image Generation Settings: -```plx -[pipe.generate_photo] -type = "PipeImgGen" -description = "Generate a high-quality photo" -inputs = { prompt = "ImgGenPrompt" } -output = "Photo" -model = { model = "fast-img-gen" } -aspect_ratio = "16:9" -quality = "hd" -``` - -Multiple Image Generation: -```plx -[pipe.generate_variations] -type = "PipeImgGen" -description = "Generate multiple image variations" -inputs = { prompt = "ImgGenPrompt" } -output = "Image[3]" -seed = "auto" -``` - -Advanced Configuration: -```plx -[pipe.generate_custom] -type = "PipeImgGen" -description = "Generate image with custom settings" -inputs = { prompt = "ImgGenPrompt" } -output = "Image" -model = "img_gen_preset_name" # Use predefined preset -aspect_ratio = "1:1" -quality = "hd" -background = "transparent" -output_format = "png" -is_raw = false -safety_tolerance = 3 -``` - -#### Key Parameters - -**Image Generation Settings:** -- `model`: Model choice (preset name or inline settings with model name) -- `quality`: Image quality ("standard", "hd") - -**Output Configuration:** -- `aspect_ratio`: Image dimensions ("1:1", "16:9", "9:16", etc.) -- `output_format`: File format ("png", "jpeg", "webp") -- `background`: Background type ("default", "transparent") - -**Generation Control:** -- `seed`: Random seed (integer or "auto") -- `is_raw`: Whether to apply post-processing -- `is_moderated`: Enable content moderation -- `safety_tolerance`: Content safety level (1-6) - -#### Input Requirements - -PipeImgGen requires exactly one input that must be either: -- An `ImgGenPrompt` concept -- A concept that refines `ImgGenPrompt` - -The input can be named anything but must contain the prompt text for image generation. 
- -### PipeFunc operator - -The PipeFunc operator is used to run custom Python functions within a pipeline. This allows integration of classic Python scripts and custom logic. - -#### Basic Usage - -Simple Function Call: -```plx -[pipe.process_data] -type = "PipeFunc" -description = "Process data using custom function" -inputs = { input_data = "DataType" } -output = "ProcessedData" -function_name = "process_data_function" -``` - -File Processing Example: -```plx -[pipe.read_file] -type = "PipeFunc" -description = "Read file content" -inputs = { file_path = "FilePath" } -output = "FileContent" -function_name = "read_file_content" -``` - -#### Key Parameters - -- `function_name`: Name of the Python function to call (must be registered in func_registry) - -#### Function Requirements - -The Python function must: - -1. **Be registered** in the `func_registry` -2. **Accept `working_memory`** as a parameter: - ```python - async def my_function(working_memory: WorkingMemory) -> StuffContent | list[StuffContent] | str: - # Function implementation - pass - ``` - -3. 
**Return appropriate types**: - - `StuffContent`: Single content object - - `list[StuffContent]`: Multiple content objects (becomes ListContent) - - `str`: Simple string (becomes TextContent) - -#### Function Registration - -Functions must be registered in the function registry before use: - -```python -from pipelex.system.registries.func_registry import func_registry - -@func_registry.register("my_function_name") -async def my_custom_function(working_memory: WorkingMemory) -> StuffContent: - # Access inputs from working memory - input_data = working_memory.get_stuff("input_name") - - # Process data - result = process_logic(input_data.content) - - # Return result - return MyResultContent(data=result) -``` - -#### Working Memory Access - -Inside the function, access pipeline inputs through working memory: - -```python -async def process_function(working_memory: WorkingMemory) -> TextContent: - # Get input stuff by name - input_stuff = working_memory.get_stuff("input_name") - - # Access the content - input_content = input_stuff.content - - # Process and return - processed_text = f"Processed: {input_content.text}" - return TextContent(text=processed_text) -``` - ---- - -### Rules to choose LLM models used in PipeLLMs. - -#### LLM Configuration System - -In order to use it in a pipe, an LLM is referenced by its llm_handle (alias) and possibly by an llm_preset. -LLM configurations are managed through the new inference backend system with files located in `.pipelex/inference/`: - -- **Model Deck**: `.pipelex/inference/deck/base_deck.toml` and `.pipelex/inference/deck/overrides.toml` -- **Backends**: `.pipelex/inference/backends.toml` and `.pipelex/inference/backends/*.toml` -- **Routing**: `.pipelex/inference/routing_profiles.toml` - -#### LLM Handles - -An llm_handle can be either: -1. **A direct model name** (like "gpt-4o-mini", "claude-3-sonnet") - automatically available for all models loaded by the inference backend system -2. 
**An alias** - user-defined shortcuts that map to model names, defined in the `[aliases]` section: - -```toml -[aliases] -base-claude = "claude-4.5-sonnet" -base-gpt = "gpt-5" -base-gemini = "gemini-2.5-flash" -base-mistral = "mistral-medium" -``` - -The system first looks for direct model names, then checks aliases if no direct match is found. The system handles model routing through backends automatically. - -#### Using an LLM Handle in a PipeLLM - -Here is an example of using a model to specify which LLM to use in a PipeLLM: - -```plx -[pipe.hello_world] -type = "PipeLLM" -description = "Write text about Hello World." -output = "Text" -model = { model = "gpt-5", temperature = 0.9 } -prompt = """ -Write a haiku about Hello World. -""" -``` - -As you can see, to use the LLM, you must also indicate the temperature (float between 0 and 1) and max_tokens (either an int or the string "auto"). - -#### LLM Presets - -Presets are meant to record the choice of an llm with its hyper parameters (temperature and max_tokens) if it's good for a particular task. LLM Presets are skill-oriented. - -Examples: -```toml -llm_to_engineer = { model = "base-claude", temperature = 1 } -llm_to_extract_invoice = { model = "claude-3-7-sonnet", temperature = 0.1, max_tokens = "auto" } -``` - -The interest is that these presets can be used to set the LLM choice in a PipeLLM, like this: - -```plx -[pipe.extract_invoice] -type = "PipeLLM" -description = "Extract invoice information from an invoice text transcript" -inputs = { invoice_text = "InvoiceText" } -output = "Invoice" -model = "llm_to_extract_invoice" -prompt = """ -Extract invoice information from this invoice: - -The category of this invoice is: $invoice_details.category. - -@invoice_text -""" -``` - -The setting here `model = "llm_to_extract_invoice"` works because "llm_to_extract_invoice" has been declared as an llm_preset in the deck. -You must not use an LLM preset in a PipeLLM that does not exist in the deck. 
If needed, you can add llm presets. - -You can override the predefined llm presets by setting them in `.pipelex/inference/deck/overrides.toml`. - ---- - -ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` -- For all pipelines: `pipelex validate all` -- Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. - -Then, create an example file to run the pipeline in the `examples` folder. -But don't write documentation unless asked explicitly to. - -## Guide to execute a pipeline and write example code - -### Example to execute a pipeline with text output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline - - -async def hello_world() -> str: - """ - This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
- """ - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - ) - - return pipe_output.main_stuff_as_str - - -## start Pipelex -Pipelex.make() -## run sample using asyncio -output_text = asyncio.run(hello_world()) -pretty_print(output_text, title="Your first Pipelex output") -``` - -### Example to execute a pipeline with structured output - -```python -import asyncio - -from pipelex import pretty_print -from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline -from pipelex.core.stuffs.image_content import ImageContent - -from my_project.gantt.gantt_struct import GanttChart - -SAMPLE_NAME = "extract_gantt" -IMAGE_URL = "assets/gantt/gantt_tree_house.png" - - -async def extract_gantt(image_url: str) -> GanttChart: - # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - # Output the result - return pipe_output.main_stuff_as(content_type=GanttChart) - - -## start Pipelex -Pipelex.make() - -## run sample using asyncio -gantt_chart = asyncio.run(extract_gantt(image_url=IMAGE_URL)) -pretty_print(gantt_chart, title="Gantt Chart") -``` - -### Setting up the input memory - -#### Explanation of input memory - -The input memory is a dictionary, where the key is the name of the input variable and the value provides details to make it a stuff object. The relevant definitions are: -```python -StuffContentOrData = dict[str, Any] | StuffContent | list[Any] | str -PipelineInputs = dict[str, StuffContentOrData] -``` -As you can see, we made it possible to define that stuff in different ways, using structured content or data. - -#### Different ways to set up the input memory - -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: - -```python -## Here we have a single input and it's a Text. 
-## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( - pipe_code="master_advisory_orchestrator", - inputs={ - "user_input": problem_description, - }, - ) - -## Here we have a single input and it's a document. -## Because DocumentContent is a native concept, we can use it directly as a value, -## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( - pipe_code="power_extractor_dpe", - inputs={ - "document": DocumentContent(url=pdf_url), - }, - ) - -## Here we have a single input and it's an Image. -## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( - pipe_code="fashion_variation_pipeline", - inputs={ - "fashion_photo": ImageContent(url=image_url), - }, - ) - -## Here we have a single input, it's an image but -## it's actually a more specific concept gantt.GanttImage which refines Image, -## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( - pipe_code="extract_gantt_by_steps", - inputs={ - "gantt_chart_image": { - "concept": "gantt.GanttImage", - "content": ImageContent(url=image_url), - } - }, - ) - -## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( - pipe_code="retrieve_then_answer", - dynamic_output_concept_code="contracts.Fees", - inputs={ - "text": load_text_from_path(path=text_path), - "question": { - "concept": "answer.Question", - "content": question, - }, - "client_instructions": client_instructions, - }, - ) -``` - -### Using the outputs of a pipeline - -All pipe executions return a `PipeOutput` object. -It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
-It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: - -```python - -class PipeOutput(BaseModel): - working_memory: WorkingMemory = Field(default_factory=WorkingMemory) - pipeline_run_id: str = Field(default=SpecialPipelineId.UNTITLED) - - @property - def main_stuff(self) -> Stuff: - ... - - def main_stuff_as_list(self, item_type: type[StuffContentType]) -> ListContent[StuffContentType]: - ... - - def main_stuff_as_items(self, item_type: type[StuffContentType]) -> list[StuffContentType]: - ... - - def main_stuff_as(self, content_type: type[StuffContentType]) -> StuffContentType: - ... - - @property - def main_stuff_as_text(self) -> TextContent: - ... - - @property - def main_stuff_as_str(self) -> str: - ... - - @property - def main_stuff_as_image(self) -> ImageContent: - ... - - @property - def main_stuff_as_text_and_image(self) -> TextAndImagesContent: - ... - - @property - def main_stuff_as_number(self) -> NumberContent: - ... - - @property - def main_stuff_as_html(self) -> HtmlContent: - ... - - @property - def main_stuff_as_mermaid(self) -> MermaidContent: - ... -``` - -As you can see, you can extract any variable from the output working memory. - -#### Getting the main stuff as a specific type - -Simple text as a string: - -```python -result = pipe_output.main_stuff_as_str -``` -Structured object (BaseModel): - -```python -result = pipe_output.main_stuff_as(content_type=GanttChart) -``` - -If it's a list, you can get a `ListContent` of the specific type. 
- -```python -result_list_content = pipe_output.main_stuff_as_list(item_type=GanttChart) -``` - -or if you want, you can get the actual items as a regular python list: - -```python -result_list = pipe_output.main_stuff_as_items(item_type=GanttChart) -``` - ---- - diff --git a/CLAUDE.md b/CLAUDE.md index af4572e..ed51c46 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,17 +1,17 @@ # Pipelex Coding Rules -## Guide to write or edit pipelines using the Pipelex language in .plx files +## Guide to write or edit pipelines using the Pipelex language in .mthds files - Always first write your "plan" in natural language, then transcribe it in pipelex. -- You should ALWAYS RUN validation when you are writing or editing a `.plx` file. It will ensure the pipe is runnable. If not, iterate. - - For a specific file: `pipelex validate path_to_file.plx` +- You should ALWAYS RUN validation when you are writing or editing a `.mthds` file. It will ensure the pipe is runnable. If not, iterate. + - For a specific file: `pipelex validate path_to_file.mthds` - For all pipelines: `pipelex validate all` - **IMPORTANT**: Ensure the Python virtual environment is activated before running `pipelex` commands. For standard installations, the venv is named `.venv` - always check that first. The commands will not work without proper venv activation. - Please use POSIX standard for files. (empty lines, no trailing whitespaces, etc.) 
### Pipeline File Naming -- Files must be `.plx` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to PLX files at all) +- Files must be `.mthds` for pipelines (Always add an empty line at the end of the file, and do not add trailing whitespaces to MTHDS files at all) - Files must be `.py` for code defining the data structures - Use descriptive names in `snake_case` @@ -26,7 +26,7 @@ A pipeline file has three main sections: domain = "domain_code" description = "Description of the domain" # Optional ``` -Note: The domain code usually matches the plx filename for single-file domains. For multi-file domains, use the subdirectory name. +Note: The domain code usually matches the mthds filename for single-file domains. For multi-file domains, use the subdirectory name. #### Concept Definitions @@ -985,7 +985,7 @@ You can override the predefined llm presets by setting them in `.pipelex/inferen --- ALWAYS RUN validation when you are finished writing pipelines: This checks for errors. If there are errors, iterate until it works. -- For a specific bundle/file: `pipelex validate path_to_file.plx` +- For a specific bundle/file: `pipelex validate path_to_file.mthds` - For all pipelines: `pipelex validate all` - Remember: Ensure your Python virtual environment is activated (typically `.venv` for standard installations) before running `pipelex` commands. @@ -1001,7 +1001,7 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline +from pipelex.pipeline.runner import PipelexRunner async def hello_world() -> str: @@ -1009,9 +1009,11 @@ async def hello_world() -> str: This function demonstrates the use of a super simple Pipelex pipeline to generate text. 
""" # Run the pipe - pipe_output = await execute_pipeline( + runner = PipelexRunner() + response = await runner.execute_pipeline( pipe_code="hello_world", ) + pipe_output = response.pipe_output return pipe_output.main_stuff_as_str @@ -1030,7 +1032,7 @@ import asyncio from pipelex import pretty_print from pipelex.pipelex import Pipelex -from pipelex.pipeline.execute import execute_pipeline +from pipelex.pipeline.runner import PipelexRunner from pipelex.core.stuffs.image_content import ImageContent from my_project.gantt.gantt_struct import GanttChart @@ -1041,7 +1043,8 @@ IMAGE_URL = "assets/gantt/gantt_tree_house.png" async def extract_gantt(image_url: str) -> GanttChart: # Run the pipe - pipe_output = await execute_pipeline( + runner = PipelexRunner() + response = await runner.execute_pipeline( pipe_code="extract_gantt_by_steps", inputs={ "gantt_chart_image": { @@ -1050,6 +1053,7 @@ async def extract_gantt(image_url: str) -> GanttChart: } }, ) + pipe_output = response.pipe_output # Output the result return pipe_output.main_stuff_as(content_type=GanttChart) @@ -1075,41 +1079,46 @@ As you can seen, we made it so different ways can be used to define that stuff u #### Different ways to set up the input memory -So here are a few concrete examples of calls to execute_pipeline with various ways to set up the input memory: +So here are a few concrete examples of calls to `PipelexRunner.execute_pipeline` with various ways to set up the input memory: ```python + runner = PipelexRunner() + ## Here we have a single input and it's a Text. ## If you assign a string, by default it will be considered as a TextContent. - pipe_output = await execute_pipeline( + response = await runner.execute_pipeline( pipe_code="master_advisory_orchestrator", inputs={ "user_input": problem_description, }, ) + pipe_output = response.pipe_output ## Here we have a single input and it's a document. 
## Because DocumentContent is a native concept, we can use it directly as a value, ## the system knows what content it corresponds to: - pipe_output = await execute_pipeline( + response = await runner.execute_pipeline( pipe_code="power_extractor_dpe", inputs={ "document": DocumentContent(url=pdf_url), }, ) + pipe_output = response.pipe_output ## Here we have a single input and it's an Image. ## Because ImageContent is a native concept, we can use it directly as a value: - pipe_output = await execute_pipeline( + response = await runner.execute_pipeline( pipe_code="fashion_variation_pipeline", inputs={ "fashion_photo": ImageContent(url=image_url), }, ) + pipe_output = response.pipe_output ## Here we have a single input, it's an image but ## its actually a more specific concept gantt.GanttImage which refines Image, ## so we must provide it using a dict with the concept and the content: - pipe_output = await execute_pipeline( + response = await runner.execute_pipeline( pipe_code="extract_gantt_by_steps", inputs={ "gantt_chart_image": { @@ -1118,9 +1127,10 @@ So here are a few concrete examples of calls to execute_pipeline with various wa } }, ) + pipe_output = response.pipe_output ## Here is a more complex example with multiple inputs assigned using different ways: - pipe_output = await execute_pipeline( + response = await runner.execute_pipeline( pipe_code="retrieve_then_answer", dynamic_output_concept_code="contracts.Fees", inputs={ @@ -1132,11 +1142,12 @@ So here are a few concrete examples of calls to execute_pipeline with various wa "client_instructions": client_instructions, }, ) + pipe_output = response.pipe_output ``` ### Using the outputs of a pipeline -All pipe executions return a `PipeOutput` object. +All pipe executions return a `PipelexPipelineExecuteResponse` object, from which you access the `PipeOutput` via `response.pipe_output`. It's a BaseModel which contains the resulting working memory at the end of the execution and the pipeline run id. 
It also provides a bunch of accessor functions and properties to unwrap the main stuff, which is the last stuff added to the working memory: diff --git a/cocode/pipelines/doc_proofread/doc_proofread.plx b/cocode/pipelines/doc_proofread/doc_proofread.mthds similarity index 75% rename from cocode/pipelines/doc_proofread/doc_proofread.plx rename to cocode/pipelines/doc_proofread/doc_proofread.mthds index 2ec9504..ff32deb 100644 --- a/cocode/pipelines/doc_proofread/doc_proofread.plx +++ b/cocode/pipelines/doc_proofread/doc_proofread.mthds @@ -1,13 +1,13 @@ -domain = "doc_proofread" +domain = "doc_proofread" description = "Systematically proofread documentation against actual codebase to find inconsistencies using chunking" [concept] -DocumentationFile = "A documentation file that needs to be proofread against the codebase" -FilePath = "A path to a file in the codebase" -RepositoryMap = "A repository map containing the codebase structure and file contents" -CodebaseFileContent = "Content of a codebase file" +DocumentationFile = "A documentation file that needs to be proofread against the codebase" +FilePath = "A path to a file in the codebase" +RepositoryMap = "A repository map containing the codebase structure and file contents" +CodebaseFileContent = "Content of a codebase file" DocumentationInconsistency = "An inconsistency found between documentation and actual code" -MarkdownReport = "A markdown report containing documentation inconsistencies formatted as a Cursor prompt" +MarkdownReport = "A markdown report containing documentation inconsistencies formatted as a Cursor prompt" [pipe] @@ -17,9 +17,9 @@ description = "Process a single documentation file to find inconsistencies" inputs = { doc_file = "DocumentationFile", repo_map = "RepositoryMap" } output = "DocumentationInconsistency[]" steps = [ - { pipe = "find_related_code_files", result = "related_file_paths" }, - { pipe = "read_doc_file", result = "related_files" }, - { pipe = "proofread_single_doc", result = 
"inconsistencies" } + { pipe = "find_related_code_files", result = "related_file_paths" }, + { pipe = "read_doc_file", result = "related_files" }, + { pipe = "proofread_single_doc", result = "inconsistencies" }, ] [pipe.find_related_code_files] @@ -28,7 +28,7 @@ description = "Find code files that implement or use elements mentioned in docs" inputs = { doc_file = "DocumentationFile", repo_map = "RepositoryMap" } output = "FilePath[]" model = { model = "llm_for_large_codebase", temperature = 0.1 } -model_to_structure = "cheap_llm_for_structured" +model_to_structure = "@default-small-structured" structuring_method = "preliminary_text" system_prompt = """ Extract code elements mentioned in docs (classes, functions, commands) and find their actual implementations or usages in the codebase. @@ -49,7 +49,7 @@ type = "PipeLLM" description = "Find major inconsistencies between docs and code" inputs = { doc_file = "DocumentationFile", related_files = "CodebaseFileContent" } output = "DocumentationInconsistency[]" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ Find MAJOR inconsistencies between documentation and code that would cause user code to fail. Only report issues that would completely break functionality or lead users down the wrong path. @@ -77,7 +77,7 @@ type = "PipeLLM" description = "Create a markdown report with inconsistencies formatted as a Cursor prompt" inputs = { all_inconsistencies = "DocumentationInconsistency" } output = "MarkdownReport" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ Create a concise markdown report for Cursor AI with specific, actionable fixes for documentation inconsistencies. Focus only on critical issues that would break user code or cause major confusion. 
@@ -110,14 +110,13 @@ description = "Complete documentation proofreading pipeline for CLI usage" inputs = { repo_map = "RepositoryMap", doc_files = "DocumentationFile[]" } output = "MarkdownReport" steps = [ - { pipe = "proofread_doc_sequence", batch_over = "doc_files", batch_as = "doc_file", result = "all_inconsistencies" }, - { pipe = "create_cursor_report", result = "cursor_report" } + { pipe = "proofread_doc_sequence", batch_over = "doc_files", batch_as = "doc_file", result = "all_inconsistencies" }, + { pipe = "create_cursor_report", result = "cursor_report" }, ] [pipe.read_doc_file] -type = "PipeFunc" -description = "Read the content of related codebase files" -inputs = { related_file_paths = "FilePath" } -output = "CodebaseFileContent[]" +type = "PipeFunc" +description = "Read the content of related codebase files" +inputs = { related_file_paths = "FilePath" } +output = "CodebaseFileContent[]" function_name = "read_file_content" - diff --git a/cocode/pipelines/doc_update/ai_instruction_update.plx b/cocode/pipelines/doc_update/ai_instruction_update.mthds similarity index 92% rename from cocode/pipelines/doc_update/ai_instruction_update.plx rename to cocode/pipelines/doc_update/ai_instruction_update.mthds index 4d7b72a..bfdf221 100644 --- a/cocode/pipelines/doc_update/ai_instruction_update.plx +++ b/cocode/pipelines/doc_update/ai_instruction_update.mthds @@ -1,12 +1,12 @@ -domain = "ai_instruction_update" +domain = "ai_instruction_update" description = "Pipeline for updating AI instruction files (AGENTS.md, CLAUDE.md, cursor rules) based on git diff" [concept] -AgentsContent = "Content of the AGENTS.md file" -ClaudeContent = "Content of the CLAUDE.md file" -CursorRulesContent = "Content of cursor rules files" -AIInstructionFileAnalysis = "Analysis of changes needed for a specific AI instruction file" -AIInstructionParallelResults = "Results from parallel analysis of AI instruction files" +AgentsContent = "Content of the AGENTS.md file" +ClaudeContent = 
"Content of the CLAUDE.md file" +CursorRulesContent = "Content of cursor rules files" +AIInstructionFileAnalysis = "Analysis of changes needed for a specific AI instruction file" +AIInstructionParallelResults = "Results from parallel analysis of AI instruction files" AIInstructionUpdateSuggestions = "Comprehensive suggestions for updating all AI instruction files" [pipe] @@ -277,10 +277,10 @@ type = "PipeParallel" description = "Analyze changes for all AI instruction files in parallel" inputs = { git_diff = "git.GitDiff", agents_content = "AgentsContent", claude_content = "ClaudeContent", cursor_rules_content = "CursorRulesContent" } output = "AIInstructionParallelResults" -parallels = [ - { pipe = "analyze_agents_md_changes", result = "agents_analysis" }, - { pipe = "analyze_claude_md_changes", result = "claude_analysis" }, - { pipe = "analyze_cursor_rules_changes", result = "cursor_analysis" }, +branches = [ + { pipe = "analyze_agents_md_changes", result = "agents_analysis" }, + { pipe = "analyze_claude_md_changes", result = "claude_analysis" }, + { pipe = "analyze_cursor_rules_changes", result = "cursor_analysis" }, ] combined_output = "ai_instruction_update.AIInstructionParallelResults" @@ -290,8 +290,7 @@ description = "AI instruction update analysis with parallel file processing and inputs = { git_diff = "git.GitDiff", agents_content = "AgentsContent", claude_content = "ClaudeContent", cursor_rules_content = "CursorRulesContent" } output = "Text" steps = [ - { pipe = "ai_instruction_update_parallel", result = "parallel_analyses" }, - { pipe = "combine_ai_instruction_analyses", result = "combined_suggestions" }, - { pipe = "format_ai_instruction_output", result = "ai_instruction_output" }, + { pipe = "ai_instruction_update_parallel", result = "parallel_analyses" }, + { pipe = "combine_ai_instruction_analyses", result = "combined_suggestions" }, + { pipe = "format_ai_instruction_output", result = "ai_instruction_output" }, ] - diff --git 
a/cocode/pipelines/doc_update/doc_update.plx b/cocode/pipelines/doc_update/doc_update.mthds similarity index 100% rename from cocode/pipelines/doc_update/doc_update.plx rename to cocode/pipelines/doc_update/doc_update.mthds diff --git a/cocode/pipelines/swe_diff/changelog.plx b/cocode/pipelines/swe_diff/changelog.mthds similarity index 81% rename from cocode/pipelines/swe_diff/changelog.plx rename to cocode/pipelines/swe_diff/changelog.mthds index 559a24b..c226873 100644 --- a/cocode/pipelines/swe_diff/changelog.plx +++ b/cocode/pipelines/swe_diff/changelog.mthds @@ -1,9 +1,9 @@ -domain = "changelog" +domain = "changelog" description = "Pipelines for analyzing differences between two versions of a codebase." [concept] StructuredChangelog = "A structured changelog with sections for each type of change." -MarkdownChangelog = "A text report in markdown format that summarizes the changes made to the codebase between two versions." +MarkdownChangelog = "A text report in markdown format that summarizes the changes made to the codebase between two versions." [pipe] [pipe.write_changelog] @@ -12,8 +12,8 @@ description = "Write a comprehensive changelog for a software project" inputs = { git_diff = "git.GitDiff" } output = "MarkdownChangelog" steps = [ - { pipe = "structure_changelog_from_git_diff", result = "structured_changelog" }, - { pipe = "format_changelog_as_markdown", result = "markdown_changelog" }, + { pipe = "structure_changelog_from_git_diff", result = "structured_changelog" }, + { pipe = "format_changelog_as_markdown", result = "markdown_changelog" }, ] [pipe.structure_changelog_from_git_diff] @@ -21,7 +21,7 @@ type = "PipeLLM" description = "Write a changelog for a software project." inputs = { git_diff = "git.GitDiff" } output = "StructuredChangelog" -model = "llm_for_git_diff" +model = "$engineering-codebase-analysis" system_prompt = """ You are an expert technical writer and software architect. 
Your task is to carefully review the code diff and write a structured changelog. """ @@ -33,10 +33,10 @@ Be sure to include changes to code but also complementary pipelines, scripts, do """ [pipe.format_changelog_as_markdown] -type = "PipeCompose" +type = "PipeCompose" description = "Format the final changelog in markdown with proper structure" -inputs = { structured_changelog = "StructuredChangelog" } -output = "MarkdownChangelog" +inputs = { structured_changelog = "StructuredChangelog" } +output = "MarkdownChangelog" [pipe.format_changelog_as_markdown.template] category = "markdown" diff --git a/cocode/pipelines/swe_diff/changelog_enhanced.plx b/cocode/pipelines/swe_diff/changelog_enhanced.mthds similarity index 87% rename from cocode/pipelines/swe_diff/changelog_enhanced.plx rename to cocode/pipelines/swe_diff/changelog_enhanced.mthds index c6f46e9..75fa7c8 100644 --- a/cocode/pipelines/swe_diff/changelog_enhanced.plx +++ b/cocode/pipelines/swe_diff/changelog_enhanced.mthds @@ -1,4 +1,4 @@ -domain = "changelog_enhanced" +domain = "changelog_enhanced" description = "Pipelines for analyzing differences between two versions of a codebase — enhanced version." [concept] @@ -12,9 +12,9 @@ description = "Write a comprehensive changelog for a software project" inputs = { git_diff = "git.GitDiff" } output = "changelog.MarkdownChangelog" steps = [ - { pipe = "draft_changelog_from_git_diff", result = "draft_changelog" }, - { pipe = "polish_changelog", result = "structured_changelog" }, - { pipe = "format_changelog_as_markdown", result = "markdown_changelog" }, + { pipe = "draft_changelog_from_git_diff", result = "draft_changelog" }, + { pipe = "polish_changelog", result = "structured_changelog" }, + { pipe = "format_changelog_as_markdown", result = "markdown_changelog" }, ] [pipe.draft_changelog_from_git_diff] @@ -22,7 +22,7 @@ type = "PipeLLM" description = "Write a changelog for a software project." 
inputs = { git_diff = "git.GitDiff" } output = "DraftChangelog" -model = "llm_for_git_diff" +model = "$engineering-codebase-analysis" system_prompt = """ You are an expert technical writer and software architect. """ @@ -50,7 +50,7 @@ type = "PipeLLM" description = "Polish and improve the draft changelog" inputs = { draft_changelog = "DraftChangelog" } output = "changelog.StructuredChangelog" -model = "llm_for_swe" +model = "$engineering-structured" structuring_method = "preliminary_text" system_prompt = """ You are an expert technical writer. Your task is to polish and improve a draft changelog to make it more clear, concise, and well-structured. @@ -65,4 +65,3 @@ And when you see several changes that were made for the same purpose, groupd the Don't add fluff, stay sharp and to the point. Use nice readable markdown formatting. """ - diff --git a/cocode/pipelines/swe_diff/changelog_struct.py b/cocode/pipelines/swe_diff/changelog_struct.py index 4d99c90..62611d0 100644 --- a/cocode/pipelines/swe_diff/changelog_struct.py +++ b/cocode/pipelines/swe_diff/changelog_struct.py @@ -1,7 +1,3 @@ -from __future__ import annotations - -from typing import List - from pipelex import log from pipelex.core.stuffs.structured_content import StructuredContent from pydantic import Field, model_validator @@ -9,12 +5,12 @@ class StructuredChangelog(StructuredContent): - added: List[str] = Field(default_factory=list, description="New features.") - changed: List[str] = Field(default_factory=list, description="Updates to existing behavior.") - fixed: List[str] = Field(default_factory=list, description="Bug fixes.") - removed: List[str] = Field(default_factory=list, description="Features removed.") - deprecated: List[str] = Field(default_factory=list, description="Soon-to-be removed features.") - security: List[str] = Field(default_factory=list, description="Security-related changes.") + added: list[str] = Field(default_factory=list, description="New features.") + changed: list[str] = 
Field(default_factory=list, description="Updates to existing behavior.") + fixed: list[str] = Field(default_factory=list, description="Bug fixes.") + removed: list[str] = Field(default_factory=list, description="Features removed.") + deprecated: list[str] = Field(default_factory=list, description="Soon-to-be removed features.") + security: list[str] = Field(default_factory=list, description="Security-related changes.") # --- validation --------------------------------------------------------- @model_validator(mode="after") diff --git a/cocode/pipelines/swe_diff/git.plx b/cocode/pipelines/swe_diff/git.mthds similarity index 90% rename from cocode/pipelines/swe_diff/git.plx rename to cocode/pipelines/swe_diff/git.mthds index 9e2b10d..370d480 100644 --- a/cocode/pipelines/swe_diff/git.plx +++ b/cocode/pipelines/swe_diff/git.mthds @@ -1,4 +1,4 @@ -domain = "git" +domain = "git" description = "Pipelines for analyzing git diffs." [concept] @@ -9,7 +9,7 @@ type = "PipeLLM" description = "Analyze the git diff based on a prompt." inputs = { git_diff = "GitDiff", prompt = "Text" } output = "Text" -model = "llm_for_git_diff" +model = "$engineering-codebase-analysis" system_prompt = """ You are an expert technical writer and software architect. Your task is to carefully review and analyze the code diff. """ diff --git a/cocode/pipelines/swe_docs/swe_docs.plx b/cocode/pipelines/swe_docs/swe_docs.mthds similarity index 84% rename from cocode/pipelines/swe_docs/swe_docs.plx rename to cocode/pipelines/swe_docs/swe_docs.mthds index 41cf437..b8f2e11 100644 --- a/cocode/pipelines/swe_docs/swe_docs.plx +++ b/cocode/pipelines/swe_docs/swe_docs.mthds @@ -1,16 +1,16 @@ -domain = "swe" +domain = "swe" description = "Pipelines for software engineering tasks." [concept] -SoftwareDoc = "Documentation related to software engineering projects or codebases." -InconsistencyReport = "A text report enumerating any inconsistencies detected within the provided documentation." 
-SoftwareFeaturesRecap = "A comprehensive overview of software features highlighting key capabilities, strengths, and limitations without technical implementation details." -FundamentalsDoc = "A comprehensive overview of the fundamental concepts and principles of software engineering." -EnvironmentBuildDoc = "A comprehensive overview of the environment and build setup for a software project." -CodingStandardsDoc = "A comprehensive overview of the coding standards and best practices for a software project." -TestStrategyDoc = "A comprehensive overview of the testing strategy and procedures for a software project." +SoftwareDoc = "Documentation related to software engineering projects or codebases." +InconsistencyReport = "A text report enumerating any inconsistencies detected within the provided documentation." +SoftwareFeaturesRecap = "A comprehensive overview of software features highlighting key capabilities, strengths, and limitations without technical implementation details." +FundamentalsDoc = "A comprehensive overview of the fundamental concepts and principles of software engineering." +EnvironmentBuildDoc = "A comprehensive overview of the environment and build setup for a software project." +CodingStandardsDoc = "A comprehensive overview of the coding standards and best practices for a software project." +TestStrategyDoc = "A comprehensive overview of the testing strategy and procedures for a software project." ContextualGuidelinesDoc = "A comprehensive overview of the contextual development guidelines and conventions for a software project." -CollaborationDoc = "A comprehensive overview of the collaboration and workflow information for a software project." +CollaborationDoc = "A comprehensive overview of the collaboration and workflow information for a software project." OnboardingDocumentation = "Complete set of documentation needed for onboarding new developers to a project." 
[pipe] @@ -19,7 +19,7 @@ type = "PipeLLM" description = "Identify inconsistencies in a set of software engineering documents." inputs = { repo_text = "SoftwareDoc" } output = "InconsistencyReport" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are an expert technical writer and software architect. Your task is to carefully review software documentation and point out any inconsistencies or contradictions. """ @@ -40,12 +40,12 @@ type = "PipeParallel" description = "Extract comprehensive onboarding documentation from software project docs" inputs = { repo_text = "SoftwareDoc" } output = "OnboardingDocumentation" -parallels = [ - { pipe = "extract_fundamentals", result = "fundamentals" }, - { pipe = "extract_environment_build", result = "environment_build" }, - { pipe = "extract_coding_standards", result = "coding_standards" }, - { pipe = "extract_test_strategy", result = "test_strategy" }, - { pipe = "extract_collaboration", result = "collaboration" }, +branches = [ + { pipe = "extract_fundamentals", result = "fundamentals" }, + { pipe = "extract_environment_build", result = "environment_build" }, + { pipe = "extract_coding_standards", result = "coding_standards" }, + { pipe = "extract_test_strategy", result = "test_strategy" }, + { pipe = "extract_collaboration", result = "collaboration" }, ] combined_output = "swe.OnboardingDocumentation" @@ -54,7 +54,7 @@ type = "PipeLLM" description = "Extract fundamental project information from documentation" inputs = { repo_text = "SoftwareDoc" } output = "FundamentalsDoc" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are an expert at extracting structured project information from software documentation. Focus on identifying core project context and foundational information. 
""" @@ -77,7 +77,7 @@ type = "PipeLLM" description = "Extract environment setup and build information from documentation" inputs = { repo_text = "SoftwareDoc" } output = "EnvironmentBuildDoc" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are an expert at extracting development environment setup information from software documentation. """ @@ -102,7 +102,7 @@ type = "PipeLLM" description = "Extract code quality and style information from documentation" inputs = { repo_text = "SoftwareDoc" } output = "CodingStandardsDoc" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are an expert at extracting code quality standards and tooling information from software documentation. """ @@ -128,7 +128,7 @@ type = "PipeLLM" description = "Extract testing strategy and procedures from documentation" inputs = { repo_text = "SoftwareDoc" } output = "TestStrategyDoc" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are an expert at extracting testing strategies and procedures from software documentation. """ @@ -153,7 +153,7 @@ type = "PipeLLM" description = "Extract contextual development guidelines from documentation" inputs = { repo_text = "SoftwareDoc" } output = "ContextualGuidelinesDoc" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are an expert at extracting contextual development guidelines and conventions from software documentation. """ @@ -176,7 +176,7 @@ type = "PipeLLM" description = "Extract collaboration and workflow information from documentation" inputs = { repo_text = "SoftwareDoc" } output = "CollaborationDoc" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are an expert at extracting collaboration processes and workflow information from software documentation. 
""" @@ -201,7 +201,7 @@ type = "PipeLLM" description = "Extract and analyze software features from documentation to create a comprehensive feature overview" inputs = { repo_text = "SoftwareDoc" } output = "SoftwareFeaturesRecap" -model = "llm_for_swe" +model = "$engineering-structured" system_prompt = """ You are a product analyst and technical writer specializing in software feature analysis. Your task is to analyze software documentation and create compelling feature presentations that highlight capabilities, strengths, and potential limitations. """ @@ -228,4 +228,3 @@ Guidelines: If insufficient information is available in the documentation to assess features comprehensively, indicate which aspects need additional information. """ - diff --git a/cocode/pipelines/text_utils.plx b/cocode/pipelines/text_utils.mthds similarity index 100% rename from cocode/pipelines/text_utils.plx rename to cocode/pipelines/text_utils.mthds diff --git a/cocode/swe/swe_cmd.py b/cocode/swe/swe_cmd.py index 71d60c0..e929d20 100644 --- a/cocode/swe/swe_cmd.py +++ b/cocode/swe/swe_cmd.py @@ -9,7 +9,7 @@ from pipelex.core.stuffs.stuff_factory import StuffFactory from pipelex.hub import get_report_delegate, get_required_concept from pipelex.pipe_run.pipe_run_mode import PipeRunMode -from pipelex.pipeline.execute import execute_pipeline +from pipelex.pipeline.runner import PipelexRunner from pipelex.tools.misc.file_utils import ensure_path, failable_load_text_from_path, load_text_from_path, save_text_to_path from cocode.pipelines.doc_proofread.doc_proofread_models import DocumentationFile, DocumentationInconsistency, RepositoryMap @@ -25,6 +25,16 @@ class SweFromRepoDiffWithPromptError(Exception): pass +async def _execute_pipeline( + pipe_code: str, + inputs: Any = None, + pipe_run_mode: PipeRunMode | None = None, +) -> PipeOutput: + runner = PipelexRunner(pipe_run_mode=pipe_run_mode) + response = await runner.execute_pipeline(pipe_code=pipe_code, inputs=inputs) + return 
response.pipe_output + + async def swe_from_repo( pipe_code: str, repo_path: str, @@ -59,7 +69,7 @@ async def swe_from_repo( repo_text = get_repo_text_for_swe(repox_processor=processor) # Run the pipe - pipe_output = await execute_pipeline( + pipe_output = await _execute_pipeline( pipe_code=pipe_code, pipe_run_mode=pipe_run_mode, inputs={"repo_text": repo_text}, @@ -90,7 +100,7 @@ async def swe_from_file( text = load_text_from_path(input_file_path) # Run the pipe - pipe_output = await execute_pipeline( + pipe_output = await _execute_pipeline( pipe_code=pipe_code, pipe_run_mode=pipe_run_mode, inputs={"text": text}, @@ -129,7 +139,7 @@ async def swe_from_repo_diff( return # Run the pipe - pipe_output = await execute_pipeline( + pipe_output = await _execute_pipeline( pipe_code=pipe_code, pipe_run_mode=pipe_run_mode, inputs={ @@ -177,7 +187,7 @@ async def swe_from_repo_diff_with_prompt( return # Run the pipe - pipe_output = await execute_pipeline( + pipe_output = await _execute_pipeline( pipe_code=pipe_code, pipe_run_mode=pipe_run_mode, inputs={ @@ -214,7 +224,7 @@ async def swe_doc_update_from_diff( # Generate git diff git_diff = run_git_diff_command(repo_path=repo_path, version=version, include_patterns=include_patterns, exclude_patterns=exclude_patterns) - pipe_output = await execute_pipeline( + pipe_output = await _execute_pipeline( pipe_code="doc_update", inputs={ "git_diff": { @@ -293,7 +303,7 @@ async def swe_ai_instruction_update_from_diff( stuff_list=[git_diff_stuff, agents_content_stuff, claude_content_stuff, cursor_rules_content_stuff] ) - pipe_output = await execute_pipeline( + pipe_output = await _execute_pipeline( pipe_code="ai_instruction_update", inputs=working_memory, ) @@ -367,7 +377,7 @@ async def swe_doc_proofread( working_memory = WorkingMemoryFactory.make_from_multiple_stuffs(stuff_list=[repo_map_stuff, doc_files_stuff]) - pipe_output = await execute_pipeline( + pipe_output = await _execute_pipeline( pipe_code="doc_proofread", 
inputs=working_memory, ) diff --git a/pyproject.toml b/pyproject.toml index c5040bd..4310ada 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,46 +8,47 @@ license = "MIT" readme = "README.md" requires-python = ">=3.10" classifiers = [ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Operating System :: OS Independent", ] dependencies = [ - "pipelex[anthropic,google,google-genai,bedrock]", - "PyGithub==2.4.0", + "pipelex[anthropic,google,google-genai,bedrock]", + "PyGithub==2.4.0", ] [tool.uv.sources] -pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "pre-release/v0.18.0b2" } +# pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "pre-release/v0.18.0b2" } +pipelex = { git = "https://github.com/Pipelex/pipelex.git", branch = "feature/Chicago" } [project.optional-dependencies] docs = [ - "mkdocs==1.6.1", - "mkdocs-glightbox==0.4.0", - "mkdocs-material==9.6.14", - "mkdocs-meta-manager==1.1.0", + "mkdocs==1.6.1", + "mkdocs-glightbox==0.4.0", + "mkdocs-material==9.6.14", + "mkdocs-meta-manager==1.1.0", ] dev = [ - "boto3-stubs>=1.35.24", - "mypy>=1.11.2", - "pyright>=1.1.405", - "pytest>=9.0.1", - "pytest_asyncio>=0.24.0", - "pytest-cov>=6.1.1", - "pytest-mock>=3.14.0", - "pytest-sugar>=1.0.0", - "ruff>=0.6.8", - "types-aioboto3[bedrock,bedrock-runtime]>=13.4.0", - "types-aiofiles>=24.1.0.20240626", - "types-markdown>=3.6.0.20240316", - "types-networkx>=3.3.0.20241020", - "types-openpyxl>=3.1.5.20250306", - 
"types-PyYAML>=6.0.12.20250326", + "boto3-stubs>=1.35.24", + "mypy>=1.11.2", + "pyright>=1.1.405", + "pytest>=9.0.1", + "pytest_asyncio>=0.24.0", + "pytest-cov>=6.1.1", + "pytest-mock>=3.14.0", + "pytest-sugar>=1.0.0", + "ruff>=0.6.8", + "types-aioboto3[bedrock,bedrock-runtime]>=13.4.0", + "types-aiofiles>=24.1.0.20240626", + "types-markdown>=3.6.0.20240316", + "types-networkx>=3.3.0.20241020", + "types-openpyxl>=3.1.5.20250306", + "types-PyYAML>=6.0.12.20250326", ] [project.scripts] @@ -192,41 +193,41 @@ typeCheckingMode = "strict" [tool.pytest] minversion = "9.0" addopts = [ - "--import-mode=importlib", - "-ra", - "-m", - "not (inference or llm or img_gen or extract or needs_output or pipelex_api)", + "--import-mode=importlib", + "-ra", + "-m", + "not (inference or llm or img_gen or extract or needs_output or pipelex_api)", ] asyncio_default_fixture_loop_scope = "session" xfail_strict = true filterwarnings = [ - "ignore:Support for class-based `config` is deprecated:DeprecationWarning", - "ignore:websockets.*is deprecated:DeprecationWarning", - "ignore:typing\\.io is deprecated:DeprecationWarning", - "ignore:typing\\.re is deprecated:DeprecationWarning", - "ignore:.*has been moved to cryptography.*", - "ignore:Use.*Types instead", + "ignore:Support for class-based `config` is deprecated:DeprecationWarning", + "ignore:websockets.*is deprecated:DeprecationWarning", + "ignore:typing\\.io is deprecated:DeprecationWarning", + "ignore:typing\\.re is deprecated:DeprecationWarning", + "ignore:.*has been moved to cryptography.*", + "ignore:Use.*Types instead", ] markers = [ - "unit: fast isolated tests with no external dependencies", - "integration: tests that require external services or dependencies", - "end2end: full workflow tests that test complete user scenarios", - "inference: slow and costly due to inference calls", - "gha_disabled: tests that should not run in GitHub Actions", - "codex_disabled: tests that should not run in Codex", - "dry_runnable: tests that can 
be run in dry-run mode", + "unit: fast isolated tests with no external dependencies", + "integration: tests that require external services or dependencies", + "end2end: full workflow tests that test complete user scenarios", + "inference: slow and costly due to inference calls", + "gha_disabled: tests that should not run in GitHub Actions", + "codex_disabled: tests that should not run in Codex", + "dry_runnable: tests that can be run in dry-run mode", ] [tool.ruff] exclude = [ - ".cursor", - ".git", - ".github", - ".mypy_cache", - ".ruff_cache", - ".venv", - ".vscode", - "trigger_pipeline", + ".cursor", + ".git", + ".github", + ".mypy_cache", + ".ruff_cache", + ".venv", + ".vscode", + "trigger_pipeline", ] line-length = 150 target-version = "py311" @@ -237,20 +238,20 @@ target-version = "py311" ignore = ["F401"] external = ["F401"] select = [ - "E4", - "E7", - "E9", - "F", - "A001", - "A002", - "A003", - "RUF008", - "RUF009", - "RUF012", - "RUF013", - "RUF100", - "E501", - "I", + "E4", + "E7", + "E9", + "F", + "A001", + "A002", + "A003", + "RUF008", + "RUF009", + "RUF012", + "RUF013", + "RUF100", + "E501", + "I", ] [tool.pipelex] diff --git a/tests/integration/test_hello_world.py b/tests/integration/test_hello_world.py index f96165a..0f74f10 100644 --- a/tests/integration/test_hello_world.py +++ b/tests/integration/test_hello_world.py @@ -1,6 +1,6 @@ import pytest from pipelex.pipe_run.pipe_run_mode import PipeRunMode -from pipelex.pipeline.execute import execute_pipeline +from pipelex.pipeline.runner import PipelexRunner @pytest.mark.xfail(reason="This test is failing because the hello_world pipeline is not found in the pipeline library.") @@ -10,9 +10,8 @@ async def test_hello_world(pipe_run_mode: PipeRunMode): """Test that the hello_world function runs successfully.""" # Run the pipe - pipe_output = await execute_pipeline( - pipe_code="hello_world", - pipe_run_mode=pipe_run_mode, - ) + runner = PipelexRunner(pipe_run_mode=pipe_run_mode) + response = await 
runner.execute_pipeline(pipe_code="hello_world") + pipe_output = response.pipe_output assert pipe_output is not None diff --git a/tests/pipelines/hello_world.plx b/tests/pipelines/hello_world.mthds similarity index 100% rename from tests/pipelines/hello_world.plx rename to tests/pipelines/hello_world.mthds diff --git a/uv.lock b/uv.lock index ea129f6..8b90f25 100644 --- a/uv.lock +++ b/uv.lock @@ -182,7 +182,7 @@ wheels = [ [[package]] name = "anthropic" -version = "0.69.0" +version = "0.83.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -194,9 +194,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c8/9d/9ad1778b95f15c5b04e7d328c1b5f558f1e893857b7c33cd288c19c0057a/anthropic-0.69.0.tar.gz", hash = "sha256:c604d287f4d73640f40bd2c0f3265a2eb6ce034217ead0608f6b07a8bc5ae5f2", size = 480622, upload-time = "2025-09-29T16:53:45.282Z" } +sdist = { url = "https://files.pythonhosted.org/packages/db/e5/02cd2919ec327b24234abb73082e6ab84c451182cc3cc60681af700f4c63/anthropic-0.83.0.tar.gz", hash = "sha256:a8732c68b41869266c3034541a31a29d8be0f8cd0a714f9edce3128b351eceb4", size = 534058, upload-time = "2026-02-19T19:26:38.904Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/38/75129688de5637eb5b383e5f2b1570a5cc3aecafa4de422da8eea4b90a6c/anthropic-0.69.0-py3-none-any.whl", hash = "sha256:1f73193040f33f11e27c2cd6ec25f24fe7c3f193dc1c5cde6b7a08b18a16bcc5", size = 337265, upload-time = "2025-09-29T16:53:43.686Z" }, + { url = "https://files.pythonhosted.org/packages/5f/75/b9d58e4e2a4b1fc3e75ffbab978f999baf8b7c4ba9f96e60edb918ba386b/anthropic-0.83.0-py3-none-any.whl", hash = "sha256:f069ef508c73b8f9152e8850830d92bd5ef185645dbacf234bb213344a274810", size = 456991, upload-time = "2026-02-19T19:26:40.114Z" }, ] [[package]] @@ -560,7 +560,7 @@ requires-dist = [ { name = "mkdocs-material", marker = "extra == 'docs'", specifier = "==9.6.14" }, { 
name = "mkdocs-meta-manager", marker = "extra == 'docs'", specifier = "==1.1.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.2" }, - { name = "pipelex", extras = ["anthropic", "google", "google-genai", "bedrock"], git = "https://github.com/Pipelex/pipelex.git?branch=pre-release%2Fv0.18.0b2" }, + { name = "pipelex", extras = ["anthropic", "google", "google-genai", "bedrock"], git = "https://github.com/Pipelex/pipelex.git?branch=feature%2FChicago" }, { name = "pygithub", specifier = "==2.4.0" }, { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.405" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.1" }, @@ -1338,6 +1338,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/5b/0a90ae6576cd7fa2593e7bdde4fa52fe4e79f5812b8f1ada001414aaaa54/mkdocs_meta_manager-1.1.0-py3-none-any.whl", hash = "sha256:bdd1b5e9403bcfe222720dcd93102e0f1df5befef9b40a6ff91ad67926528658", size = 3186, upload-time = "2025-01-20T21:58:02.314Z" }, ] +[[package]] +name = "mthds" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backports-strenum", marker = "python_full_version < '3.11'" }, + { name = "httpx" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/1c/207fed4b05c8dbcfe6f6c0802b0c59cdc683466f02f95625475248e1ff20/mthds-0.0.2.tar.gz", hash = "sha256:273aaec6e8332f462f772c1bd58cf5baaea4a69224de2fa024b08ebe7ed005f1", size = 60229, upload-time = "2026-02-19T13:38:28.094Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/52/c831b1c6bb0a2b7ecaf21de099132eb4c1a1277569ad7ae214da86dd5583/mthds-0.0.2-py3-none-any.whl", hash = "sha256:168210f289f67cc1fd831ddc4bb46f2d65de6c48b5932406bc31459be94f312e", size = 11818, upload-time = "2026-02-19T13:38:26.447Z" }, +] + [[package]] name = "multidict" version = "6.6.3" @@ -1894,8 +1908,8 @@ wheels = [ [[package]] name = "pipelex" -version = "0.18.0b2" -source = { git = 
"https://github.com/Pipelex/pipelex.git?branch=pre-release%2Fv0.18.0b2#06b33904bd697791e058c7bc5b43454d96839f46" } +version = "0.18.0b3" +source = { git = "https://github.com/Pipelex/pipelex.git?branch=feature%2FChicago#258890c9cfc6e86fd71280450fa48718abfee04c" } dependencies = [ { name = "aiofiles" }, { name = "backports-strenum", marker = "python_full_version < '3.11'" }, @@ -1906,6 +1920,7 @@ dependencies = [ { name = "json2html" }, { name = "kajson" }, { name = "markdown" }, + { name = "mthds" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "openai" }, @@ -1914,6 +1929,7 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "opentelemetry-semantic-conventions" }, { name = "pillow" }, + { name = "pipelex-tools" }, { name = "polyfactory" }, { name = "portkey-ai" }, { name = "posthog" }, @@ -1922,6 +1938,7 @@ dependencies = [ { name = "python-dotenv" }, { name = "pyyaml" }, { name = "rich" }, + { name = "semantic-version" }, { name = "shortuuid" }, { name = "tomli" }, { name = "tomlkit" }, @@ -1945,6 +1962,21 @@ google-genai = [ { name = "instructor", extra = ["google-genai"] }, ] +[[package]] +name = "pipelex-tools" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/06/b0b78881565093fd5c662dbb128b9ed8e090403c1d6110357e1ce8ed9ea2/pipelex_tools-0.2.0.tar.gz", hash = "sha256:8dc6b866aae05dd2d59ac80f5f21ab31c5477b9e09400d86fc2a1b2143868266", size = 143877, upload-time = "2026-02-21T21:04:34.277Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/54/865a714ff7880f148acc762686a8281844e5620af38a307fdf535dc4fb94/pipelex_tools-0.2.0-py3-none-macosx_10_12_x86_64.whl", hash = 
"sha256:0d3cac6f46be310a66d242b9ca42a237042661c77ed8de1c4116c2b066f9e977", size = 5077220, upload-time = "2026-02-21T21:04:20.644Z" }, + { url = "https://files.pythonhosted.org/packages/d3/47/09abdef9d35cadbda5d3909fe45546282f87d48ad0c876f757f4dc5a847a/pipelex_tools-0.2.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7c908648a72b96bef1a68c2ff2ba407e39757e8843f45e1e799d11fab78c405b", size = 4826425, upload-time = "2026-02-21T21:04:22.403Z" }, + { url = "https://files.pythonhosted.org/packages/cf/35/2dea7a7c615e349807b5aa56604b83e5ed4aa2f7ba6b3249fab7d1d23582/pipelex_tools-0.2.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45f88bffd036d5813cd2684a6cc6fba46852e25614a3723faa8e5ef16fb0dd28", size = 4965360, upload-time = "2026-02-21T21:04:24.959Z" }, + { url = "https://files.pythonhosted.org/packages/4c/59/2dacf08122e0302b7bf07ea4617dcbf82946445b8b5fcf137443fde3bd20/pipelex_tools-0.2.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1cc46c25659c6f42ec088a13fa685f1b49f836b22a7300dc807573f74b3af7f", size = 5216110, upload-time = "2026-02-21T21:04:26.904Z" }, + { url = "https://files.pythonhosted.org/packages/2b/09/89f87484e575205632d3f97c7f22dd3080473ab34c50801d6f12a031028d/pipelex_tools-0.2.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:283210d5f24cbc945a80b3127fc42abc7cf3dbaa18b269d68561bc6a528e1b14", size = 5222398, upload-time = "2026-02-21T21:04:29.134Z" }, + { url = "https://files.pythonhosted.org/packages/9e/31/6777aa89f9ef096e963b890b748150d4085351edb013edc97b57256488e6/pipelex_tools-0.2.0-py3-none-win32.whl", hash = "sha256:f39eb059f27f2426c3c3dbc990e9d1c13ab66cee3974a1e9d513b4cff24fd7b2", size = 4598419, upload-time = "2026-02-21T21:04:30.819Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c9/4a3c74f29a5a73f4ca3ea79943212472db5803f13e1de324da5d706f0e01/pipelex_tools-0.2.0-py3-none-win_amd64.whl", hash = 
"sha256:94488f11c1700bd682cb9da981ee7e7569bc1fa601544ae658254592fc493f56", size = 5393983, upload-time = "2026-02-21T21:04:32.558Z" }, +] + [[package]] name = "platformdirs" version = "4.3.8" @@ -2600,6 +2632,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/17/22bf8155aa0ea2305eefa3a6402e040df7ebe512d1310165eda1e233c3f8/s3transfer-0.13.0-py3-none-any.whl", hash = "sha256:0148ef34d6dd964d0d8cf4311b2b21c474693e57c2e069ec708ce043d2b527be", size = 85152, upload-time = "2025-05-22T19:24:48.703Z" }, ] +[[package]] +name = "semantic-version" +version = "2.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/31/f2289ce78b9b473d582568c234e104d2a342fd658cc288a7553d83bb8595/semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c", size = 52289, upload-time = "2022-05-26T13:35:23.454Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" }, +] + [[package]] name = "shellingham" version = "1.5.4"