diff --git a/.claude/agents/kiro/spec-design.md b/.claude/agents/kiro/spec-design.md index 0d8d4f7c8..5dbbaeb07 100644 --- a/.claude/agents/kiro/spec-design.md +++ b/.claude/agents/kiro/spec-design.md @@ -27,7 +27,7 @@ You will receive task prompts containing: - Auto-approve flag (true/false) - Mode: generate or merge -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/steering/*.md`) to get all steering files @@ -118,6 +118,7 @@ Generate technical design document for feature based on approved requirements. - **Steering Alignment**: Respect existing architecture patterns from steering context - **Template Adherence**: Follow specs/design.md template structure and generation instructions strictly - **Design Focus**: Architecture and interfaces ONLY, no implementation code +- **Requirements Traceability IDs**: Use numeric requirement IDs only (e.g. "1.1", "1.2", "3.1", "3.3") exactly as defined in requirements.md. Do not invent new IDs or use alphabetic labels. ## Tool Guidance - **Read first**: Load all context before taking action (specs, steering, templates, rules) @@ -166,6 +167,8 @@ Provide brief summary in the language specified in spec.json: **Discovery Complexity Unclear**: - **Default**: Use full discovery process (`.kiro/settings/rules/design-discovery-full.md`) - **Rationale**: Better to over-research than miss critical context +- **Invalid Requirement IDs**: + - **Stop Execution**: If requirements.md is missing numeric IDs or uses non-numeric headings (for example, "Requirement A"), stop and instruct the user to fix requirements.md before continuing. **Note**: You execute tasks autonomously. Return final report only when complete. 
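The traceability constraint above (numeric requirement IDs only, exactly as defined in requirements.md) lends itself to a mechanical check. The sketch below is illustrative only: the heading and criterion regexes are assumptions about how requirements.md is laid out, not part of the agent specification.

```python
import re

# Assumed layouts: "## Requirement 1: Title" style headings and "1.1 ..." criteria.
HEADING_ID = re.compile(r"^#{2,4}\s*(?:Requirement\s+)?(\d+)\b", re.IGNORECASE)
CRITERION_ID = re.compile(r"^\s*(\d+\.\d+)\b")

def known_requirement_ids(requirements_md: str) -> set[str]:
    """Collect numeric IDs ("1", "1.1", "3.3") defined in requirements.md."""
    ids: set[str] = set()
    for line in requirements_md.splitlines():
        for pattern in (HEADING_ID, CRITERION_ID):
            match = pattern.match(line)
            if match:
                ids.add(match.group(1))
    return ids

def undefined_references(requirements_md: str, design_ids: list[str]) -> list[str]:
    """Requirement IDs cited in design.md traceability that requirements.md never defines."""
    known = known_requirement_ids(requirements_md)
    return [rid for rid in design_ids if rid not in known]
```

A non-empty result, or an empty `known_requirement_ids` set, corresponds to the Invalid Requirement IDs stop condition described above.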
-think \ No newline at end of file +think diff --git a/.claude/agents/kiro/spec-impl.md b/.claude/agents/kiro/spec-impl.md index ce4bb2e17..fe69d7754 100644 --- a/.claude/agents/kiro/spec-impl.md +++ b/.claude/agents/kiro/spec-impl.md @@ -27,7 +27,7 @@ You will receive task prompts containing: - Target tasks: task numbers or "all pending" - TDD Mode: strict (test-first) -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/steering/*.md`) to get all steering files diff --git a/.claude/agents/kiro/spec-requirements.md b/.claude/agents/kiro/spec-requirements.md index 3c5025d92..b340635a3 100644 --- a/.claude/agents/kiro/spec-requirements.md +++ b/.claude/agents/kiro/spec-requirements.md @@ -15,7 +15,7 @@ You are a specialized agent for generating comprehensive, testable requirements - **Mission**: Generate comprehensive, testable requirements in EARS format based on the project description from spec initialization - **Success Criteria**: - Create complete requirements document aligned with steering context - - Use proper EARS syntax for all acceptance criteria + - Follow the project's EARS patterns and constraints for all acceptance criteria - Focus on core functionality without implementation details - Update metadata to track generation status @@ -26,7 +26,7 @@ You will receive task prompts containing: - File path patterns (NOT expanded file lists) - Mode: generate -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/steering/*.md`) to get all steering files @@ -65,10 +65,10 @@ Generate complete requirements for the feature based on the project description ## Important Constraints - Focus on WHAT, not HOW (no implementation details) -- All acceptance criteria MUST use proper EARS syntax - Requirements must be testable and verifiable - Choose appropriate subject for EARS statements (system/service name for software) - Generate initial version first, then iterate with user feedback (no sequential questions upfront) +- Requirement headings in requirements.md MUST include a leading numeric ID only (for example: "Requirement 1", "1.", "2 Feature ..."); do not use alphabetic IDs like "Requirement A". ## Tool Guidance - **Read first**: Load all context (spec, steering, rules, templates) before generation @@ -93,9 +93,10 @@ Provide output in the language specified in spec.json with: - **Missing Project Description**: If requirements.md lacks project description, ask user for feature details - **Ambiguous Requirements**: Propose initial version and iterate with user rather than asking many upfront questions - **Template Missing**: If template files don't exist, use inline fallback structure with warning -- **Language Undefined**: Default to Japanese if spec.json doesn't specify language +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language - **Incomplete Requirements**: After generation, explicitly ask user if requirements cover all expected functionality - **Steering Directory Empty**: Warn user that project context is missing and may affect requirement quality +- **Non-numeric Requirement Headings**: If existing headings do not include a leading numeric ID (for example, they use "Requirement A"), normalize them to numeric IDs and keep that mapping consistent (never mix numeric and alphabetic labels). 
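The heading rule and the Non-numeric Requirement Headings fallback above amount to a renumbering pass. A minimal sketch, assuming headings shaped like `## Requirement A: Title` (the exact heading levels and separator are assumptions):

```python
import re

HEADING = re.compile(r"^(#{2,4})\s*Requirement\s+(\S+)\s*(.*)$", re.IGNORECASE)

def normalize_requirement_headings(requirements_md: str) -> str:
    """Renumber requirement headings to sequential numeric IDs (1, 2, 3...)."""
    lines = requirements_md.splitlines()
    matches = [(i, m) for i, m in ((i, HEADING.match(l)) for i, l in enumerate(lines)) if m]
    if not matches or all(m.group(2).rstrip(":.").isdigit() for _, m in matches):
        return requirements_md  # already numeric (or no headings): leave untouched
    for new_id, (i, m) in enumerate(matches, start=1):
        title = m.group(3).lstrip(":").strip()
        lines[i] = f"{m.group(1)} Requirement {new_id}" + (f": {title}" if title else "")
    return "\n".join(lines)
```

Renumbering every heading, rather than only the alphabetic ones, keeps the mapping consistent and avoids mixing numeric and alphabetic labels.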
**Note**: You execute tasks autonomously. Return final report only when complete. -think deeply \ No newline at end of file +think deeply diff --git a/.claude/agents/kiro/spec-tasks.md b/.claude/agents/kiro/spec-tasks.md index 5b7c86f9e..c63c1d1e6 100644 --- a/.claude/agents/kiro/spec-tasks.md +++ b/.claude/agents/kiro/spec-tasks.md @@ -25,9 +25,10 @@ You will receive task prompts containing: - Feature name and spec directory path - File path patterns (NOT expanded file lists) - Auto-approve flag (true/false) +- Sequential mode flag (true/false; default false → parallel allowed) - Mode: generate or merge -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/steering/*.md`) to get all steering files @@ -48,21 +49,27 @@ Generate implementation tasks for the feature based on approved requirements and - `.kiro/specs/{feature}/tasks.md` (if exists, for merge mode) - **Entire `.kiro/steering/` directory** for complete project memory +- Determine execution mode: + - `sequential = (sequential flag is true)` + **Validate approvals**: - If auto-approve flag is true: Auto-approve requirements and design in spec.json - Otherwise: Verify both approved (stop if not, see Safety & Fallback) ### Step 2: Generate Implementation Tasks -**Load generation rules and template**: - Read `.kiro/settings/rules/tasks-generation.md` for principles -- Read `.kiro/settings/templates/specs/tasks.md` for format +- Read `.kiro/settings/rules/tasks-parallel-analysis.md` for parallel judgement criteria +- Read `.kiro/settings/templates/specs/tasks.md` for format (supports `(P)` markers) **Generate task list following all rules**: - Use language specified in spec.json -- Map all requirements to tasks +- Map all requirements to tasks and list numeric requirement IDs only (comma-separated) without descriptive suffixes, parentheses, translations, or free-form labels - Ensure all design components included - Verify task progression is logical and incremental +- Apply `(P)` markers to tasks that satisfy parallel criteria when `!sequential` +- Explicitly note dependencies preventing `(P)` when tasks appear parallel but are not safe +- If sequential mode is true, omit `(P)` entirely - If existing tasks.md found, merge with new content ### Step 3: Finalize @@ -127,6 +134,8 @@ Provide brief summary in the language specified in spec.json: - **User Message**: "Template or rules files missing in `.kiro/settings/`" - **Fallback**: Use inline basic structure with warning - **Suggested Action**: "Check repository setup or restore template files" +- **Missing Numeric Requirement IDs**: + - **Stop Execution**: All requirements in requirements.md MUST have numeric IDs. If any requirement lacks a numeric ID, stop and request that requirements.md be fixed before generating tasks. **Note**: You execute tasks autonomously. Return final report only when complete. 
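The "numeric requirement IDs only, comma-separated" rule above can be linted mechanically. This sketch assumes each task carries a trailing line such as `_Requirements: 1.1, 1.2, 3.3_`; that line format is an assumption about tasks.md, not something the agent spec mandates.

```python
import re

REQ_LINE = re.compile(r"_?Requirements?:\s*(.+)$", re.IGNORECASE)
NUMERIC_ID = re.compile(r"^\d+(?:\.\d+)*$")

def invalid_requirement_references(tasks_md: str) -> list[str]:
    """Return requirement references that are not bare numeric IDs."""
    bad: list[str] = []
    for line in tasks_md.splitlines():
        match = REQ_LINE.search(line)
        if not match:
            continue
        for token in match.group(1).rstrip("_ ").split(","):
            token = token.strip()
            if token and not NUMERIC_ID.match(token):
                bad.append(token)  # e.g. "1.1 (login flow)" or "Requirement A"
    return bad
```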
-think deeply \ No newline at end of file +think deeply diff --git a/.claude/agents/kiro/steering-custom.md b/.claude/agents/kiro/steering-custom.md index c0b77b13e..54fe22dd5 100644 --- a/.claude/agents/kiro/steering-custom.md +++ b/.claude/agents/kiro/steering-custom.md @@ -27,7 +27,7 @@ You will receive task prompts containing: - Domain/topic (e.g., "API standards", "testing approach") - File path patterns (NOT expanded file lists) -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/settings/templates/steering-custom/*.md`) to find available templates @@ -140,6 +140,8 @@ Review and customize as needed. - Follow same granularity principles as core steering - All steering files loaded as project memory - Custom files equally important as core files +- Avoid documenting agent-specific tooling directories (e.g. `.cursor/`, `.gemini/`, `.claude/`) +- Light references to `.kiro/specs/` and `.kiro/steering/` are acceptable; avoid other `.kiro/` directories **Note**: You execute tasks autonomously. Return final report only when complete. think deeply \ No newline at end of file diff --git a/.claude/agents/kiro/steering.md b/.claude/agents/kiro/steering.md index 65f086237..e00d294f4 100644 --- a/.claude/agents/kiro/steering.md +++ b/.claude/agents/kiro/steering.md @@ -30,7 +30,7 @@ You will receive task prompts containing: - Mode: bootstrap or sync (detected by Slash Command) - File path patterns (NOT expanded file lists) -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - For Bootstrap mode: Read templates from `.kiro/settings/templates/steering/` diff --git a/.claude/agents/kiro/validate-design.md b/.claude/agents/kiro/validate-design.md index 951f1e64e..d6527d8b1 100644 --- a/.claude/agents/kiro/validate-design.md +++ b/.claude/agents/kiro/validate-design.md @@ -25,7 +25,7 @@ You will receive task prompts containing: - Feature name and spec directory path - File path patterns (NOT expanded file lists) -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/steering/*.md`) to get all steering files @@ -92,7 +92,7 @@ Provide output in the language specified in spec.json with: - **Missing Design**: If design.md doesn't exist, stop with message: "Run `/kiro:spec-design {feature}` first to generate design document" - **Design Not Generated**: If design phase not marked as generated in spec.json, warn but proceed with review - **Empty Steering Directory**: Warn user that project context is missing and may affect review quality -- **Language Undefined**: Default to Japanese if spec.json doesn't specify language +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language **Note**: You execute tasks autonomously. Return final report only when complete. 
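Several agents above now default to English when spec.json omits a language. A minimal resolution helper, assuming the language lives under a top-level `language` key as referenced elsewhere in these prompts (`spec.json.language`):

```python
import json
from pathlib import Path

def resolve_language(spec_dir: str) -> str:
    """Return the report language, defaulting to English ("en")."""
    spec_path = Path(spec_dir) / "spec.json"
    try:
        spec = json.loads(spec_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return "en"
    return spec.get("language") or "en"
```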
-think hard \ No newline at end of file +think hard diff --git a/.claude/agents/kiro/validate-gap.md b/.claude/agents/kiro/validate-gap.md index dc31b5d78..a77868681 100644 --- a/.claude/agents/kiro/validate-gap.md +++ b/.claude/agents/kiro/validate-gap.md @@ -25,7 +25,7 @@ You will receive task prompts containing: - Feature name and spec directory path - File path patterns (NOT expanded file lists) -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/steering/*.md`) to get all steering files @@ -93,7 +93,7 @@ Provide output in the language specified in spec.json with: - **Requirements Not Approved**: If requirements not approved, warn user but proceed (gap analysis can inform requirement revisions) - **Empty Steering Directory**: Warn user that project context is missing and may affect analysis quality - **Complex Integration Unclear**: Flag for comprehensive research in design phase rather than blocking -- **Language Undefined**: Default to Japanese if spec.json doesn't specify language +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language **Note**: You execute tasks autonomously. Return final report only when complete. -think hard \ No newline at end of file +think hard diff --git a/.claude/agents/kiro/validate-impl.md b/.claude/agents/kiro/validate-impl.md index 54967ddf9..7afd71515 100644 --- a/.claude/agents/kiro/validate-impl.md +++ b/.claude/agents/kiro/validate-impl.md @@ -27,7 +27,7 @@ You will receive task prompts containing: - File path patterns (NOT expanded file lists) - Target tasks: task numbers or auto-detect from conversation/checkboxes -### Step 0: Expand File Patterns (SubAgent-specific) +### Step 0: Expand File Patterns (Subagent-specific) Use Glob tool to expand file patterns, then read all files: - Glob(`.kiro/steering/*.md`) to get all steering files @@ -140,7 +140,7 @@ Provide output in the language specified in spec.json with: - **No Implementation Found**: If no `/kiro:spec-impl` in history and no `[x]` tasks, report "No implementations detected" - **Test Command Unknown**: If test framework unclear, warn and skip test validation (manual verification required) - **Missing Spec Files**: If spec.json/requirements.md/design.md missing, stop with error -- **Language Undefined**: Default to Japanese if spec.json doesn't specify language +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language **Note**: You execute tasks autonomously. Return final report only when complete. -think hard \ No newline at end of file +think hard diff --git a/.claude/commands/kiro/spec-design.md b/.claude/commands/kiro/spec-design.md index def0a103b..61bafcb66 100644 --- a/.claude/commands/kiro/spec-design.md +++ b/.claude/commands/kiro/spec-design.md @@ -17,16 +17,16 @@ Check that requirements have been completed: If validation fails, inform user to complete requirements phase first. 
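The validate-impl auto-detection above scans tasks.md checkboxes when no tasks are named explicitly. A rough sketch, assuming completed tasks are written as `- [x] 1.1 ...` under `.kiro/specs/*/tasks.md`:

```python
import re
from pathlib import Path

CHECKED = re.compile(r"^\s*-\s*\[x\]\*?\s*(\d+(?:\.\d+)?)", re.IGNORECASE)

def completed_tasks(specs_root: str = ".kiro/specs") -> dict[str, list[str]]:
    """Map each feature to the task numbers already checked off in tasks.md."""
    result: dict[str, list[str]] = {}
    for tasks_md in Path(specs_root).glob("*/tasks.md"):
        lines = tasks_md.read_text(encoding="utf-8").splitlines()
        found = [m.group(1) for line in lines if (m := CHECKED.match(line))]
        if found:
            result[tasks_md.parent.name] = found
    return result
```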
-## Invoke SubAgent +## Invoke Subagent Delegate design generation to spec-design-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( subagent_type="spec-design-agent", - description="Generate technical design", + description="Generate technical design and update research log", prompt=""" Feature: $1 Spec directory: .kiro/specs/$1/ @@ -37,16 +37,18 @@ File patterns to read: - .kiro/steering/*.md - .kiro/settings/rules/design-*.md - .kiro/settings/templates/specs/design.md +- .kiro/settings/templates/specs/research.md Discovery: auto-detect based on requirements Mode: {generate or merge based on design.md existence} +Language: respect spec.json language for design.md/research.md outputs """ ) ``` ## Display Result -Show SubAgent summary to user, then provide next step guidance: +Show Subagent summary to user, then provide next step guidance: ### Next Phase: Task Generation diff --git a/.claude/commands/kiro/spec-impl.md b/.claude/commands/kiro/spec-impl.md index 3507eb7a5..2fe802e53 100644 --- a/.claude/commands/kiro/spec-impl.md +++ b/.claude/commands/kiro/spec-impl.md @@ -21,15 +21,15 @@ If validation fails, inform user to complete tasks generation first. ## Task Selection Logic -**Parse task numbers from `$2`** (perform this in Slash Command before invoking SubAgent): +**Parse task numbers from `$2`** (perform this in Slash Command before invoking Subagent): - If `$2` provided: Parse task numbers (e.g., "1.1", "1,2,3") - Otherwise: Read `.kiro/specs/$1/tasks.md` and find all unchecked tasks (`- [ ]`) -## Invoke SubAgent +## Invoke Subagent Delegate TDD implementation to spec-tdd-impl-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -51,7 +51,7 @@ TDD Mode: strict (test-first) ## Display Result -Show SubAgent summary to user, then provide next step guidance: +Show Subagent summary to user, then provide next step guidance: ### Task Execution diff --git a/.claude/commands/kiro/spec-quick.md b/.claude/commands/kiro/spec-quick.md index 6b48647ab..89195bc7a 100644 --- a/.claude/commands/kiro/spec-quick.md +++ b/.claude/commands/kiro/spec-quick.md @@ -133,7 +133,7 @@ Execute these 4 phases in order: /kiro:spec-requirements {feature-name} ``` -Wait for completion. SubAgent will return with "次のステップ" message. +Wait for completion. Subagent will return with "次のステップ" message. **IMPORTANT**: In Automatic Mode, IGNORE the "次のステップ" message. It is for standalone usage. @@ -163,7 +163,7 @@ Wait for completion. SubAgent will return with "次のステップ" message. Note: `-y` flag auto-approves requirements. -Wait for completion. SubAgent will return with "次のステップ" message. +Wait for completion. Subagent will return with "次のステップ" message. **IMPORTANT**: In Automatic Mode, IGNORE the "次のステップ" message. diff --git a/.claude/commands/kiro/spec-requirements.md b/.claude/commands/kiro/spec-requirements.md index 617995059..a6646dd3b 100644 --- a/.claude/commands/kiro/spec-requirements.md +++ b/.claude/commands/kiro/spec-requirements.md @@ -16,11 +16,11 @@ Check that spec has been initialized: If validation fails, inform user to run `/kiro:spec-init` first. 
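The task-selection logic for `/kiro:spec-impl` above ("1.1", "1,2,3", or every unchecked `- [ ]` entry) could be parsed along these lines; the checkbox regex is an assumption about the tasks.md layout:

```python
import re

UNCHECKED = re.compile(r"^\s*-\s*\[ \]\*?\s*(\d+(?:\.\d+)?)")

def select_tasks(task_arg: str | None, tasks_md: str) -> list[str]:
    """Resolve which task numbers /kiro:spec-impl should execute."""
    if task_arg:  # explicit "1.1" or "1,2,3"
        return [t.strip() for t in task_arg.split(",") if t.strip()]
    # fall back to every unchecked "- [ ]" entry in tasks.md
    return [m.group(1) for line in tasks_md.splitlines() if (m := UNCHECKED.match(line))]
```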
-## Invoke SubAgent +## Invoke Subagent Delegate requirements generation to spec-requirements-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -44,7 +44,7 @@ Mode: generate ## Display Result -Show SubAgent summary to user, then provide next step guidance: +Show Subagent summary to user, then provide next step guidance: ### Next Phase: Design Generation diff --git a/.claude/commands/kiro/spec-tasks.md b/.claude/commands/kiro/spec-tasks.md index ed95d98db..ca20efee7 100644 --- a/.claude/commands/kiro/spec-tasks.md +++ b/.claude/commands/kiro/spec-tasks.md @@ -1,7 +1,7 @@ --- description: Generate implementation tasks for a specification allowed-tools: Read, Task -argument-hint: [-y] +argument-hint: [-y] [--sequential] --- # Implementation Tasks Generator @@ -9,19 +9,21 @@ argument-hint: [-y] ## Parse Arguments - Feature name: `$1` - Auto-approve flag: `$2` (optional, "-y") +- Sequential mode flag: `$3` (optional, "--sequential") ## Validate Check that design has been completed: - Verify `.kiro/specs/$1/` exists - Verify `.kiro/specs/$1/design.md` exists +- Determine `sequential = ($3 == "--sequential")` If validation fails, inform user to complete design phase first. -## Invoke SubAgent +## Invoke Subagent Delegate task generation to spec-tasks-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -31,21 +33,28 @@ Task( Feature: $1 Spec directory: .kiro/specs/$1/ Auto-approve: {true if $2 == "-y", else false} +Sequential mode: {true if sequential else false} File patterns to read: - .kiro/specs/$1/*.{json,md} - .kiro/steering/*.md - .kiro/settings/rules/tasks-generation.md +- .kiro/settings/rules/tasks-parallel-analysis.md (include only when sequential mode is false) - .kiro/settings/templates/specs/tasks.md Mode: {generate or merge based on tasks.md existence} +Instruction highlights: +- Map all requirements to tasks and list requirement IDs only (comma-separated) without extra narration +- Promote single actionable sub-tasks to major tasks and keep container summaries concise +- Apply `(P)` markers only when parallel criteria met (omit in sequential mode) +- Mark optional acceptance-criteria-focused test coverage subtasks with `- [ ]*` only when deferrable post-MVP """ ) ``` ## Display Result -Show SubAgent summary to user, then provide next step guidance: +Show Subagent summary to user, then provide next step guidance: ### Next Phase: Implementation diff --git a/.claude/commands/kiro/steering-custom.md b/.claude/commands/kiro/steering-custom.md index 8bc624cec..a6e6e33e5 100644 --- a/.claude/commands/kiro/steering-custom.md +++ b/.claude/commands/kiro/steering-custom.md @@ -7,17 +7,17 @@ allowed-tools: Task ## Interactive Workflow -This command starts an interactive process with the SubAgent: -1. SubAgent asks user for domain/topic -2. SubAgent checks for available templates -3. SubAgent analyzes codebase for relevant patterns -4. SubAgent generates custom steering file +This command starts an interactive process with the Subagent: +1. Subagent asks user for domain/topic +2. Subagent checks for available templates +3. Subagent analyzes codebase for relevant patterns +4. 
Subagent generates custom steering file -## Invoke SubAgent +## Invoke Subagent Delegate custom steering creation to steering-custom-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -37,7 +37,7 @@ JIT Strategy: Analyze codebase for relevant patterns as needed ## Display Result -Show SubAgent summary to user: +Show Subagent summary to user: - Custom steering file created - Template used (if any) - Codebase patterns analyzed @@ -51,6 +51,9 @@ Available templates in `.kiro/settings/templates/steering-custom/`: ## Notes -- SubAgent will interact with user to understand needs +- Subagent will interact with user to understand needs - Templates are starting points, customized for project - All steering files loaded as project memory +- Avoid documenting agent-specific tooling directories (e.g. `.cursor/`, `.gemini/`, `.claude/`) +- `.kiro/settings/` content should NOT be documented (it's metadata, not project knowledge) +- Light references to `.kiro/specs/` and `.kiro/steering/` are acceptable; avoid other `.kiro/` directories diff --git a/.claude/commands/kiro/steering.md b/.claude/commands/kiro/steering.md index 35e62bf7f..ba217e423 100644 --- a/.claude/commands/kiro/steering.md +++ b/.claude/commands/kiro/steering.md @@ -7,7 +7,7 @@ allowed-tools: Read, Task, Glob ## Mode Detection -**Perform detection before invoking SubAgent**: +**Perform detection before invoking Subagent**: Check `.kiro/steering/` status: - **Bootstrap Mode**: Empty OR missing core files (product.md, tech.md, structure.md) @@ -15,11 +15,11 @@ Check `.kiro/steering/` status: Use Glob to check for existing steering files. -## Invoke SubAgent +## Invoke Subagent Delegate steering management to steering-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -40,7 +40,7 @@ JIT Strategy: Fetch codebase files when needed, not upfront ## Display Result -Show SubAgent summary to user: +Show Subagent summary to user: ### Bootstrap: - Generated steering files: product.md, tech.md, structure.md @@ -57,3 +57,6 @@ Show SubAgent summary to user: - Templates and principles are external for customization - Focus on patterns, not catalogs - "Golden Rule": New code following patterns shouldn't require steering updates +- Avoid documenting agent-specific tooling directories (e.g. `.cursor/`, `.gemini/`, `.claude/`) +- `.kiro/settings/` content should NOT be documented in steering files (settings are metadata, not project knowledge) +- Light references to `.kiro/specs/` and `.kiro/steering/` are acceptable; avoid other `.kiro/` directories diff --git a/.claude/commands/kiro/validate-design.md b/.claude/commands/kiro/validate-design.md index a49853957..bfa923806 100644 --- a/.claude/commands/kiro/validate-design.md +++ b/.claude/commands/kiro/validate-design.md @@ -16,11 +16,11 @@ Check that design has been completed: If validation fails, inform user to complete design phase first. 
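The steering mode detection above (Bootstrap when `.kiro/steering/` is empty or missing core files, Sync otherwise) is simple enough to express directly. A sketch of that decision:

```python
from pathlib import Path

CORE_FILES = ("product.md", "tech.md", "structure.md")

def detect_steering_mode(steering_dir: str = ".kiro/steering") -> str:
    """Bootstrap when core steering files are missing, otherwise sync."""
    root = Path(steering_dir)
    if not root.is_dir():
        return "bootstrap"
    missing = [name for name in CORE_FILES if not (root / name).is_file()]
    return "bootstrap" if missing else "sync"
```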
-## Invoke SubAgent +## Invoke Subagent Delegate design validation to validate-design-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -42,7 +42,7 @@ File patterns to read: ## Display Result -Show SubAgent summary to user, then provide next step guidance: +Show Subagent summary to user, then provide next step guidance: ### Next Phase: Task Generation diff --git a/.claude/commands/kiro/validate-gap.md b/.claude/commands/kiro/validate-gap.md index cd02b3158..962a6ae20 100644 --- a/.claude/commands/kiro/validate-gap.md +++ b/.claude/commands/kiro/validate-gap.md @@ -16,11 +16,11 @@ Check that requirements have been completed: If validation fails, inform user to complete requirements phase first. -## Invoke SubAgent +## Invoke Subagent Delegate gap analysis to validate-gap-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -41,7 +41,7 @@ File patterns to read: ## Display Result -Show SubAgent summary to user, then provide next step guidance: +Show Subagent summary to user, then provide next step guidance: ### Next Phase: Design Generation diff --git a/.claude/commands/kiro/validate-impl.md b/.claude/commands/kiro/validate-impl.md index ae3d33e80..90e72692b 100644 --- a/.claude/commands/kiro/validate-impl.md +++ b/.claude/commands/kiro/validate-impl.md @@ -12,25 +12,25 @@ argument-hint: [feature-name] [task-numbers] ## Auto-Detection Logic -**Perform detection before invoking SubAgent**: +**Perform detection before invoking Subagent**: **If no arguments** (`$1` empty): - Parse conversation history for `/kiro:spec-impl [tasks]` patterns - OR scan `.kiro/specs/*/tasks.md` for `[x]` checkboxes -- Pass detected features and tasks to SubAgent +- Pass detected features and tasks to Subagent **If feature only** (`$1` present, `$2` empty): - Read `.kiro/specs/$1/tasks.md` and find all `[x]` checkboxes -- Pass feature and detected tasks to SubAgent +- Pass feature and detected tasks to Subagent **If both provided** (`$1` and `$2` present): -- Pass directly to SubAgent without detection +- Pass directly to Subagent without detection -## Invoke SubAgent +## Invoke Subagent Delegate validation to validate-impl-agent: -Use the Task tool to invoke the SubAgent with file path patterns: +Use the Task tool to invoke the Subagent with file path patterns: ``` Task( @@ -52,7 +52,7 @@ Validation scope: {based on detection results} ## Display Result -Show SubAgent summary to user, then provide next step guidance: +Show Subagent summary to user, then provide next step guidance: ### Next Steps Guidance diff --git a/.codex/prompts/kiro-spec-design.md b/.codex/prompts/kiro-spec-design.md new file mode 100644 index 000000000..915c5e16e --- /dev/null +++ b/.codex/prompts/kiro-spec-design.md @@ -0,0 +1,182 @@ + +description: Create comprehensive technical design for a specification +argument-hint: [-y] +arguments: + feature-name: $1 + -y flag: $2 + + +# Technical Design Generator + + +- **Mission**: Generate comprehensive technical design document that translates requirements (WHAT) into architectural design (HOW) +- **Success Criteria**: + - All requirements mapped to technical components with clear interfaces + - Appropriate architecture discovery and research completed + - Design aligns with steering context and existing patterns + - Visual diagrams included for complex architectures + + + +## Core Task +Generate 
technical design document for feature **$1** based on approved requirements. + +## Execution Steps + +### Step 1: Load Context + +**Read all necessary context**: +- `.kiro/specs/$1/spec.json`, `requirements.md`, `design.md` (if exists) +- **Entire `.kiro/steering/` directory** for complete project memory +- `.kiro/settings/templates/specs/design.md` for document structure +- `.kiro/settings/rules/design-principles.md` for design principles +- `.kiro/settings/templates/specs/research.md` for discovery log structure + +**Validate requirements approval**: +- If `-y` flag provided ($2 == "-y"): Auto-approve requirements in spec.json +- Otherwise: Verify approval status (stop if unapproved, see Safety & Fallback) + +### Step 2: Discovery & Analysis + +**Critical: This phase ensures design is based on complete, accurate information.** + +1. **Classify Feature Type**: + - **New Feature** (greenfield) → Full discovery required + - **Extension** (existing system) → Integration-focused discovery + - **Simple Addition** (CRUD/UI) → Minimal or no discovery + - **Complex Integration** → Comprehensive analysis required + +2. **Execute Appropriate Discovery Process**: + + **For Complex/New Features**: + - Read and execute `.kiro/settings/rules/design-discovery-full.md` + - Conduct thorough research using WebSearch/WebFetch: + - Latest architectural patterns and best practices + - External dependency verification (APIs, libraries, versions, compatibility) + - Official documentation, migration guides, known issues + - Performance benchmarks and security considerations + + **For Extensions**: + - Read and execute `.kiro/settings/rules/design-discovery-light.md` + - Focus on integration points, existing patterns, compatibility + - Use Grep to analyze existing codebase patterns + + **For Simple Additions**: + - Skip formal discovery, quick pattern check only + +3. **Retain Discovery Findings for Step 3**: + - External API contracts and constraints + - Technology decisions with rationale + - Existing patterns to follow or extend + - Integration points and dependencies + - Identified risks and mitigation strategies + - Potential architecture patterns and boundary options (note details in `research.md`) + - Parallelization considerations for future tasks (capture dependencies in `research.md`) + +4. **Persist Findings to Research Log**: + - Create or update `.kiro/specs/$1/research.md` using the shared template + - Summarize discovery scope and key findings (Summary section) + - Record investigations in Research Log topics with sources and implications + - Document architecture pattern evaluation, design decisions, and risks using the template sections + - Use the language specified in spec.json when writing or updating `research.md` + +### Step 3: Generate Design Document + +1. **Load Design Template and Rules**: + - Read `.kiro/settings/templates/specs/design.md` for structure + - Read `.kiro/settings/rules/design-principles.md` for principles + +2. 
**Generate Design Document**: + - **Follow specs/design.md template structure and generation instructions strictly** + - **Integrate all discovery findings**: Use researched information (APIs, patterns, technologies) throughout component definitions, architecture decisions, and integration points + - If existing design.md found in Step 1, use it as reference context (merge mode) + - Apply design rules: Type Safety, Visual Communication, Formal Tone + - Use language specified in spec.json + - Ensure sections reflect updated headings ("Architecture Pattern & Boundary Map", "Technology Stack & Alignment", "Components & Interface Contracts") and reference supporting details from `research.md` + +3. **Update Metadata** in spec.json: + - Set `phase: "design-generated"` + - Set `approvals.design.generated: true, approved: false` + - Set `approvals.requirements.approved: true` + - Update `updated_at` timestamp + +## Critical Constraints + - **Type Safety**: + - Enforce strong typing aligned with the project's technology stack. + - For statically typed languages, define explicit types/interfaces and avoid unsafe casts. + - For TypeScript, never use `any`; prefer precise types and generics. + - For dynamically typed languages, provide type hints/annotations where available (e.g., Python type hints) and validate inputs at boundaries. + - Document public interfaces and contracts clearly to ensure cross-component type safety. +- **Latest Information**: Use WebSearch/WebFetch for external dependencies and best practices +- **Steering Alignment**: Respect existing architecture patterns from steering context +- **Template Adherence**: Follow specs/design.md template structure and generation instructions strictly +- **Design Focus**: Architecture and interfaces ONLY, no implementation code +- **Requirements Traceability IDs**: Use numeric requirement IDs only (e.g. "1.1", "1.2", "3.1", "3.3") exactly as defined in requirements.md. Do not invent new IDs or use alphabetic labels. + +### Language Reminder +- Markdown prompt content must remain in English, even when spec.json requests another language for design output. The generated design.md and research.md should use the spec language. + + +## Tool Guidance +- **Read first**: Load all context before taking action (specs, steering, templates, rules) +- **Research when uncertain**: Use WebSearch/WebFetch for external dependencies, APIs, and latest best practices +- **Analyze existing code**: Use Grep to find patterns and integration points in codebase +- **Write last**: Generate design.md (and research.md updates) only after all research and analysis complete + +## Output Description + +**Command execution output** (separate from design.md content): + +Provide brief summary in the language specified in spec.json: + +1. **Status**: Confirm design document generated at `.kiro/specs/$1/design.md` +2. **Discovery Type**: Which discovery process was executed (full/light/minimal) +3. **Key Findings**: 2-3 critical insights from `research.md` that shaped the design +4. **Next Action**: Approval workflow guidance (see Safety & Fallback) +5. **Research Log**: Confirm `research.md` updated with latest decisions + +**Format**: Concise Markdown (under 200 words) - this is the command output, NOT the design document itself + +**Note**: The actual design document follows `.kiro/settings/templates/specs/design.md` structure. 
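The metadata update in Step 3 above touches a handful of spec.json fields. A sketch of that update; the nested `approvals` layout is inferred from the field names in the prompt and may differ from the actual template:

```python
import json
from datetime import datetime, timezone
from pathlib import Path

def mark_design_generated(spec_dir: str) -> None:
    """Apply the Step 3 spec.json updates after design.md is written."""
    path = Path(spec_dir) / "spec.json"
    spec = json.loads(path.read_text(encoding="utf-8"))
    spec["phase"] = "design-generated"
    approvals = spec.setdefault("approvals", {})
    approvals.setdefault("design", {}).update({"generated": True, "approved": False})
    approvals.setdefault("requirements", {})["approved"] = True
    spec["updated_at"] = datetime.now(timezone.utc).isoformat()
    path.write_text(json.dumps(spec, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
```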
+ +## Safety & Fallback + +### Error Scenarios + +**Requirements Not Approved**: +- **Stop Execution**: Cannot proceed without approved requirements +- **User Message**: "Requirements not yet approved. Approval required before design generation." +- **Suggested Action**: "Run `/prompts:kiro-spec-design $1 -y` to auto-approve requirements and proceed" + +**Missing Requirements**: +- **Stop Execution**: Requirements document must exist +- **User Message**: "No requirements.md found at `.kiro/specs/$1/requirements.md`" +- **Suggested Action**: "Run `/prompts:kiro-spec-requirements $1` to generate requirements first" + +**Template Missing**: +- **User Message**: "Template file missing at `.kiro/settings/templates/specs/design.md`" +- **Suggested Action**: "Check repository setup or restore template file" +- **Fallback**: Use inline basic structure with warning + +**Steering Context Missing**: +- **Warning**: "Steering directory empty or missing - design may not align with project standards" +- **Proceed**: Continue with generation but note limitation in output + +**Discovery Complexity Unclear**: +- **Default**: Use full discovery process (`.kiro/settings/rules/design-discovery-full.md`) +- **Rationale**: Better to over-research than miss critical context +- **Invalid Requirement IDs**: + - **Stop Execution**: If requirements.md is missing numeric IDs or uses non-numeric headings (for example, "Requirement A"), stop and instruct the user to fix requirements.md before continuing. + +### Next Phase: Task Generation + +**If Design Approved**: +- Review generated design at `.kiro/specs/$1/design.md` +- **Optional**: Run `/prompts:kiro-validate-design $1` for interactive quality review +- Then `/prompts:kiro-spec-tasks $1 -y` to generate implementation tasks + +**If Modifications Needed**: +- Provide feedback and re-run `/prompts:kiro-spec-design $1` +- Existing design used as reference (merge mode) + +**Note**: Design approval is mandatory before proceeding to task generation. diff --git a/.codex/prompts/kiro-spec-impl.md b/.codex/prompts/kiro-spec-impl.md new file mode 100644 index 000000000..c6598db10 --- /dev/null +++ b/.codex/prompts/kiro-spec-impl.md @@ -0,0 +1,111 @@ + +description: Execute spec tasks using TDD methodology +argument-hint: [task-numbers] +arguments: + feature-name: $1 + task-numbers: $2 + + +# Implementation Task Executor + + +- **Mission**: Execute implementation tasks using Test-Driven Development methodology based on approved specifications +- **Success Criteria**: + - All tests written before implementation code + - Code passes all tests with no regressions + - Tasks marked as completed in tasks.md + - Implementation aligns with design and requirements + + + +## Core Task +Execute implementation tasks for feature **$1** using Test-Driven Development. + +## Execution Steps + +### Step 1: Load Context + +**Read all necessary context**: +- `.kiro/specs/$1/spec.json`, `requirements.md`, `design.md`, `tasks.md` +- **Entire `.kiro/steering/` directory** for complete project memory + +**Validate approvals**: +- Verify tasks are approved in spec.json (stop if not, see Safety & Fallback) + +### Step 2: Select Tasks + +**Determine which tasks to execute**: +- If `$2` provided: Execute specified task numbers (e.g., "1.1" or "1,2,3") +- Otherwise: Execute all pending tasks (unchecked `- [ ]` in tasks.md) + +### Step 3: Execute with TDD + +For each selected task, follow Kent Beck's TDD cycle: + +1. 
**RED - Write Failing Test**: + - Write test for the next small piece of functionality + - Test should fail (code doesn't exist yet) + - Use descriptive test names + +2. **GREEN - Write Minimal Code**: + - Implement simplest solution to make test pass + - Focus only on making THIS test pass + - Avoid over-engineering + +3. **REFACTOR - Clean Up**: + - Improve code structure and readability + - Remove duplication + - Apply design patterns where appropriate + - Ensure all tests still pass after refactoring + +4. **VERIFY - Validate Quality**: + - All tests pass (new and existing) + - No regressions in existing functionality + - Code coverage maintained or improved + +5. **MARK COMPLETE**: + - Update checkbox from `- [ ]` to `- [x]` in tasks.md + +## Critical Constraints +- **TDD Mandatory**: Tests MUST be written before implementation code +- **Task Scope**: Implement only what the specific task requires +- **Test Coverage**: All new code must have tests +- **No Regressions**: Existing tests must continue to pass +- **Design Alignment**: Implementation must follow design.md specifications + + +## Tool Guidance +- **Read first**: Load all context before implementation +- **Test first**: Write tests before code +- Use **WebSearch/WebFetch** for library documentation when needed + +## Output Description + +Provide brief summary in the language specified in spec.json: + +1. **Tasks Executed**: Task numbers and test results +2. **Status**: Completed tasks marked in tasks.md, remaining tasks count + +**Format**: Concise (under 150 words) + +## Safety & Fallback + +### Error Scenarios + +**Tasks Not Approved or Missing Spec Files**: +- **Stop Execution**: All spec files must exist and tasks must be approved +- **Suggested Action**: "Complete previous phases: `/prompts:kiro-spec-requirements`, `/prompts:kiro-spec-design`, `/prompts:kiro-spec-tasks`" + +**Test Failures**: +- **Stop Implementation**: Fix failing tests before continuing +- **Action**: Debug and fix, then re-run + +### Task Execution + +**Execute specific task(s)**: +- `/prompts:kiro-spec-impl $1 1.1` - Single task +- `/prompts:kiro-spec-impl $1 1,2,3` - Multiple tasks + +**Execute all pending**: +- `/prompts:kiro-spec-impl $1` - All unchecked tasks + diff --git a/.codex/prompts/kiro-spec-init.md b/.codex/prompts/kiro-spec-init.md new file mode 100644 index 000000000..0becb592b --- /dev/null +++ b/.codex/prompts/kiro-spec-init.md @@ -0,0 +1,67 @@ + +description: Initialize a new specification with detailed project description +argument-hint: +arguments: + project-description: $ARGUMENTS + + +# Spec Initialization + + +- **Mission**: Initialize the first phase of spec-driven development by creating directory structure and metadata for a new specification +- **Success Criteria**: + - Generate appropriate feature name from project description + - Create unique spec structure without conflicts + - Provide clear path to next phase (requirements generation) + + + +## Core Task +Generate a unique feature name from the project description ($ARGUMENTS) and initialize the specification structure. + +## Execution Steps +1. **Check Uniqueness**: Verify `.kiro/specs/` for naming conflicts (append number suffix if needed) +2. **Create Directory**: `.kiro/specs/[feature-name]/` +3. 
**Initialize Files Using Templates**: + - Read `.kiro/settings/templates/specs/init.json` + - Read `.kiro/settings/templates/specs/requirements-init.md` + - Replace placeholders: + - `{{FEATURE_NAME}}` → generated feature name + - `{{TIMESTAMP}}` → current ISO 8601 timestamp + - `{{PROJECT_DESCRIPTION}}` → $ARGUMENTS + - Write `spec.json` and `requirements.md` to spec directory + +## Important Constraints +- DO NOT generate requirements/design/tasks at this stage +- Follow stage-by-stage development principles +- Maintain strict phase separation +- Only initialization is performed in this phase + + +## Tool Guidance +- Use **Glob** to check existing spec directories for name uniqueness +- Use **Read** to fetch templates: `init.json` and `requirements-init.md` +- Use **Write** to create spec.json and requirements.md after placeholder replacement +- Perform validation before any file write operation + +## Output Description +Provide output in the language specified in `spec.json` with the following structure: + +1. **Generated Feature Name**: `feature-name` format with 1-2 sentence rationale +2. **Project Summary**: Brief summary (1 sentence) +3. **Created Files**: Bullet list with full paths +4. **Next Step**: Command block showing `/prompts:kiro-spec-requirements ` +5. **Notes**: Explain why only initialization was performed (2-3 sentences on phase separation) + +**Format Requirements**: +- Use Markdown headings (##, ###) +- Wrap commands in code blocks +- Keep total output concise (under 250 words) +- Use clear, professional language per `spec.json.language` + +## Safety & Fallback +- **Ambiguous Feature Name**: If feature name generation is unclear, propose 2-3 options and ask user to select +- **Template Missing**: If template files don't exist in `.kiro/settings/templates/specs/`, report error with specific missing file path and suggest checking repository setup +- **Directory Conflict**: If feature name already exists, append numeric suffix (e.g., `feature-name-2`) and notify user of automatic conflict resolution +- **Write Failure**: Report error with specific path and suggest checking permissions or disk space + diff --git a/.codex/prompts/kiro-spec-requirements.md b/.codex/prompts/kiro-spec-requirements.md new file mode 100644 index 000000000..8b3ca2f7e --- /dev/null +++ b/.codex/prompts/kiro-spec-requirements.md @@ -0,0 +1,97 @@ + +description: Generate comprehensive requirements for a specification +argument-hint: +arguments: + feature-name: $1 + + +# Requirements Generation + + +- **Mission**: Generate comprehensive, testable requirements in EARS format based on the project description from spec initialization +- **Success Criteria**: + - Create complete requirements document aligned with steering context + - Follow the project's EARS patterns and constraints for all acceptance criteria + - Focus on core functionality without implementation details + - Update metadata to track generation status + + + +## Core Task +Generate complete requirements for feature **$1** based on the project description in requirements.md. + +## Execution Steps + +1. **Load Context**: + - Read `.kiro/specs/$1/spec.json` for language and metadata + - Read `.kiro/specs/$1/requirements.md` for project description + - **Load ALL steering context**: Read entire `.kiro/steering/` directory including: + - Default files: `structure.md`, `tech.md`, `product.md` + - All custom steering files (regardless of mode settings) + - This provides complete project memory and context + +2. 
**Read Guidelines**: + - Read `.kiro/settings/rules/ears-format.md` for EARS syntax rules + - Read `.kiro/settings/templates/specs/requirements.md` for document structure + +3. **Generate Requirements**: + - Create initial requirements based on project description + - Group related functionality into logical requirement areas + - Apply EARS format to all acceptance criteria + - Use language specified in spec.json + +4. **Update Metadata**: + - Set `phase: "requirements-generated"` + - Set `approvals.requirements.generated: true` + - Update `updated_at` timestamp + +## Important Constraints +- Focus on WHAT, not HOW (no implementation details) +- Requirements must be testable and verifiable +- Choose appropriate subject for EARS statements (system/service name for software) +- Generate initial version first, then iterate with user feedback (no sequential questions upfront) +- Requirement headings in requirements.md MUST include a leading numeric ID only (for example: "Requirement 1", "1.", "2 Feature ..."); do not use alphabetic IDs like "Requirement A". + + +## Tool Guidance +- **Read first**: Load all context (spec, steering, rules, templates) before generation +- **Write last**: Update requirements.md only after complete generation +- Use **WebSearch/WebFetch** only if external domain knowledge needed + +## Output Description +Provide output in the language specified in spec.json with: + +1. **Generated Requirements Summary**: Brief overview of major requirement areas (3-5 bullets) +2. **Document Status**: Confirm requirements.md updated and spec.json metadata updated +3. **Next Steps**: Guide user on how to proceed (approve and continue, or modify) + +**Format Requirements**: +- Use Markdown headings for clarity +- Include file paths in code blocks +- Keep summary concise (under 300 words) + +## Safety & Fallback + +### Error Scenarios +- **Missing Project Description**: If requirements.md lacks project description, ask user for feature details +- **Ambiguous Requirements**: Propose initial version and iterate with user rather than asking many upfront questions +- **Template Missing**: If template files don't exist, use inline fallback structure with warning +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language +- **Incomplete Requirements**: After generation, explicitly ask user if requirements cover all expected functionality +- **Steering Directory Empty**: Warn user that project context is missing and may affect requirement quality +- **Non-numeric Requirement Headings**: If existing headings do not include a leading numeric ID (for example, they use "Requirement A"), normalize them to numeric IDs and keep that mapping consistent (never mix numeric and alphabetic labels). + +### Next Phase: Design Generation + +**If Requirements Approved**: +- Review generated requirements at `.kiro/specs/$1/requirements.md` +- **Optional Gap Analysis** (for existing codebases): + - Run `/prompts:kiro-validate-gap $1` to analyze implementation gap with current code + - Identifies existing components, integration points, and implementation strategy + - Recommended for brownfield projects; skip for greenfield +- Then `/prompts:kiro-spec-design $1 -y` to proceed to design phase + +**If Modifications Needed**: +- Provide feedback and re-run `/prompts:kiro-spec-requirements $1` + +**Note**: Approval is mandatory before proceeding to design phase. 
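The spec-init prompt earlier in this diff replaces `{{FEATURE_NAME}}`, `{{TIMESTAMP}}`, and `{{PROJECT_DESCRIPTION}}` placeholders before writing spec.json and requirements.md. A minimal rendering helper, purely illustrative of that step:

```python
from datetime import datetime, timezone
from pathlib import Path

def render_spec_template(template_path: str, feature_name: str, description: str) -> str:
    """Fill the spec-init placeholders before writing spec.json / requirements.md."""
    text = Path(template_path).read_text(encoding="utf-8")
    replacements = {
        "{{FEATURE_NAME}}": feature_name,
        "{{TIMESTAMP}}": datetime.now(timezone.utc).isoformat(),
        "{{PROJECT_DESCRIPTION}}": description,
    }
    for placeholder, value in replacements.items():
        text = text.replace(placeholder, value)
    return text
```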
diff --git a/.codex/prompts/kiro-spec-status.md b/.codex/prompts/kiro-spec-status.md new file mode 100644 index 000000000..e8741da3d --- /dev/null +++ b/.codex/prompts/kiro-spec-status.md @@ -0,0 +1,87 @@ + +description: Show specification status and progress +argument-hint: +arguments: + feature-name: $1 + + +# Specification Status + + +- **Mission**: Display comprehensive status and progress for a specification +- **Success Criteria**: + - Show current phase and completion status + - Identify next actions and blockers + - Provide clear visibility into progress + + + +## Core Task +Generate status report for feature **$1** showing progress across all phases. + +## Execution Steps + +### Step 1: Load Spec Context +- Read `.kiro/specs/$1/spec.json` for metadata and phase status +- Read existing files: `requirements.md`, `design.md`, `tasks.md` (if they exist) +- Check `.kiro/specs/$1/` directory for available files + +### Step 2: Analyze Status + +**Parse each phase**: +- **Requirements**: Count requirements and acceptance criteria +- **Design**: Check for architecture, components, diagrams +- **Tasks**: Count completed vs total tasks (parse `- [x]` vs `- [ ]`) +- **Approvals**: Check approval status in spec.json + +### Step 3: Generate Report + +Create report in the language specified in spec.json covering: +1. **Current Phase & Progress**: Where the spec is in the workflow +2. **Completion Status**: Percentage complete for each phase +3. **Task Breakdown**: If tasks exist, show completed/remaining counts +4. **Next Actions**: What needs to be done next +5. **Blockers**: Any issues preventing progress + +## Critical Constraints +- Use language from spec.json +- Calculate accurate completion percentages +- Identify specific next action commands + + +## Tool Guidance +- **Read**: Load spec.json first, then other spec files as needed +- **Parse carefully**: Extract completion data from tasks.md checkboxes +- Use **Glob** to check which spec files exist + +## Output Description + +Provide status report in the language specified in spec.json: + +**Report Structure**: +1. **Feature Overview**: Name, phase, last updated +2. **Phase Status**: Requirements, Design, Tasks with completion % +3. **Task Progress**: If tasks exist, show X/Y completed +4. **Next Action**: Specific command to run next +5. **Issues**: Any blockers or missing elements + +**Format**: Clear, scannable format with emojis (✅/⏳/❌) for status + +## Safety & Fallback + +### Error Scenarios + +**Spec Not Found**: +- **Message**: "No spec found for `$1`. 
Check available specs in `.kiro/specs/`" +- **Action**: List available spec directories + +**Incomplete Spec**: +- **Warning**: Identify which files are missing +- **Suggested Action**: Point to next phase command + +### List All Specs + +To see all available specs: +- Run with no argument or use wildcard +- Shows all specs in `.kiro/specs/` with their status + diff --git a/.codex/prompts/kiro-spec-tasks.md b/.codex/prompts/kiro-spec-tasks.md new file mode 100644 index 000000000..7e771d8bc --- /dev/null +++ b/.codex/prompts/kiro-spec-tasks.md @@ -0,0 +1,140 @@ + +description: Generate implementation tasks for a specification +argument-hint: [-y] [--sequential] +arguments: + feature-name: $1 + -y flag: $2 + --sequential flag: $3 + + +# Implementation Tasks Generator + + +- **Mission**: Generate detailed, actionable implementation tasks that translate technical design into executable work items +- **Success Criteria**: + - All requirements mapped to specific tasks + - Tasks properly sized (1-3 hours each) + - Clear task progression with proper hierarchy + - Natural language descriptions focused on capabilities + + + +## Core Task +Generate implementation tasks for feature **$1** based on approved requirements and design. + +## Execution Steps + +### Step 1: Load Context + +**Read all necessary context**: +- `.kiro/specs/$1/spec.json`, `requirements.md`, `design.md` +- `.kiro/specs/$1/tasks.md` (if exists, for merge mode) +- **Entire `.kiro/steering/` directory** for complete project memory + +**Validate approvals**: +- If `-y` flag provided ($2 == "-y"): Auto-approve requirements and design in spec.json +- Otherwise: Verify both approved (stop if not, see Safety & Fallback) +- Determine sequential mode based on presence of `--sequential` + +### Step 2: Generate Implementation Tasks + +**Load generation rules and template**: +- Read `.kiro/settings/rules/tasks-generation.md` for principles +- If `sequential` is false: Read `.kiro/settings/rules/tasks-parallel-analysis.md` for parallel judgement criteria +- Read `.kiro/settings/templates/specs/tasks.md` for format (supports `(P)` markers) + +**Generate task list following all rules**: +- Use language specified in spec.json +- Map all requirements to tasks +- When documenting requirement coverage, list numeric requirement IDs only (comma-separated) without descriptive suffixes, parentheses, translations, or free-form labels +- Ensure all design components included +- Verify task progression is logical and incremental +- Collapse single-subtask structures by promoting them to major tasks and avoid duplicating details on container-only major tasks (use template patterns accordingly) +- Apply `(P)` markers to tasks that satisfy parallel criteria (omit markers when sequential mode requested) +- Mark optional test coverage subtasks with `- [ ]*` only when they strictly cover acceptance criteria already satisfied by core implementation and can be deferred post-MVP +- If existing tasks.md found, merge with new content + +### Step 3: Finalize + +**Write and update**: +- Create/update `.kiro/specs/$1/tasks.md` +- Update spec.json metadata: + - Set `phase: "tasks-generated"` + - Set `approvals.tasks.generated: true, approved: false` + - Set `approvals.requirements.approved: true` + - Set `approvals.design.approved: true` + - Update `updated_at` timestamp + +## Critical Constraints +- **Follow rules strictly**: All principles in tasks-generation.md are mandatory +- **Natural Language**: Describe what to do, not code structure details +- **Complete 
Coverage**: ALL requirements must map to tasks +- **Maximum 2 Levels**: Major tasks and sub-tasks only (no deeper nesting) +- **Sequential Numbering**: Major tasks increment (1, 2, 3...), never repeat +- **Task Integration**: Every task must connect to the system (no orphaned work) + + +## Tool Guidance +- **Read first**: Load all context, rules, and templates before generation +- **Write last**: Generate tasks.md only after complete analysis and verification + +## Output Description + +Provide brief summary in the language specified in spec.json: + +1. **Status**: Confirm tasks generated at `.kiro/specs/$1/tasks.md` +2. **Task Summary**: + - Total: X major tasks, Y sub-tasks + - All Z requirements covered + - Average task size: 1-3 hours per sub-task +3. **Quality Validation**: + - ✅ All requirements mapped to tasks + - ✅ Task dependencies verified + - ✅ Testing tasks included +4. **Next Action**: Review tasks and proceed when ready + +**Format**: Concise (under 200 words) + +## Safety & Fallback + +### Error Scenarios + +**Requirements or Design Not Approved**: +- **Stop Execution**: Cannot proceed without approved requirements and design +- **User Message**: "Requirements and design must be approved before task generation" +- **Suggested Action**: "Run `/prompts:kiro-spec-tasks $1 -y` to auto-approve both and proceed" + +**Missing Requirements or Design**: +- **Stop Execution**: Both documents must exist +- **User Message**: "Missing requirements.md or design.md at `.kiro/specs/$1/`" +- **Suggested Action**: "Complete requirements and design phases first" + +**Incomplete Requirements Coverage**: +- **Warning**: "Not all requirements mapped to tasks. Review coverage." +- **User Action Required**: Confirm intentional gaps or regenerate tasks + +**Template/Rules Missing**: +- **User Message**: "Template or rules files missing in `.kiro/settings/`" +- **Fallback**: Use inline basic structure with warning +- **Suggested Action**: "Check repository setup or restore template files" +- **Missing Numeric Requirement IDs**: + - **Stop Execution**: All requirements in requirements.md MUST have numeric IDs. If any requirement lacks a numeric ID, stop and request that requirements.md be fixed before generating tasks. + +### Next Phase: Implementation + +**Before Starting Implementation**: +- **IMPORTANT**: Clear conversation history and free up context before running `/prompts:kiro-spec-impl` +- This applies when starting first task OR switching between tasks +- Fresh context ensures clean state and proper task focus + +**If Tasks Approved**: +- Execute specific task: `/prompts:kiro-spec-impl $1 1.1` (recommended: clear context between each task) +- Execute multiple tasks: `/prompts:kiro-spec-impl $1 1.1,1.2` (use cautiously, clear context between tasks) +- Without arguments: `/prompts:kiro-spec-impl $1` (executes all pending tasks - NOT recommended due to context bloat) + +**If Modifications Needed**: +- Provide feedback and re-run `/prompts:kiro-spec-tasks $1` +- Existing tasks used as reference (merge mode) + +**Note**: The implementation phase will guide you through executing tasks with appropriate context and validation. 
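The "All requirements mapped to tasks" validation above pairs naturally with the numeric-ID convention. A coverage sketch, reusing the assumed `_Requirements: 1.1, 1.2_` task annotation:

```python
def uncovered_requirements(requirement_ids: set[str], tasks_md: str) -> set[str]:
    """Requirement IDs defined in requirements.md that no task references."""
    referenced: set[str] = set()
    for line in tasks_md.splitlines():
        if "requirements:" not in line.lower():
            continue
        _, _, tail = line.partition(":")
        referenced.update(tok.strip().strip("_") for tok in tail.split(",") if tok.strip())
    return requirement_ids - referenced
```

An empty result corresponds to the "All Z requirements covered" line in the summary; anything else maps to the Incomplete Requirements Coverage warning.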
+ diff --git a/.codex/prompts/kiro-steering-custom.md b/.codex/prompts/kiro-steering-custom.md new file mode 100644 index 000000000..7691d06b2 --- /dev/null +++ b/.codex/prompts/kiro-steering-custom.md @@ -0,0 +1,130 @@ + +description: Create custom steering documents for specialized project contexts +argument-hint: +arguments: + what-to-create-custom-steering: $ARGUMENTS + + +# Kiro Custom Steering Creation + + +**Role**: Create specialized steering documents beyond core files (product, tech, structure). + +**Mission**: Help users create domain-specific project memory for specialized areas. + +**Success Criteria**: +- Custom steering captures specialized patterns +- Follows same granularity principles as core steering +- Provides clear value for specific domain + + + +## Workflow + +1. **Ask user** for custom steering needs: + - Domain/topic (e.g., "API standards", "testing approach") + - Specific requirements or patterns to document + +2. **Check if template exists**: + - Load from `.kiro/settings/templates/steering-custom/{name}.md` if available + - Use as starting point, customize based on project + +3. **Analyze codebase** (JIT) for relevant patterns: + - **Glob** for related files + - **Read** for existing implementations + - **Grep** for specific patterns + +4. **Generate custom steering**: + - Follow template structure if available + - Apply principles from `.kiro/settings/rules/steering-principles.md` + - Focus on patterns, not exhaustive lists + - Keep to 100-200 lines (2-3 minute read) + +5. **Create file** in `.kiro/steering/{name}.md` + +## Available Templates + +Templates available in `.kiro/settings/templates/steering-custom/`: + +1. **api-standards.md** - REST/GraphQL conventions, error handling +2. **testing.md** - Test organization, mocking, coverage +3. **security.md** - Auth patterns, input validation, secrets +4. **database.md** - Schema design, migrations, query patterns +5. **error-handling.md** - Error types, logging, retry strategies +6. **authentication.md** - Auth flows, permissions, session management +7. **deployment.md** - CI/CD, environments, rollback procedures + +Load template when needed, customize for project. + +## Steering Principles + +From `.kiro/settings/rules/steering-principles.md`: + +- **Patterns over lists**: Document patterns, not every file/component +- **Single domain**: One topic per file +- **Concrete examples**: Show patterns with code +- **Maintainable size**: 100-200 lines typical +- **Security first**: Never include secrets or sensitive data + + + +## Tool guidance + +- **Read**: Load template, analyze existing code +- **Glob**: Find related files for pattern analysis +- **Grep**: Search for specific patterns +- **LS**: Understand relevant structure + +**JIT Strategy**: Load template only when creating that type of steering. + +## Output description + +Chat summary with file location (file created directly). + +``` +✅ Custom Steering Created + +## Created: +- .kiro/steering/api-standards.md + +## Based On: +- Template: api-standards.md +- Analyzed: src/api/ directory patterns +- Extracted: REST conventions, error format + +## Content: +- Endpoint naming patterns +- Request/response format +- Error handling conventions +- Authentication approach + +Review and customize as needed. 
+``` + +## Examples + +### Success: API Standards +**Input**: "Create API standards steering" +**Action**: Load template, analyze src/api/, extract patterns +**Output**: api-standards.md with project-specific REST conventions + +### Success: Testing Strategy +**Input**: "Document our testing approach" +**Action**: Load template, analyze test files, extract patterns +**Output**: testing.md with test organization and mocking strategies + +## Safety & Fallback + +- **No template**: Generate from scratch based on domain knowledge +- **Security**: Never include secrets (load principles) +- **Validation**: Ensure doesn't duplicate core steering content + +## Notes + +- Templates are starting points, customize for project +- Follow same granularity principles as core steering +- All steering files loaded as project memory +- Custom files equally important as core files +- Avoid documenting agent-specific tooling directories (e.g. `.cursor/`, `.gemini/`, `.claude/`) +- Light references to `.kiro/specs/` and `.kiro/steering/` are acceptable; avoid other `.kiro/` directories + diff --git a/.codex/prompts/kiro-steering.md b/.codex/prompts/kiro-steering.md new file mode 100644 index 000000000..781ea218b --- /dev/null +++ b/.codex/prompts/kiro-steering.md @@ -0,0 +1,143 @@ + +description: Manage .kiro/steering/ as persistent project knowledge + + +# Kiro Steering Management + + +**Role**: Maintain `.kiro/steering/` as persistent project memory. + +**Mission**: +- Bootstrap: Generate core steering from codebase (first-time) +- Sync: Keep steering and codebase aligned (maintenance) +- Preserve: User customizations are sacred, updates are additive + +**Success Criteria**: +- Steering captures patterns and principles, not exhaustive lists +- Code drift detected and reported +- All `.kiro/steering/*.md` treated equally (core + custom) + + + +## Scenario Detection + +Check `.kiro/steering/` status: + +**Bootstrap Mode**: Empty OR missing core files (product.md, tech.md, structure.md) +**Sync Mode**: All core files exist + +--- + +## Bootstrap Flow + +1. Load templates from `.kiro/settings/templates/steering/` +2. Analyze codebase (JIT): + - `glob_file_search` for source files + - `read_file` for README, package.json, etc. + - `grep` for patterns +3. Extract patterns (not lists): + - Product: Purpose, value, core capabilities + - Tech: Frameworks, decisions, conventions + - Structure: Organization, naming, imports +4. Generate steering files (follow templates) +5. Load principles from `.kiro/settings/rules/steering-principles.md` +6. Present summary for review + +**Focus**: Patterns that guide decisions, not catalogs of files/dependencies. + +--- + +## Sync Flow + +1. Load all existing steering (`.kiro/steering/*.md`) +2. Analyze codebase for changes (JIT) +3. Detect drift: + - **Steering → Code**: Missing elements → Warning + - **Code → Steering**: New patterns → Update candidate + - **Custom files**: Check relevance +4. Propose updates (additive, preserve user content) +5. Report: Updates, warnings, recommendations + +**Update Philosophy**: Add, don't replace. Preserve user sections. + +--- + +## Granularity Principle + +From `.kiro/settings/rules/steering-principles.md`: + +> "If new code follows existing patterns, steering shouldn't need updating." + +Document patterns and principles, not exhaustive lists. 
+ +**Bad**: List every file in directory tree +**Good**: Describe organization pattern with examples + + + +## Tool guidance + +- `glob_file_search`: Find source/config files +- `read_file`: Read steering, docs, configs +- `grep`: Search patterns +- `list_dir`: Analyze structure + +**JIT Strategy**: Fetch when needed, not upfront. + +## Output description + +Chat summary only (files updated directly). + +### Bootstrap: +``` +✅ Steering Created + +## Generated: +- product.md: [Brief description] +- tech.md: [Key stack] +- structure.md: [Organization] + +Review and approve as Source of Truth. +``` + +### Sync: +``` +✅ Steering Updated + +## Changes: +- tech.md: React 18 → 19 +- structure.md: Added API pattern + +## Code Drift: +- Components not following import conventions + +## Recommendations: +- Consider api-standards.md +``` + +## Examples + +### Bootstrap +**Input**: Empty steering, React TypeScript project +**Output**: 3 files with patterns - "Feature-first", "TypeScript strict", "React 19" + +### Sync +**Input**: Existing steering, new `/api` directory +**Output**: Updated structure.md, flagged non-compliant files, suggested api-standards.md + +## Safety & Fallback + +- **Security**: Never include keys, passwords, secrets (see principles) +- **Uncertainty**: Report both states, ask user +- **Preservation**: Add rather than replace when in doubt + +## Notes + +- All `.kiro/steering/*.md` loaded as project memory +- Templates and principles are external for customization +- Focus on patterns, not catalogs +- "Golden Rule": New code following patterns shouldn't require steering updates +- Avoid documenting agent-specific tooling directories (e.g. `.cursor/`, `.gemini/`, `.claude/`) +- `.kiro/settings/` content should NOT be documented in steering files (settings are metadata, not project knowledge) +- Light references to `.kiro/specs/` and `.kiro/steering/` are acceptable; avoid other `.kiro/` directories + diff --git a/.codex/prompts/kiro-validate-design.md b/.codex/prompts/kiro-validate-design.md new file mode 100644 index 000000000..63b8710c8 --- /dev/null +++ b/.codex/prompts/kiro-validate-design.md @@ -0,0 +1,93 @@ + +description: Interactive technical design quality review and validation +argument-hint: +arguments: + feature-name: $1 + + +# Technical Design Validation + + +- **Mission**: Conduct interactive quality review of technical design to ensure readiness for implementation +- **Success Criteria**: + - Critical issues identified (maximum 3 most important concerns) + - Balanced assessment with strengths recognized + - Clear GO/NO-GO decision with rationale + - Actionable feedback for improvements if needed + + + +## Core Task +Interactive design quality review for feature **$1** based on approved requirements and design document. + +## Execution Steps + +1. **Load Context**: + - Read `.kiro/specs/$1/spec.json` for language and metadata + - Read `.kiro/specs/$1/requirements.md` for requirements + - Read `.kiro/specs/$1/design.md` for design document + - **Load ALL steering context**: Read entire `.kiro/steering/` directory including: + - Default files: `structure.md`, `tech.md`, `product.md` + - All custom steering files (regardless of mode settings) + - This provides complete project memory and context + +2. **Read Review Guidelines**: + - Read `.kiro/settings/rules/design-review.md` for review criteria and process + +3. 
**Execute Design Review**: + - Follow design-review.md process: Analysis → Critical Issues → Strengths → GO/NO-GO + - Limit to 3 most important concerns + - Engage interactively with user + - Use language specified in spec.json for output + +4. **Provide Decision and Next Steps**: + - Clear GO/NO-GO decision with rationale + - Guide user on proceeding based on decision + +## Important Constraints +- **Quality assurance, not perfection seeking**: Accept acceptable risk +- **Critical focus only**: Maximum 3 issues, only those significantly impacting success +- **Interactive approach**: Engage in dialogue, not one-way evaluation +- **Balanced assessment**: Recognize both strengths and weaknesses +- **Actionable feedback**: All suggestions must be implementable + + +## Tool Guidance +- **Read first**: Load all context (spec, steering, rules) before review +- **Grep if needed**: Search codebase for pattern validation or integration checks +- **Interactive**: Engage with user throughout the review process + +## Output Description +Provide output in the language specified in spec.json with: + +1. **Review Summary**: Brief overview (2-3 sentences) of design quality and readiness +2. **Critical Issues**: Maximum 3, following design-review.md format +3. **Design Strengths**: 1-2 positive aspects +4. **Final Assessment**: GO/NO-GO decision with rationale and next steps + +**Format Requirements**: +- Use Markdown headings for clarity +- Follow design-review.md output format +- Keep summary concise + +## Safety & Fallback + +### Error Scenarios +- **Missing Design**: If design.md doesn't exist, stop with message: "Run `/prompts:kiro-spec-design $1` first to generate design document" +- **Design Not Generated**: If design phase not marked as generated in spec.json, warn but proceed with review +- **Empty Steering Directory**: Warn user that project context is missing and may affect review quality +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language + +### Next Phase: Task Generation + +**If Design Passes Validation (GO Decision)**: +- Review feedback and apply changes if needed +- Run `/prompts:kiro-spec-tasks $1` to generate implementation tasks +- Or `/prompts:kiro-spec-tasks $1 -y` to auto-approve and proceed directly + +**If Design Needs Revision (NO-GO Decision)**: +- Address critical issues identified +- Re-run `/prompts:kiro-spec-design $1` with improvements +- Re-validate with `/prompts:kiro-validate-design $1` + +**Note**: Design validation is recommended but optional. Quality review helps catch issues early. diff --git a/.codex/prompts/kiro-validate-gap.md b/.codex/prompts/kiro-validate-gap.md new file mode 100644 index 000000000..a3da29338 --- /dev/null +++ b/.codex/prompts/kiro-validate-gap.md @@ -0,0 +1,89 @@ + +description: Analyze implementation gap between requirements and existing codebase +argument-hint: +arguments: + feature-name: $1 + + +# Implementation Gap Validation + + +- **Mission**: Analyze the gap between requirements and existing codebase to inform implementation strategy +- **Success Criteria**: + - Comprehensive understanding of existing codebase patterns and components + - Clear identification of missing capabilities and integration challenges + - Multiple viable implementation approaches evaluated + - Technical research needs identified for design phase + + + +## Core Task +Analyze implementation gap for feature **$1** based on approved requirements and existing codebase. + +## Execution Steps + +1. 
**Load Context**: + - Read `.kiro/specs/$1/spec.json` for language and metadata + - Read `.kiro/specs/$1/requirements.md` for requirements + - **Load ALL steering context**: Read entire `.kiro/steering/` directory including: + - Default files: `structure.md`, `tech.md`, `product.md` + - All custom steering files (regardless of mode settings) + - This provides complete project memory and context + +2. **Read Analysis Guidelines**: + - Read `.kiro/settings/rules/gap-analysis.md` for comprehensive analysis framework + +3. **Execute Gap Analysis**: + - Follow gap-analysis.md framework for thorough investigation + - Analyze existing codebase using Grep and Read tools + - Use WebSearch/WebFetch for external dependency research if needed + - Evaluate multiple implementation approaches (extend/new/hybrid) + - Use language specified in spec.json for output + +4. **Generate Analysis Document**: + - Create comprehensive gap analysis following the output guidelines in gap-analysis.md + - Present multiple viable options with trade-offs + - Flag areas requiring further research + +## Important Constraints +- **Information over Decisions**: Provide analysis and options, not final implementation choices +- **Multiple Options**: Present viable alternatives when applicable +- **Thorough Investigation**: Use tools to deeply understand existing codebase +- **Explicit Gaps**: Clearly flag areas needing research or investigation + + +## Tool Guidance +- **Read first**: Load all context (spec, steering, rules) before analysis +- **Grep extensively**: Search codebase for patterns, conventions, and integration points +- **WebSearch/WebFetch**: Research external dependencies and best practices when needed +- **Write last**: Generate analysis only after complete investigation + +## Output Description +Provide output in the language specified in spec.json with: + +1. **Analysis Summary**: Brief overview (3-5 bullets) of scope, challenges, and recommendations +2. **Document Status**: Confirm analysis approach used +3. **Next Steps**: Guide user on proceeding to design phase + +**Format Requirements**: +- Use Markdown headings for clarity +- Keep summary concise (under 300 words) +- Detailed analysis follows gap-analysis.md output guidelines + +## Safety & Fallback + +### Error Scenarios +- **Missing Requirements**: If requirements.md doesn't exist, stop with message: "Run `/prompts:kiro-spec-requirements $1` first to generate requirements" +- **Requirements Not Approved**: If requirements not approved, warn user but proceed (gap analysis can inform requirement revisions) +- **Empty Steering Directory**: Warn user that project context is missing and may affect analysis quality +- **Complex Integration Unclear**: Flag for comprehensive research in design phase rather than blocking +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language + +### Next Phase: Design Generation + +**If Gap Analysis Complete**: +- Review gap analysis insights +- Run `/prompts:kiro-spec-design $1` to create technical design document +- Or `/prompts:kiro-spec-design $1 -y` to auto-approve requirements and proceed directly + +**Note**: Gap analysis is optional but recommended for brownfield projects to inform design decisions. 
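The gap analysis above deliberately produces options rather than decisions: multiple implementation approaches (extend, new, hybrid) with explicit trade-offs and flagged research needs. As a rough sketch of how such an option could be kept structured, the TypeScript shape below is purely hypothetical; the authoritative output format is whatever `.kiro/settings/rules/gap-analysis.md` specifies.

```typescript
// Hypothetical structure for gap-analysis options; the authoritative output
// guidelines live in .kiro/settings/rules/gap-analysis.md.
type Approach = "extend" | "new" | "hybrid";

interface ImplementationOption {
  approach: Approach;
  summary: string;               // one-line description of the option
  reusedComponents: string[];    // existing code this option builds on
  missingCapabilities: string[]; // gaps that still need new work
  integrationRisks: string[];    // risks surfaced for the design phase
  researchNeeded: string[];      // items deferred to design-phase research
}

// Example: two options surfaced for a hypothetical feature, left for the
// design phase to decide between.
const options: ImplementationOption[] = [
  {
    approach: "extend",
    summary: "Extend the existing service with new endpoints",
    reusedComponents: ["existing auth middleware"],
    missingCapabilities: ["bulk export"],
    integrationRisks: ["shared schema migration"],
    researchNeeded: ["rate limits of the external API"],
  },
  {
    approach: "new",
    summary: "Stand up a dedicated module with its own storage",
    reusedComponents: [],
    missingCapabilities: ["everything net-new"],
    integrationRisks: ["duplicated domain logic"],
    researchNeeded: ["data synchronization strategy"],
  },
];

console.log(options.map((o) => o.approach).join(", ")); // "extend, new"
```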
diff --git a/.codex/prompts/kiro-validate-impl.md b/.codex/prompts/kiro-validate-impl.md new file mode 100644 index 000000000..8f632a02f --- /dev/null +++ b/.codex/prompts/kiro-validate-impl.md @@ -0,0 +1,140 @@ + +description: Validate implementation against requirements, design, and tasks +argument-hint: [feature-name] [task-numbers] +arguments: + feature-name: $1 + task-numbers: $2 + + +# Implementation Validation + + +- **Mission**: Verify that implementation aligns with approved requirements, design, and tasks +- **Success Criteria**: + - All specified tasks marked as completed + - Tests exist and pass for implemented functionality + - Requirements traceability confirmed (EARS requirements covered) + - Design structure reflected in implementation + - No regressions in existing functionality + + + +## Core Task +Validate implementation for feature(s) and task(s) based on approved specifications. + +## Execution Steps + +### 1. Detect Validation Target + +**If no arguments provided** (`$1` empty): +- Parse conversation history for `/prompts:kiro-spec-impl [tasks]` commands +- Extract feature names and task numbers from each execution +- Aggregate all implemented tasks by feature +- Report detected implementations (e.g., "user-auth: 1.1, 1.2, 1.3") +- If no history found, scan `.kiro/specs/` for features with completed tasks `[x]` + +**If feature provided** (`$1` present, `$2` empty): +- Use specified feature +- Detect all completed tasks `[x]` in `.kiro/specs/$1/tasks.md` + +**If both feature and tasks provided** (`$1` and `$2` present): +- Validate specified feature and tasks only (e.g., `user-auth 1.1,1.2`) + +### 2. Load Context + +For each detected feature: +- Read `.kiro/specs//spec.json` for metadata +- Read `.kiro/specs//requirements.md` for requirements +- Read `.kiro/specs//design.md` for design structure +- Read `.kiro/specs//tasks.md` for task list +- **Load ALL steering context**: Read entire `.kiro/steering/` directory including: + - Default files: `structure.md`, `tech.md`, `product.md` + - All custom steering files (regardless of mode settings) + +### 3. Execute Validation + +For each task, verify: + +#### Task Completion Check +- Checkbox is `[x]` in tasks.md +- If not completed, flag as "Task not marked complete" + +#### Test Coverage Check +- Tests exist for task-related functionality +- Tests pass (no failures or errors) +- Use Bash to run test commands (e.g., `npm test`, `pytest`) +- If tests fail or don't exist, flag as "Test coverage issue" + +#### Requirements Traceability +- Identify EARS requirements related to the task +- Use Grep to search implementation for evidence of requirement coverage +- If requirement not traceable to code, flag as "Requirement not implemented" + +#### Design Alignment +- Check if design.md structure is reflected in implementation +- Verify key interfaces, components, and modules exist +- Use Grep/LS to confirm file structure matches design +- If misalignment found, flag as "Design deviation" + +#### Regression Check +- Run full test suite (if available) +- Verify no existing tests are broken +- If regressions detected, flag as "Regression detected" + +### 4. 
Generate Report + +Provide summary in the language specified in spec.json: +- Validation summary by feature +- Coverage report (tasks, requirements, design) +- Issues and deviations with severity (Critical/Warning) +- GO/NO-GO decision + +## Important Constraints +- **Conversation-aware**: Prioritize conversation history for auto-detection +- **Non-blocking warnings**: Design deviations are warnings unless critical +- **Test-first focus**: Test coverage is mandatory for GO decision +- **Traceability required**: All requirements must be traceable to implementation + + +## Tool Guidance +- **Conversation parsing**: Extract `/prompts:kiro-spec-impl` patterns from history +- **Read context**: Load all specs and steering before validation +- **Bash for tests**: Execute test commands to verify pass status +- **Grep for traceability**: Search codebase for requirement evidence +- **LS/Glob for structure**: Verify file structure matches design + +## Output Description + +Provide output in the language specified in spec.json with: + +1. **Detected Target**: Features and tasks being validated (if auto-detected) +2. **Validation Summary**: Brief overview per feature (pass/fail counts) +3. **Issues**: List of validation failures with severity and location +4. **Coverage Report**: Requirements/design/task coverage percentages +5. **Decision**: GO (ready for next phase) / NO-GO (needs fixes) + +**Format Requirements**: +- Use Markdown headings and tables for clarity +- Flag critical issues with ⚠️ or 🔴 +- Keep summary concise (under 400 words) + +## Safety & Fallback + +### Error Scenarios +- **No Implementation Found**: If no `/prompts:kiro-spec-impl` in history and no `[x]` tasks, report "No implementations detected" +- **Test Command Unknown**: If test framework unclear, warn and skip test validation (manual verification required) +- **Missing Spec Files**: If spec.json/requirements.md/design.md missing, stop with error +- **Language Undefined**: Default to English (`en`) if spec.json doesn't specify language + +### Next Steps Guidance + +**If GO Decision**: +- Implementation validated and ready +- Proceed to deployment or next feature + +**If NO-GO Decision**: +- Address critical issues listed +- Re-run `/prompts:kiro-spec-impl [tasks]` for fixes +- Re-validate with `/prompts:kiro-validate-impl [feature] [tasks]` + +**Note**: Validation is recommended after implementation to ensure spec alignment and quality. diff --git a/.kiro/settings/rules/design-discovery-full.md b/.kiro/settings/rules/design-discovery-full.md index 9e4e0e74d..b80c2f264 100644 --- a/.kiro/settings/rules/design-discovery-full.md +++ b/.kiro/settings/rules/design-discovery-full.md @@ -44,12 +44,14 @@ Conduct comprehensive research and analysis to ensure the technical design is ba - Document security considerations - Note any gaps requiring implementation investigation -### 5. Architecture Pattern Analysis +### 5. Architecture Pattern & Boundary Analysis **Evaluate Architectural Options**: - Compare relevant patterns (MVC, Clean, Hexagonal, Event-driven) -- Assess fit with existing architecture -- Consider scalability implications +- Assess fit with existing architecture and steering principles +- Identify domain boundaries and ownership seams required to avoid team conflicts +- Consider scalability implications and operational concerns - Evaluate maintainability and team expertise +- Document preferred pattern and rejected alternatives in `research.md` ### 6. 
Risk Assessment **Identify Technical Risks**: @@ -81,9 +83,11 @@ Conduct comprehensive research and analysis to ensure the technical design is ba 4. Investigate similar open-source implementations ## Output Requirements -Document all findings that impact design decisions: -- Key insights affecting architecture +Capture all findings that impact design decisions in `research.md` using the shared template: +- Key insights affecting architecture, technology alignment, and contracts - Constraints discovered during research -- Recommended approaches based on findings +- Recommended approaches and selected architecture pattern with rationale +- Rejected alternatives and trade-offs (documented in the Design Decisions section) +- Updated domain boundaries that inform Components & Interface Contracts - Risks and mitigation strategies - Gaps requiring further investigation during implementation \ No newline at end of file diff --git a/.kiro/settings/rules/design-discovery-light.md b/.kiro/settings/rules/design-discovery-light.md index 0e20526fa..7a00ae29b 100644 --- a/.kiro/settings/rules/design-discovery-light.md +++ b/.kiro/settings/rules/design-discovery-light.md @@ -24,6 +24,7 @@ Quickly analyze existing system and integration requirements for feature extensi - Verify basic usage patterns - Check for known compatibility issues - Confirm licensing compatibility +- Record key findings in `research.md` (technology alignment section) ### 4. Integration Risk Assessment **Quick Risk Check**: @@ -41,7 +42,7 @@ Switch to full discovery if you find: - Unknown or poorly documented dependencies ## Output Requirements -- Clear integration approach +- Clear integration approach (note boundary impacts in `research.md`) - List of files/components to modify - New dependencies with versions - Integration risks and mitigations diff --git a/.kiro/settings/rules/design-principles.md b/.kiro/settings/rules/design-principles.md index c434c2a57..cca6701a5 100644 --- a/.kiro/settings/rules/design-principles.md +++ b/.kiro/settings/rules/design-principles.md @@ -25,6 +25,8 @@ - **Clear Boundaries**: Explicit domain ownership - **Dependency Direction**: Follow architectural layers - **Interface Segregation**: Minimal, focused interfaces +- **Team-safe Interfaces**: Design boundaries that allow parallel implementation without merge conflicts +- **Research Traceability**: Record boundary decisions and rationale in `research.md` ### 5. Data Modeling Standards - **Domain First**: Start with business concepts @@ -43,6 +45,7 @@ - **Contract First**: Define interfaces before implementation - **Versioning**: Plan for API evolution - **Idempotency**: Design for retry safety +- **Contract Visibility**: Surface API and event contracts in design.md while linking extended details from `research.md` ## Documentation Standards @@ -57,6 +60,77 @@ - **Traceable**: Requirements to components mapping - **Complete**: All aspects covered for implementation - **Consistent**: Uniform terminology throughout +- **Focused**: Keep design.md centered on architecture and contracts; move investigation logs and lengthy comparisons to `research.md` + +## Section Authoring Guidance + +### Global Ordering +- Default flow: Overview → Goals/Non-Goals → Requirements Traceability → Architecture → Technology Stack → System Flows → Components & Interfaces → Data Models → Optional sections. +- Teams may swap Traceability earlier or place Data Models nearer Architecture when it improves clarity, but keep section headings intact. 
+- Within each section, follow **Summary → Scope → Decisions → Impacts/Risks** so reviewers can scan consistently. + +### Requirement IDs +- Reference requirements as `2.1, 2.3` without prefixes (no “Requirement 2.1”). +- All requirements MUST have numeric IDs. If a requirement lacks a numeric ID, stop and fix `requirements.md` before continuing. +- Use `N.M`-style numeric IDs where `N` is the top-level requirement number from requirements.md (for example, Requirement 1 → 1.1, 1.2; Requirement 2 → 2.1, 2.2). +- Every component, task, and traceability row must reference the same canonical numeric ID. + +### Technology Stack +- Include ONLY layers impacted by this feature (frontend, backend, data, messaging, infra). +- For each layer specify tool/library + version + the role it plays; push extended rationale, comparisons, or benchmarks to `research.md`. +- When extending an existing system, highlight deviations from the current stack and list new dependencies. + +### System Flows +- Add diagrams only when they clarify behavior: + - **Sequence** for multi-step interactions + - **Process/State** for branching rules or lifecycle + - **Data/Event** for pipelines or async patterns +- Always use pure Mermaid. If no complex flow exists, omit the entire section. + +### Requirements Traceability +- Use the standard table (`Requirement | Summary | Components | Interfaces | Flows`) to prove coverage. +- Collapse to bullet form only when a single requirement maps 1:1 to a component. +- Prefer the component summary table for simple mappings; reserve the full traceability table for complex or compliance-sensitive requirements. +- Re-run this mapping whenever requirements or components change to avoid drift. + +### Components & Interfaces Authoring +- Group components by domain/layer and provide one block per component. +- Begin with a summary table listing Component, Domain, Intent, Requirement coverage, key dependencies, and selected contracts. +- Table fields: Intent (one line), Requirements (`2.1, 2.3`), Owner/Reviewers (optional). +- Dependencies table must mark each entry as Inbound/Outbound/External and assign Criticality (`P0` blocking, `P1` high-risk, `P2` informational). +- Summaries of external dependency research stay here; detailed investigation (API signatures, rate limits, migration notes) belongs in `research.md`. +- design.md must remain a self-contained reviewer artifact. Reference `research.md` only for background, and restate any conclusions or decisions here. +- Contracts: tick only the relevant types (Service/API/Event/Batch/State). Unchecked types should not appear later in the component section. +- Service interfaces must declare method signatures, inputs/outputs, and error envelopes. API/Event/Batch contracts require schema tables or bullet lists covering trigger, payload, delivery, idempotency. +- Use **Integration & Migration Notes**, **Validation Hooks**, and **Open Questions / Risks** to document rollout strategy, observability, and unresolved decisions. +- Detail density rules: + - **Full block**: components introducing new boundaries (logic hooks, shared services, external integrations, data layers). + - **Summary-only**: presentational/UI components with no new boundaries (plus a short Implementation Note if needed). +- Implementation Notes must combine Integration / Validation / Risks into a single bulleted subsection to reduce repetition. +- Prefer lists or inline descriptors for short data (dependencies, contract selections). Use tables only when comparing multiple items. 
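As a concrete illustration of the contract and requirement-ID conventions above, here is a minimal TypeScript sketch for a hypothetical `CheckoutService` component block. The service name, operations, and types are invented for illustration (the Checkout Service name is borrowed from the EARS examples elsewhere in this rule set); real contracts belong in the component's design.md block.

```typescript
// Hypothetical contract sketch for a component block in design.md.
// Requirements: 2.1, 2.3 (numeric IDs only, no descriptive suffixes)

// Error envelope shared by the component's operations.
type CheckoutError =
  | { kind: "EmptyCart" }
  | { kind: "PaymentDeclined"; reason: string }
  | { kind: "Unexpected"; message: string };

// Result type that makes the error envelope explicit in every signature.
type Result<T, E> = { ok: true; value: T } | { ok: false; error: E };

interface CartItem {
  sku: string;
  quantity: number;
}

interface Order {
  orderId: string;
  total: number;
}

// Service interface: method signatures declare inputs, outputs, and errors.
interface CheckoutService {
  // Precondition: cart has been loaded for the current session.
  // Postcondition: on success, an Order exists and the cart is cleared.
  validateCart(items: CartItem[]): Result<void, CheckoutError>;
  placeOrder(items: CartItem[]): Promise<Result<Order, CheckoutError>>;
}
```

A presentational component with no new boundaries would skip a block like this and reference a shared base props interface instead, as described in the Shared Interfaces & Props guidance that follows.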
+ +### Shared Interfaces & Props +- Define a base interface (e.g., `BaseUIPanelProps`) for recurring UI components and extend it per component to capture only the deltas. +- Hooks, utilities, and integration adapters that introduce new contracts should still include full TypeScript signatures. +- When reusing a base contract, reference it explicitly (e.g., “Extends `BaseUIPanelProps` with `onSubmitAnswer` callback”) instead of duplicating the code block. + +### Data Models +- Domain Model covers aggregates, entities, value objects, domain events, and invariants. Add Mermaid diagrams only when relationships are non-trivial. +- Logical Data Model should articulate structure, indexing, sharding, and storage-specific considerations (event store, KV/wide-column) relevant to the change. +- Data Contracts & Integration section documents API payloads, event schemas, and cross-service synchronization patterns when the feature crosses boundaries. +- Lengthy type definitions or vendor-specific option objects should be placed in the Supporting References section within design.md, linked from the relevant section. Investigation notes stay in `research.md`. +- Supporting References usage is optional; only create it when keeping the content in the main body would reduce readability. All decisions must still appear in the main sections so design.md stands alone. + +### Error/Testing/Security/Performance Sections +- Record only feature-specific decisions or deviations. Link or reference organization-wide standards (steering) for baseline practices instead of restating them. + +### Diagram & Text Deduplication +- Do not restate diagram content verbatim in prose. Use the text to highlight key decisions, trade-offs, or impacts that are not obvious from the visual. +- When a decision is fully captured in the diagram annotations, a short “Key Decisions” bullet is sufficient. + +### General Deduplication +- Avoid repeating the same information across Overview, Architecture, and Components. Reference earlier sections when context is identical. +- If a requirement/component relationship is captured in the summary table, do not rewrite it elsewhere unless extra nuance is added. ## Diagram Guidelines @@ -82,6 +156,7 @@ graph TB - ❌ `DnD[@dnd-kit/core]` → invalid ID (`@`). - ❌ `UI[KanbanBoard(React)]` → invalid label (`()`). - ✅ `DndKit[dnd-kit core]` → use plain text in labels, keep technology details in the accompanying description. + - ℹ️ Mermaid strict-mode will otherwise fail with errors like `Expecting 'SQE' ... got 'PS'`; remove punctuation from labels before rendering. - **Edges** – show data or control flow direction. - **Groups** – using Mermaid subgraphs to cluster related components is allowed; use it sparingly for clarity. diff --git a/.kiro/settings/rules/ears-format.md b/.kiro/settings/rules/ears-format.md index f9d288011..cef8e7df0 100644 --- a/.kiro/settings/rules/ears-format.md +++ b/.kiro/settings/rules/ears-format.md @@ -3,31 +3,40 @@ ## Overview EARS (Easy Approach to Requirements Syntax) is the standard format for acceptance criteria in spec-driven development. +EARS patterns describe the logical structure of a requirement (condition + subject + response) and are not tied to any particular natural language. +All acceptance criteria should be written in the target language configured for the specification (for example, `spec.json.language` / `en`). 
+Keep EARS trigger keywords and fixed phrases in English (`When`, `If`, `While`, `Where`, `The system shall`, `The [system] shall`) and localize only the variable parts (`[event]`, `[precondition]`, `[trigger]`, `[feature is included]`, `[response/action]`) into the target language. Do not interleave target-language text inside the trigger or fixed English phrases themselves. + ## Primary EARS Patterns -### 1. Event-Driven (WHEN-THEN) -- **Pattern**: WHEN [event/condition] THEN [system/subject] SHALL [response] +### 1. Event-Driven Requirements +- **Pattern**: When [event], the [system] shall [response/action] - **Use Case**: Responses to specific events or triggers -- **Example**: WHEN user clicks checkout button THEN Checkout Service SHALL validate cart contents +- **Example**: When user clicks checkout button, the Checkout Service shall validate cart contents -### 2. State-Based (IF-THEN) -- **Pattern**: IF [precondition/state] THEN [system/subject] SHALL [response] +### 2. State-Driven Requirements +- **Pattern**: While [precondition], the [system] shall [response/action] - **Use Case**: Behavior dependent on system state or preconditions -- **Example**: IF cart is empty THEN Checkout Service SHALL display empty cart message +- **Example**: While payment is processing, the Checkout Service shall display loading indicator + +### 3. Unwanted Behavior Requirements +- **Pattern**: If [trigger], the [system] shall [response/action] +- **Use Case**: System response to errors, failures, or undesired situations +- **Example**: If invalid credit card number is entered, then the website shall display error message -### 3. Continuous Behavior (WHILE-THE) -- **Pattern**: WHILE [ongoing condition] THE [system/subject] SHALL [continuous behavior] -- **Use Case**: Ongoing behaviors that persist during a condition -- **Example**: WHILE payment is processing THE Checkout Service SHALL display loading indicator +### 4. Optional Feature Requirements +- **Pattern**: Where [feature is included], the [system] shall [response/action] +- **Use Case**: Requirements for optional or conditional features +- **Example**: Where the car has a sunroof, the car shall have a sunroof control panel -### 4. Contextual Behavior (WHERE-THE) -- **Pattern**: WHERE [location/context/trigger] THE [system/subject] SHALL [contextual behavior] -- **Use Case**: Location or context-specific requirements -- **Example**: WHERE user is on payment page THE Checkout Service SHALL encrypt all form inputs +### 5. 
Ubiquitous Requirements +- **Pattern**: The [system] shall [response/action] +- **Use Case**: Always-active requirements and fundamental system properties +- **Example**: The mobile phone shall have a mass of less than 100 grams ## Combined Patterns -- WHEN [event] AND [additional condition] THEN [system/subject] SHALL [response] -- IF [condition] AND [additional condition] THEN [system/subject] SHALL [response] +- While [precondition], when [event], the [system] shall [response/action] +- When [event] and [additional condition], the [system] shall [response/action] ## Subject Selection Guidelines - **Software Projects**: Use concrete system/service name (e.g., "Checkout Service", "User Auth Module") @@ -35,8 +44,6 @@ EARS (Easy Approach to Requirements Syntax) is the standard format for acceptanc - **Non-Software**: Use appropriate subject (e.g., "Marketing Campaign", "Documentation") ## Quality Criteria -- Each criterion must be testable and verifiable -- Use SHALL for mandatory requirements, SHOULD for recommended -- Avoid ambiguous terms (e.g., "fast", "user-friendly") -- Keep each criterion atomic (one behavior per statement) - +- Requirements must be testable, verifiable, and describe a single behavior. +- Use objective language: "shall" for mandatory behavior, "should" for recommendations; avoid ambiguous terms. +- Follow EARS syntax: [condition], the [system] shall [response/action]. diff --git a/.kiro/settings/rules/steering-principles.md b/.kiro/settings/rules/steering-principles.md index 62feef7db..b3ac5f5e2 100644 --- a/.kiro/settings/rules/steering-principles.md +++ b/.kiro/settings/rules/steering-principles.md @@ -21,6 +21,8 @@ Steering files are **project memory**, not exhaustive specifications. - Every component description - All dependencies - Implementation details +- Agent-specific tooling directories (e.g. `.cursor/`, `.gemini/`, `.claude/`) +- Detailed documentation of `.kiro/` metadata directories (settings, automation) ### Example Comparison @@ -70,6 +72,16 @@ Never include: --- +## Notes + +- Templates are starting points, customize as needed +- Follow same granularity principles as core steering +- All steering files loaded as project memory +- Light references to `.kiro/specs/` and `.kiro/steering/` are acceptable; avoid other `.kiro/` directories +- Custom files equally important as core files + +--- + ## File-Specific Focus - **product.md**: Purpose, value, business context (not exhaustive features) diff --git a/.kiro/settings/rules/tasks-generation.md b/.kiro/settings/rules/tasks-generation.md index 98f322f5c..974d2d302 100644 --- a/.kiro/settings/rules/tasks-generation.md +++ b/.kiro/settings/rules/tasks-generation.md @@ -28,6 +28,9 @@ Focus on capabilities and outcomes, not code structure. - Connect to the overall system (no hanging features) - Progress incrementally (no big jumps in complexity) - Validate core functionality early in sequence +- Respect architecture boundaries defined in design.md (Architecture Pattern & Boundary Map) +- Honor interface contracts documented in design.md +- Use major task summaries sparingly—omit detail bullets if the work is fully captured by child tasks. **End with integration tasks** to wire everything together. @@ -43,8 +46,9 @@ Focus on capabilities and outcomes, not code structure. ### 4. 
Requirements Mapping **End each task detail section with**: -- `_Requirements: X.X, Y.Y_` for specific requirement IDs -- `_Requirements: [description]_` for cross-cutting requirements +- `_Requirements: X.X, Y.Y_` listing **only numeric requirement IDs** (comma-separated). Never append descriptive text, parentheses, translations, or free-form labels. +- For cross-cutting requirements, list every relevant requirement ID. All requirements MUST have numeric IDs in requirements.md. If an ID is missing, stop and correct requirements.md before generating tasks. +- Reference components/interfaces from design.md when helpful (e.g., `_Contracts: AuthService API`) ### 5. Code-Only Focus @@ -59,18 +63,41 @@ Focus on capabilities and outcomes, not code structure. - User testing - Marketing/business activities +### Optional Test Coverage Tasks + +- When the design already guarantees functional coverage and rapid MVP delivery is prioritized, mark purely test-oriented follow-up work (e.g., baseline rendering/unit tests) as **optional** using the `- [ ]*` checkbox form. +- Only apply the optional marker when the sub-task directly references acceptance criteria from requirements.md in its detail bullets. +- Never mark implementation work or integration-critical verification as optional—reserve `*` for auxiliary/deferrable test coverage that can be revisited post-MVP. + ## Task Hierarchy Rules ### Maximum 2 Levels - **Level 1**: Major tasks (1, 2, 3, 4...) - **Level 2**: Sub-tasks (1.1, 1.2, 2.1, 2.2...) - **No deeper nesting** (no 1.1.1) +- If a major task would contain only a single actionable item, collapse the structure and promote the sub-task to the major level (e.g., replace `1.1` with `1.`). +- When a major task exists purely as a container, keep the checkbox description concise and avoid duplicating detailed bullets—reserve specifics for its sub-tasks. ### Sequential Numbering - Major tasks MUST increment: 1, 2, 3, 4, 5... - Sub-tasks reset per major task: 1.1, 1.2, then 2.1, 2.2... - Never repeat major task numbers +### Parallel Analysis (default) +- Assume parallel analysis is enabled unless explicitly disabled (e.g. `--sequential` flag). +- Identify tasks that can run concurrently when **all** conditions hold: + - No data dependency on other pending tasks + - No shared file or resource contention + - No prerequisite review/approval from another task +- Validate that identified parallel tasks operate within separate boundaries defined in the Architecture Pattern & Boundary Map. +- Confirm API/event contracts from design.md do not overlap in ways that cause conflicts. +- Append `(P)` immediately after the task number for each parallel-capable task: + - Example: `- [ ] 2.1 (P) Build background worker` + - Apply to both major tasks and sub-tasks when appropriate. +- If sequential mode is requested, omit `(P)` markers entirely. +- Group parallel tasks logically (same parent when possible) and highlight any ordering caveats in detail bullets. +- Explicitly call out dependencies that prevent `(P)` even when tasks look similar. + ### Checkbox Format ```markdown - [ ] 1. Major task description @@ -83,6 +110,10 @@ Focus on capabilities and outcomes, not code structure. - Detail items... - _Requirements: Y.Y_ +- [ ] 1.3 Sub-task description + - Detail items... + - _Requirements: Z.Z, W.W_ + - [ ] 2. Next major task (NOT 1 again!) - [ ] 2.1 Sub-task... ``` @@ -95,5 +126,6 @@ Focus on capabilities and outcomes, not code structure. 
- If gaps found: Return to requirements or design phase - No requirement should be left without corresponding tasks -Document any intentionally deferred requirements with rationale. +Use `N.M`-style numeric requirement IDs where `N` is the top-level requirement number from requirements.md (for example, Requirement 1 → 1.1, 1.2; Requirement 2 → 2.1, 2.2), and `M` is a local index within that requirement group. +Document any intentionally deferred requirements with rationale. diff --git a/.kiro/settings/rules/tasks-parallel-analysis.md b/.kiro/settings/rules/tasks-parallel-analysis.md new file mode 100644 index 000000000..737542689 --- /dev/null +++ b/.kiro/settings/rules/tasks-parallel-analysis.md @@ -0,0 +1,34 @@ +# Parallel Task Analysis Rules + +## Purpose +Provide a consistent way to identify implementation tasks that can be safely executed in parallel while generating `tasks.md`. + +## When to Consider Tasks Parallel +Only mark a task as parallel-capable when **all** of the following are true: + +1. **No data dependency** on pending tasks. +2. **No conflicting files or shared mutable resources** are touched. +3. **No prerequisite review/approval** from another task is required beforehand. +4. **Environment/setup work** needed by this task is already satisfied or covered within the task itself. + +## Marking Convention +- Append `(P)` immediately after the numeric identifier for each qualifying task. + - Example: `- [ ] 2.1 (P) Build background worker for emails` +- Apply `(P)` to both major tasks and sub-tasks when appropriate. +- If sequential execution is requested (e.g. via `--sequential` flag), omit `(P)` markers entirely. +- Keep `(P)` **outside** of checkbox brackets to avoid confusion with completion state. + +## Grouping & Ordering Guidelines +- Group parallel tasks under the same parent whenever the work belongs to the same theme. +- List obvious prerequisites or caveats in the detail bullets (e.g., "Requires schema migration from 1.2"). +- When two tasks look similar but are not parallel-safe, call out the blocking dependency explicitly. +- Skip marking container-only major tasks (those without their own actionable detail bullets) with `(P)`—evaluate parallel execution at the sub-task level instead. + +## Quality Checklist +Before marking a task with `(P)`, ensure you have: + +- Verified that running this task concurrently will not create merge or deployment conflicts. +- Captured any shared state expectations in the detail bullets. +- Confirmed that the implementation can be tested independently. + +If any check fails, **do not** mark the task with `(P)` and explain the dependency in the task details. diff --git a/.kiro/settings/templates/specs/design.md b/.kiro/settings/templates/specs/design.md index 52dd33308..b0c1ba085 100644 --- a/.kiro/settings/templates/specs/design.md +++ b/.kiro/settings/templates/specs/design.md @@ -1,8 +1,6 @@ # Design Document Template --- -**Document Length Guidelines: Max 1000 lines** - **Purpose**: Provide sufficient detail to ensure implementation consistency across different implementers, preventing interpretation drift. **Approach**: @@ -14,6 +12,8 @@ **Warning**: Approaching 1000 lines indicates excessive feature complexity that may require design simplification. --- +> Sections may be reordered (e.g., surfacing Requirements Traceability earlier or moving Data Models nearer Architecture) when it improves clarity. Within each section, keep the flow **Summary → Scope → Decisions → Impacts/Risks** so reviewers can scan consistently. 
+ ## Overview 2-3 paragraphs max **Purpose**: This feature delivers [specific value] to [target users]. @@ -33,6 +33,9 @@ ## Architecture +> Reference detailed discovery notes in `research.md` only for background; keep design.md self-contained for reviewers by capturing all decisions and contracts here. +> Capture key decisions in text and let diagrams carry structural detail—avoid repeating the same information in prose. + ### Existing Architecture Analysis (if applicable) When modifying existing systems: - Current architecture patterns and constraints @@ -40,173 +43,134 @@ When modifying existing systems: - Integration points that must be maintained - Technical debt addressed or worked around -### High-Level Architecture -**RECOMMENDED**: Include Mermaid diagram showing system architecture (required for complex features, optional for simple additions) +### Architecture Pattern & Boundary Map +**RECOMMENDED**: Include Mermaid diagram showing the chosen architecture pattern and system boundaries (required for complex features, optional for simple additions) **Architecture Integration**: +- Selected pattern: [name and brief rationale] +- Domain/feature boundaries: [how responsibilities are separated to avoid conflicts] - Existing patterns preserved: [list key patterns] - New components rationale: [why each is needed] -- Technology alignment: [how it fits current stack] - Steering compliance: [principles maintained] -### Technology Stack and Design Decisions - -**Generation Instructions** (DO NOT include this section in design.md): -Adapt content based on feature classification from Discovery & Analysis Phase: +### Technology Stack -**For New Features (greenfield)**: -Generate Technology Stack section with ONLY relevant layers: -- Include only applicable technology layers (e.g., skip Frontend for CLI tools, skip Infrastructure for libraries) -- For each technology choice, provide: selection, rationale, and alternatives considered -- Include Architecture Pattern Selection if making architectural decisions +| Layer | Choice / Version | Role in Feature | Notes | +|-------|------------------|-----------------|-------| +| Frontend / CLI | | | | +| Backend / Services | | | | +| Data / Storage | | | | +| Messaging / Events | | | | +| Infrastructure / Runtime | | | | -**For Extensions/Additions to Existing Systems**: -Generate Technology Alignment section instead: -- Document how feature aligns with existing technology stack -- Note any new dependencies or libraries being introduced -- Justify deviations from established patterns if necessary - -**Key Design Decisions**: -Generate 1-3 critical technical decisions that significantly impact the implementation. -Each decision should follow this format: -- **Decision**: [Specific technical choice made] -- **Context**: [Problem or requirement driving this decision] -- **Alternatives**: [2-3 other approaches considered] -- **Selected Approach**: [What was chosen and how it works] -- **Rationale**: [Why this is optimal for the specific context] -- **Trade-offs**: [What we gain vs. what we sacrifice] - -Skip this entire section for simple CRUD operations or when following established patterns without deviation. +> Keep rationale concise here and, when more depth is required (trade-offs, benchmarks), add a short summary plus pointer to the Supporting References section and `research.md` for raw investigation notes. 
## System Flows -**Flow Design Generation Instructions** (DO NOT include this section in design.md): -Generate appropriate flow diagrams ONLY when the feature requires flow visualization. Select from: -- **Sequence Diagrams**: For user interactions across multiple components -- **Process Flow Charts**: For complex algorithms, decision branches, or state machines -- **Data Flow Diagrams**: For data transformations, ETL processes, or data pipelines -- **State Diagrams**: For complex state transitions -- **Event Flow**: For async/event-driven architectures +Provide only the diagrams needed to explain non-trivial flows. Use pure Mermaid syntax. Common patterns: +- Sequence (multi-party interactions) +- Process / state (branching logic or lifecycle) +- Data / event flow (pipelines, async messaging) -Skip this section entirely for simple CRUD operations or features without complex flows. -When included, provide concise Mermaid diagrams specific to the actual feature requirements. +Skip this section entirely for simple CRUD changes. +> Describe flow-level decisions (e.g., gating conditions, retries) briefly after the diagram instead of restating each step. ## Requirements Traceability -**Traceability Generation Instructions** (DO NOT include this section in design.md): -Generate traceability mapping ONLY for complex features with multiple requirements or when explicitly needed for compliance/validation. +Use this section for complex or compliance-sensitive features where requirements span multiple domains. Straightforward 1:1 mappings can rely on the Components summary table. -When included, create a mapping table showing how each EARS requirement is realized: -| Requirement | Requirement Summary | Components | Interfaces | Flows | -|---------------|-------------------|------------|------------|-------| -| 1.1 | Brief description | Component names | API/Methods | Relevant flow diagrams | +Map each requirement ID (e.g., `2.1`) to the design elements that realize it. -Alternative format for simpler cases: -- **1.1**: Realized by [Component X] through [Interface Y] -- **1.2**: Implemented in [Component Z] with [Flow diagram reference] +| Requirement | Summary | Components | Interfaces | Flows | +|-------------|---------|------------|------------|-------| +| 1.1 | | | | | +| 1.2 | | | | | -Skip this section for simple features with straightforward 1:1 requirement-to-component mappings. +> Omit this section only when a single component satisfies a single requirement without cross-cutting concerns. ## Components and Interfaces -**Component Design Generation Instructions** (DO NOT include this section in design.md): -Structure components by domain boundaries or architectural layers. Generate only relevant subsections based on component type. -Group related components under domain/layer headings for clarity. +Provide a quick reference before diving into per-component details. + +- Summaries can be a table or compact list. Example table: + | Component | Domain/Layer | Intent | Req Coverage | Key Dependencies (P0/P1) | Contracts | + |-----------|--------------|--------|--------------|--------------------------|-----------| + | ExampleComponent | UI | Displays XYZ | 1, 2 | GameProvider (P0), MapPanel (P1) | Service, State | +- Only components introducing new boundaries (e.g., logic hooks, external integrations, persistence) require full detail blocks. Simple presentation components can rely on the summary row plus a short Implementation Note. 
-### [Domain/Layer Name] +Group detailed blocks by domain or architectural layer. For each detailed component, list requirement IDs as `2.1, 2.3` (omit “Requirement”). When multiple UI components share the same contract, reference a base interface/props definition instead of duplicating code blocks. + +### [Domain / Layer] #### [Component Name] -**Responsibility & Boundaries** -- **Primary Responsibility**: Single, clear statement of what this component does -- **Domain Boundary**: Which domain/subdomain this belongs to -- **Data Ownership**: What data this component owns and manages -- **Transaction Boundary**: Scope of transactional consistency (if applicable) +| Field | Detail | +|-------|--------| +| Intent | 1-line description of the responsibility | +| Requirements | 2.1, 2.3 | +| Owner / Reviewers | (optional) | -**Dependencies** -- **Inbound**: Components/services that depend on this component -- **Outbound**: Components/services this component depends on -- **External**: Third-party services, libraries, or external systems +**Responsibilities & Constraints** +- Primary responsibility +- Domain boundary and transaction scope +- Data ownership / invariants -**External Dependencies Investigation** (when using external libraries/services): -- Use WebSearch to locate official documentation, GitHub repos, and community resources -- Use WebFetch to retrieve and analyze documentation pages, API references, and usage examples -- Verify API signatures, authentication methods, and rate limits -- Check version compatibility, breaking changes, and migration guides -- Investigate common issues, best practices, and performance considerations -- Document any assumptions, unknowns, or risks for implementation phase -- If critical information is missing, clearly note "Requires investigation during implementation: [specific concern]" +**Dependencies** +- Inbound: Component/service name — purpose (Criticality) +- Outbound: Component/service name — purpose (Criticality) +- External: Service/library — purpose (Criticality) -**Contract Definition** +Summarize external dependency findings here; deeper investigation (API signatures, rate limits, migration notes) lives in `research.md`. -Select and generate ONLY the relevant contract types for each component: +**Contracts**: Service [ ] / API [ ] / Event [ ] / Batch [ ] / State [ ] ← check only the ones that apply. 
-**Service Interface** (for business logic components): +##### Service Interface ```typescript interface [ComponentName]Service { - // Method signatures with clear input/output types - // Include error types in return signatures methodName(input: InputType): Result; } ``` -- **Preconditions**: What must be true before calling -- **Postconditions**: What is guaranteed after successful execution -- **Invariants**: What remains true throughout +- Preconditions: +- Postconditions: +- Invariants: -**API Contract** (for REST/GraphQL endpoints): +##### API Contract | Method | Endpoint | Request | Response | Errors | |--------|----------|---------|----------|--------| | POST | /api/resource | CreateRequest | Resource | 400, 409, 500 | -With detailed schemas only for complex payloads - -**Event Contract** (for event-driven components): -- **Published Events**: Event name, schema, trigger conditions -- **Subscribed Events**: Event name, handling strategy, idempotency -- **Ordering**: Guaranteed order requirements -- **Delivery**: At-least-once, at-most-once, or exactly-once +##### Event Contract +- Published events: +- Subscribed events: +- Ordering / delivery guarantees: -**Batch/Job Contract** (for scheduled/triggered processes): -- **Trigger**: Schedule, event, or manual trigger conditions -- **Input**: Data source and validation rules -- **Output**: Results destination and format -- **Idempotency**: How repeat executions are handled -- **Recovery**: Failure handling and retry strategy +##### Batch / Job Contract +- Trigger: +- Input / validation: +- Output / destination: +- Idempotency & recovery: -**State Management** (only if component maintains state): -- **State Model**: States and valid transitions -- **Persistence**: Storage strategy and consistency model -- **Concurrency**: Locking, optimistic/pessimistic control +##### State Management +- State model: +- Persistence & consistency: +- Concurrency strategy: -**Integration Strategy** (when modifying existing systems): -- **Modification Approach**: Extend, wrap, or refactor existing code -- **Backward Compatibility**: What must be maintained -- **Migration Path**: How to transition from current to target state +**Implementation Notes** +- Integration: +- Validation: +- Risks: ## Data Models -**Data Model Generation Instructions** (DO NOT include this section in design.md): -Generate only relevant data model sections based on the system's data requirements and chosen architecture. -Progress from conceptual to physical as needed for implementation clarity. +Focus on the portions of the data landscape that change with this feature. 
### Domain Model -**When to include**: Complex business domains with rich behavior and rules - -**Core Concepts**: -- **Aggregates**: Define transactional consistency boundaries -- **Entities**: Business objects with unique identity and lifecycle -- **Value Objects**: Immutable descriptive aspects without identity -- **Domain Events**: Significant state changes in the domain - -**Business Rules & Invariants**: -- Constraints that must always be true -- Validation rules and their enforcement points -- Cross-aggregate consistency strategies - -Include conceptual diagram (Mermaid) only when relationships are complex enough to benefit from visualization +- Aggregates and transactional boundaries +- Entities, value objects, domain events +- Business rules & invariants +- Optional Mermaid diagram for complex relationships ### Logical Data Model -**When to include**: When designing data structures independent of storage technology **Structure Definition**: - Entity relationships and cardinality @@ -246,25 +210,23 @@ Include conceptual diagram (Mermaid) only when relationships are complex enough - TTL and compaction strategies ### Data Contracts & Integration -**When to include**: Systems with service boundaries or external integrations -**API Data Transfer**: +**API Data Transfer** - Request/response schemas - Validation rules - Serialization format (JSON, Protobuf, etc.) -**Event Schemas**: +**Event Schemas** - Published event structures - Schema versioning strategy - Backward/forward compatibility rules -**Cross-Service Data Management**: +**Cross-Service Data Management** - Distributed transaction patterns (Saga, 2PC) - Data synchronization strategies - Eventual consistency handling -Skip any section not directly relevant to the feature being designed. -Focus on aspects that influence implementation decisions. +Skip subsections that are not relevant to this feature. ## Error Handling @@ -293,18 +255,22 @@ Error tracking, logging, and health monitoring implementation. ## Optional Sections (include when relevant) ### Security Considerations -**Include when**: Features handle authentication, sensitive data, external integrations, or user permissions +_Use this section for features handling auth, sensitive data, external integrations, or user permissions. Capture only decisions unique to this feature; defer baseline controls to steering docs._ - Threat modeling, security controls, compliance requirements - Authentication and authorization patterns - Data protection and privacy considerations ### Performance & Scalability -**Include when**: Features have specific performance requirements, high load expectations, or scaling concerns +_Use this section when performance targets, high load, or scaling concerns exist. Record only feature-specific targets or trade-offs and rely on steering documents for general practices._ - Target metrics and measurement strategies - Scaling approaches (horizontal/vertical) - Caching strategies and optimization techniques ### Migration Strategy -**REQUIRED**: Include Mermaid flowchart showing migration phases +Include a Mermaid flowchart showing migration phases when schema/data movement is required. 
+- Phase breakdown, rollback triggers, validation checkpoints -**Process**: Phase breakdown, rollback triggers, validation checkpoints \ No newline at end of file +## Supporting References (Optional) +- Create this section only when keeping the information in the main body would hurt readability (e.g., very long TypeScript definitions, vendor option matrices, exhaustive schema tables). Keep decision-making context in the main sections so the design stays self-contained. +- Link to the supporting references from the main text instead of inlining large snippets. +- Background research notes and comparisons continue to live in `research.md`, but their conclusions must be summarized in the main design. diff --git a/.kiro/settings/templates/specs/requirements.md b/.kiro/settings/templates/specs/requirements.md index 46d606052..dc84552e0 100644 --- a/.kiro/settings/templates/specs/requirements.md +++ b/.kiro/settings/templates/specs/requirements.md @@ -6,20 +6,21 @@ ## Requirements ### Requirement 1: {{REQUIREMENT_AREA_1}} + **Objective:** As a {{ROLE}}, I want {{CAPABILITY}}, so that {{BENEFIT}} #### Acceptance Criteria -1. WHEN [event] THEN [system/subject] SHALL [response] -2. IF [precondition] THEN [system/subject] SHALL [response] -3. WHILE [ongoing condition] THE [system/subject] SHALL [continuous behavior] -4. WHERE [location/context/trigger] THE [system/subject] SHALL [contextual behavior] +1. When [event], the [system] shall [response/action] +2. If [trigger], then the [system] shall [response/action] +3. While [precondition], the [system] shall [response/action] +4. Where [feature is included], the [system] shall [response/action] +5. The [system] shall [response/action] ### Requirement 2: {{REQUIREMENT_AREA_2}} **Objective:** As a {{ROLE}}, I want {{CAPABILITY}}, so that {{BENEFIT}} #### Acceptance Criteria -1. WHEN [event] THEN [system/subject] SHALL [response] -2. WHEN [event] AND [condition] THEN [system/subject] SHALL [response] +1. When [event], the [system] shall [response/action] +2. When [event] and [condition], the [system] shall [response/action] - diff --git a/.kiro/settings/templates/specs/research.md b/.kiro/settings/templates/specs/research.md new file mode 100644 index 000000000..b7c32906b --- /dev/null +++ b/.kiro/settings/templates/specs/research.md @@ -0,0 +1,61 @@ +# Research & Design Decisions Template + +--- +**Purpose**: Capture discovery findings, architectural investigations, and rationale that inform the technical design. + +**Usage**: +- Log research activities and outcomes during the discovery phase. +- Document design decision trade-offs that are too detailed for `design.md`. +- Provide references and evidence for future audits or reuse. +--- + +## Summary +- **Feature**: `` +- **Discovery Scope**: New Feature / Extension / Simple Addition / Complex Integration +- **Key Findings**: + - Finding 1 + - Finding 2 + - Finding 3 + +## Research Log +Document notable investigation steps and their outcomes. Group entries by topic for readability. + +### [Topic or Question] +- **Context**: What triggered this investigation? +- **Sources Consulted**: Links, documentation, API references, benchmarks +- **Findings**: Concise bullet points summarizing the insights +- **Implications**: How this affects architecture, contracts, or implementation + +_Repeat the subsection for each major topic._ + +## Architecture Pattern Evaluation +List candidate patterns or approaches that were considered. Use the table format where helpful. 
+ +| Option | Description | Strengths | Risks / Limitations | Notes | +|--------|-------------|-----------|---------------------|-------| +| Hexagonal | Ports & adapters abstraction around core domain | Clear boundaries, testable core | Requires adapter layer build-out | Aligns with existing steering principle X | + +## Design Decisions +Record major decisions that influence `design.md`. Focus on choices with significant trade-offs. + +### Decision: `` +- **Context**: Problem or requirement driving the decision +- **Alternatives Considered**: + 1. Option A — short description + 2. Option B — short description +- **Selected Approach**: What was chosen and how it works +- **Rationale**: Why this approach fits the current project context +- **Trade-offs**: Benefits vs. compromises +- **Follow-up**: Items to verify during implementation or testing + +_Repeat the subsection for each decision._ + +## Risks & Mitigations +- Risk 1 — Proposed mitigation +- Risk 2 — Proposed mitigation +- Risk 3 — Proposed mitigation + +## References +Provide canonical links and citations (official docs, standards, ADRs, internal guidelines). +- [Title](https://example.com) — brief note on relevance +- ... diff --git a/.kiro/settings/templates/specs/tasks.md b/.kiro/settings/templates/specs/tasks.md index 6a43d4d37..61f7ef8a2 100644 --- a/.kiro/settings/templates/specs/tasks.md +++ b/.kiro/settings/templates/specs/tasks.md @@ -2,42 +2,20 @@ ## Task Format Template -Use this structure for all implementation tasks: +Use whichever pattern fits the work breakdown: -- [ ] {{MAJOR_NUMBER}}. {{MAJOR_TASK_DESCRIPTION}} - - {{DETAIL_ITEM_1}} - - {{DETAIL_ITEM_2}} - - {{DETAIL_ITEM_3}} +### Major task only +- [ ] {{NUMBER}}. {{TASK_DESCRIPTION}}{{PARALLEL_MARK}} + - {{DETAIL_ITEM_1}} *(Include details only when needed. If the task stands alone, omit bullet items.)* - _Requirements: {{REQUIREMENT_IDS}}_ -- [ ] {{MAJOR_NUMBER}}.{{SUB_NUMBER}} {{SUB_TASK_DESCRIPTION}} +### Major + Sub-task structure +- [ ] {{MAJOR_NUMBER}}. {{MAJOR_TASK_SUMMARY}} +- [ ] {{MAJOR_NUMBER}}.{{SUB_NUMBER}} {{SUB_TASK_DESCRIPTION}}{{SUB_PARALLEL_MARK}} - {{DETAIL_ITEM_1}} - {{DETAIL_ITEM_2}} - - _Requirements: {{REQUIREMENT_IDS}}_ - -## Example (Reference Only) - -- [ ] 1. Set up project foundation and infrastructure - - Initialize project with required technology stack - - Configure server infrastructure and request handling - - Establish data storage and caching layer - - Set up configuration and environment management - - _Requirements: All requirements need foundational setup_ - -- [ ] 2. Build authentication and user management system -- [ ] 2.1 Implement core authentication functionality - - Set up user data storage with validation rules - - Implement secure authentication mechanism - - Build user registration functionality - - Add login and session management features - - _Requirements: 7.1, 7.2_ - -- [ ] 2.2 Enable email service integration - - Implement secure credential storage system - - Build authentication flow for email providers - - Create email connection validation logic - - Develop email account management features - - _Requirements: 5.1, 5.2, 5.4_ - -- [ ] 3. Next major task... + - _Requirements: {{REQUIREMENT_IDS}}_ *(IDs only; do not add descriptions or parentheses.)* +> **Parallel marker**: Append ` (P)` only to tasks that can be executed in parallel. Omit the marker when running in `--sequential` mode. 
+> +> **Optional test coverage**: When a sub-task is deferrable test work tied to acceptance criteria, mark the checkbox as `- [ ]*` and explain the referenced requirements in the detail bullets. diff --git a/.kiro/specs/pyrefly-type-error-reduction/design.md b/.kiro/specs/pyrefly-type-error-reduction/design.md new file mode 100644 index 000000000..0532d968f --- /dev/null +++ b/.kiro/specs/pyrefly-type-error-reduction/design.md @@ -0,0 +1,176 @@ +# Design Document + +## Overview +The Pyrefly Type Error Reduction rollout is a systematic approach to eliminating type errors in the Cryptofeed codebase through phased introduction of pyrefly type checking. The rollout follows engineering principles of START SMALL, SOLID, KISS, and YAGNI, beginning with critical runtime safety checks and progressively enabling more advanced type safety features. + +## Context and Constraints +- **Technology Stack**: Python 3.11+, pyrefly type checker, existing codebase with ~58K lines +- **Operational Constraints**: Must maintain backward compatibility, no breaking changes to runtime behavior +- **Quality Constraints**: Type safety improvements without degrading code readability or performance +- **Timeline**: Phased rollout over 5 phases, with atomic commits and measurable progress tracking + +## Architecture Overview + +### Phased Rollout Architecture +``` +Phase 0: Foundation (Current: Phase 0.3) +├── 0.1: Core Safety (unsupported-operation, unbound-name) ✅ +├── 0.2: Extended Safety (missing-attribute, bad-argument-type) ✅ +└── 0.3: Attribute Safety (missing-attribute elimination) 🚧 + +Phase 1: Type Safety Core (bad-assignment, bad-return) +Phase 2: Data Access Safety (not-iterable) +Phase 3: Function Contracts (bad-function-definition) +Phase 4: Inheritance Safety (bad-override, bad-param-name-override) +Phase 5: Advanced Types (no-matching-overload, etc.) +``` + +### Configuration Architecture +```python +# pyproject.toml +[tool.pyrefly] +project_excludes = ["gen/**/*.py"] # Exclude generated code + +[tool.pyrefly.errors] +# Phase 0.3: Enable critical runtime safety +unbound-name = true # NameError prevention +unsupported-operation = true # TypeError prevention +missing-attribute = true # AttributeError prevention +bad-argument-type = true # Function call safety +``` + +## Component Design + +### Error Type Categories +1. **Runtime Safety (Phase 0)**: Errors that cause immediate crashes + - `unbound-name`: NameError when accessing undefined variables + - `unsupported-operation`: TypeError from invalid operations + - `missing-attribute`: AttributeError from None/object attribute access + - `bad-argument-type`: TypeError from wrong function arguments + +2. **Type Safety (Phase 1)**: Variable and function contract violations + - `bad-assignment`: Incompatible variable assignments + - `bad-return`: Function return type mismatches + +3. **Data Access Safety (Phase 2)**: Collection and iteration safety + - `not-iterable`: Attempting to iterate over non-iterable objects + +4. **Function Contracts (Phase 3)**: Function signature consistency + - `bad-function-definition`: Parameter mismatch in function definitions + +5. **Inheritance Safety (Phase 4)**: Class hierarchy consistency + - `bad-override`: Method override signature mismatches + - `bad-param-name-override`: Parameter name inconsistencies + +6. 
**Advanced Types (Phase 5)**: Complex type system features + - `no-matching-overload`: Function overload resolution failures + +### Error Resolution Patterns + +#### Pattern 1: Null Safety Guards +```python +# BEFORE: missing-attribute error +result = obj.attribute # obj could be None + +# AFTER: Add null check +if obj is not None: + result = obj.attribute +else: + result = default_value +``` + +#### Pattern 2: Type Conversion +```python +# BEFORE: bad-argument-type error +func(tuple_data) # func expects str + +# AFTER: Convert type +func(str(tuple_data)) +``` + +#### Pattern 3: Collection Safety +```python +# BEFORE: not-iterable error +for item in data: # data could be None + +# AFTER: Check iterability +if data is not None: + for item in data: +``` + +#### Pattern 4: Variable Typing +```python +# BEFORE: bad-assignment error +count: int = float_value # Incompatible assignment + +# AFTER: Convert or change type +count: int = int(float_value) +# or +count: float = float_value +``` + +## Implementation Strategy + +### Phase Progression Rules +1. **Atomic Commits**: Each error fix is committed separately with descriptive messages +2. **Error Count Tracking**: Baseline established, progress measured by error reduction +3. **No Regressions**: Previous phase errors remain fixed +4. **Controlled Expansion**: Only enable new error types when current phase is complete + +### Quality Assurance +- **Runtime Compatibility**: All fixes preserve existing behavior +- **Test Suite Integrity**: Existing tests continue to pass +- **Code Readability**: Type safety improvements don't obscure logic +- **Performance Neutral**: No significant performance impact from fixes + +### Rollback Strategy +- **Configuration-Based**: Disable error types in pyproject.toml to rollback +- **Branch-Based**: Feature branch allows easy rollback to master +- **Incremental**: Can rollback individual phases without affecting others + +## Success Metrics + +### Error Reduction Targets +- **Phase 0.1**: unsupported-operation (70→59), unbound-name (47→37) +- **Phase 0.2**: Enable missing-attribute (416 errors), bad-argument-type (206 errors) +- **Phase 0.3**: missing-attribute (416→359), bad-argument-type (206→206) +- **Overall**: 22% reduction from baseline (920→718 errors) + +### Quality Metrics +- **Zero Breaking Changes**: Runtime behavior unchanged +- **Test Coverage**: All existing tests pass +- **Code Quality**: Maintainable, readable code +- **Performance**: No degradation in execution speed + +## Risk Mitigation + +### Technical Risks +- **False Positives**: Pyrefly errors that don't represent real issues + - *Mitigation*: Manual review of each error before fixing +- **Complex Fixes**: Some errors require significant refactoring + - *Mitigation*: START SMALL principle, tackle simple fixes first +- **Generated Code**: Protobuf and schema files causing noise + - *Mitigation*: project_excludes configuration excludes gen/**/*.py + +### Operational Risks +- **Timeline Delays**: Underestimating complexity of error fixes + - *Mitigation*: Phased approach allows incremental progress +- **Team Disruption**: Type checking blocking development + - *Mitigation*: Controlled rollout, can disable checks if needed +- **Merge Conflicts**: Long-running branch diverges from master + - *Mitigation*: Regular rebasing, atomic commits for easy conflict resolution + +## Future Evolution + +### Phase 1-5 Expansion +The foundation established in Phase 0 enables systematic rollout of remaining error types with proven patterns and tooling. 
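The error-count tracking and phase-progression rules above can be automated with a small tally script. The following is a minimal sketch, assuming `pyrefly check` prints one line per error that ends with the error kind in square brackets (adjust the parsing to the installed version's output format); the script and the `PHASE_0_KINDS` constant are illustrative and not part of the repository.

```python
import re
import subprocess
from collections import Counter

# Error kinds currently enabled for Phase 0 (mirrors pyproject.toml).
PHASE_0_KINDS = {"unbound-name", "unsupported-operation", "missing-attribute", "bad-argument-type"}


def count_errors() -> Counter:
    """Run pyrefly and tally reported errors per error kind."""
    # check=False: pyrefly exits non-zero when errors are found, which is expected here.
    proc = subprocess.run(["pyrefly", "check"], capture_output=True, text=True, check=False)
    counts: Counter = Counter()
    for line in proc.stdout.splitlines() + proc.stderr.splitlines():
        # Assumed output shape: "...message... [error-kind]" at end of line.
        match = re.search(r"\[([a-z-]+)\]\s*$", line)
        if match:
            counts[match.group(1)] += 1
    return counts


if __name__ == "__main__":
    counts = count_errors()
    for kind in sorted(PHASE_0_KINDS):
        print(f"{kind}: {counts.get(kind, 0)}")
    print(f"total: {sum(counts.values())}")
```

Comparing the tallies of successive runs yields the per-phase reduction figures quoted in the success metrics and in commit messages.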
+ +### Integration with CI/CD +Future integration with CI pipelines to prevent error regressions and enforce type safety standards. + +### Advanced Features +Potential future enhancements include: +- Type annotation generation +- Automated fix suggestions +- Integration with mypy/pylance for IDE support +- Custom error type definitions for domain-specific safety \ No newline at end of file diff --git a/.kiro/specs/pyrefly-type-error-reduction/requirements.md b/.kiro/specs/pyrefly-type-error-reduction/requirements.md new file mode 100644 index 000000000..7ff6dbc3f --- /dev/null +++ b/.kiro/specs/pyrefly-type-error-reduction/requirements.md @@ -0,0 +1,150 @@ +# Requirements Document + +## Project Description (Input) +Pyrefly Type Error Reduction Rollout - Systematic elimination of type errors in the Cryptofeed codebase through phased rollout of pyrefly type checking, starting with critical runtime safety checks and progressing to advanced type safety features. + +## Engineering Principles Applied +- **START SMALL**: Begin with controlled error types, expand incrementally +- **SOLID**: Single responsibility for each phase, clear separation of concerns +- **KISS**: Simple configuration, atomic commits, focused error categories +- **YAGNI**: Enable only necessary error types per phase, avoid premature complexity +- **TDD**: Test-driven approach with error reduction metrics and validation + +## Requirements (Phased Rollout) + +### Functional Requirements (Behavioral Specifications) + +#### Phase 0: Foundation Setup ✅ +1. **FR-0.1**: Pyrefly Configuration Infrastructure ✅ + - WHEN pyrefly is installed THEN configuration file supports error type selection + - WHEN project_excludes configured THEN generated code is excluded from type checking + - WHEN error types enabled THEN only specified errors are reported + - WHEN rollout starts THEN baseline error count is established + +2. **FR-0.2**: Controlled Error Type Activation ✅ + - WHEN phase 0.1 starts THEN enable unbound-name and unsupported-operation checks + - WHEN phase 0.2 starts THEN enable missing-attribute and bad-argument-type checks + - WHEN error types enabled THEN all other error types remain disabled + - WHEN errors fixed THEN atomic commits track progress + +#### Phase 0.3: Extended Foundation (Current Phase) 🚧 +3. **FR-0.3**: Missing Attribute Error Elimination 🚧 + - WHEN missing-attribute errors detected THEN systematically fix AttributeError sources + - WHEN attribute access fails THEN add proper null checks or type guards + - WHEN object attributes accessed THEN ensure object is not None before access + - WHEN 57 missing-attribute errors fixed THEN reduce from 416 to 359 remaining + +4. **FR-0.4**: Bad Argument Type Error Elimination 📋 + - WHEN bad-argument-type errors detected THEN fix function call type mismatches + - WHEN function parameters receive wrong types THEN add type conversions or validation + - WHEN tuple passed instead of string THEN convert or restructure parameters + - WHEN 0 bad-argument-type errors fixed THEN maintain 206 remaining for next phase + +#### Phase 1: Type Safety Core 📋 +5. **FR-1.1**: Variable Assignment Safety 📋 + - WHEN bad-assignment errors detected THEN fix variable type assignment mismatches + - WHEN incompatible types assigned THEN add type conversions or change variable types + - WHEN float assigned to int THEN use appropriate numeric type or conversion + +6. 
**FR-1.2**: Return Type Safety 📋 + - WHEN bad-return errors detected THEN fix function return type mismatches + - WHEN function returns wrong type THEN update return type annotations or implementation + +#### Phase 2: Data Access Safety 📋 +7. **FR-2.1**: Iteration Safety 📋 + - WHEN not-iterable errors detected THEN fix iteration over non-iterable objects + - WHEN None iterated THEN add null checks before iteration + - WHEN wrong type iterated THEN convert to iterable or fix data structure + +#### Phase 3: Function Contracts 📋 +8. **FR-3.1**: Function Signature Safety 📋 + - WHEN bad-function-definition errors detected THEN fix function parameter mismatches + - WHEN parameter names conflict THEN rename parameters to match base class contracts + +#### Phase 4: Inheritance Safety 📋 +9. **FR-4.1**: Override Safety 📋 + - WHEN bad-override errors detected THEN fix method override type incompatibilities + - WHEN parameter types don't match THEN update method signatures to match base classes + +10. **FR-4.2**: Parameter Name Consistency 📋 + - WHEN bad-param-name-override errors detected THEN fix parameter name mismatches + - WHEN parameter names differ from base THEN rename to match inheritance contracts + +#### Phase 5: Advanced Types 📋 +11. **FR-5.1**: Overload Resolution 📋 + - WHEN no-matching-overload errors detected THEN fix function overload ambiguities + - WHEN multiple overloads match THEN add type hints to disambiguate calls + +### Technical Requirements (Implementation Specifications) + +#### Configuration Management +1. **TR-1.1**: Pyrefly Configuration File 📋 + - IF pyproject.toml exists THEN [tool.pyrefly] section configures error types + - WHEN project_excludes defined THEN generated code excluded from checking + - WHEN error types enabled THEN only specified error categories reported + +2. **TR-1.2**: Error Type Control 📋 + - IF error type set to true THEN pyrefly reports those errors + - IF error type set to false THEN pyrefly ignores those errors + - WHEN all error types false THEN no type checking performed + +#### Error Reduction Tracking +3. **TR-2.1**: Progress Metrics 📋 + - WHEN errors counted THEN baseline established at rollout start + - WHEN fixes committed THEN error count decreases monotonically + - WHEN phase completes THEN all errors in that category eliminated + +4. **TR-2.2**: Atomic Commits 📋 + - WHEN fixes made THEN commit message includes error type and count reduction + - WHEN phase advances THEN commit message indicates phase transition + - WHEN baseline established THEN commit preserves initial error state + +#### Code Quality Maintenance +5. **TR-3.1**: Type Safety Without Breaking Changes 📋 + - WHEN types fixed THEN runtime behavior remains unchanged + - WHEN null checks added THEN existing functionality preserved + - WHEN type conversions added THEN data integrity maintained + +6. **TR-3.2**: Incremental Rollout 📋 + - WHEN phase advances THEN only new error types enabled + - WHEN previous phases complete THEN no regression in fixed errors + - WHEN rollout completes THEN comprehensive type safety achieved + +### Non-Functional Requirements (Quality Attributes) + +#### Performance +1. **NR-1.1**: Type Checking Performance 📋 + - WHILE pyrefly runs THEN execution completes within reasonable time + - WHEN errors fixed THEN type checking speed may improve + - WHEN generated code excluded THEN checking focuses on source code only + +#### Maintainability +2. 
**NR-2.1**: Code Readability 📋 + - WHEN type fixes applied THEN code remains readable and maintainable + - WHEN null checks added THEN logic flow remains clear + - WHEN type conversions added THEN intent remains obvious + +#### Reliability +3. **NR-3.1**: Runtime Safety 📋 + - WHEN type errors fixed THEN runtime crashes prevented + - WHEN AttributeError sources fixed THEN null pointer exceptions avoided + - WHEN TypeError sources fixed THEN type mismatch crashes prevented + +## Success Criteria + +### Error Reduction Targets +- **Phase 0.1**: unsupported-operation errors reduced from 70 to 59 (16% reduction) +- **Phase 0.2**: unbound-name errors reduced from 47 to 37 (21% reduction) +- **Phase 0.3**: missing-attribute errors reduced from 416 to 359 (14% reduction target) +- **Phase 0.4**: bad-argument-type errors reduced from 206 to 0 (100% reduction target) +- **Overall Phase 0**: Total errors reduced from 117 to <50 (57% reduction) + +### Quality Metrics +- **Type Safety**: All enabled error types eliminated before phase advancement +- **Code Quality**: No degradation in existing functionality or performance +- **Maintainability**: Code remains readable and well-structured after fixes + +### Completion Criteria +- **Phase Completion**: All errors in current phase eliminated +- **Regression Testing**: Existing tests pass after type fixes +- **Documentation**: Error patterns and fixes documented for future reference \ No newline at end of file diff --git a/.kiro/specs/pyrefly-type-error-reduction/spec.json b/.kiro/specs/pyrefly-type-error-reduction/spec.json new file mode 100644 index 000000000..d27ca1b86 --- /dev/null +++ b/.kiro/specs/pyrefly-type-error-reduction/spec.json @@ -0,0 +1,78 @@ +{ + "feature_name": "pyrefly-type-error-reduction", + "created_at": "2025-11-19T00:00:00Z", + "updated_at": "2025-11-19T12:00:00Z", + "language": "en", + "phase": "in_progress", + "progress": { + "current_phase": "0.3", + "phase_description": "Extended Foundation - Missing Attributes & Bad Argument Types", + "error_reduction": { + "baseline_errors": 117, + "current_errors": 718, + "reduction_percentage": 22, + "errors_fixed": 78 + }, + "enabled_error_types": [ + "unbound-name", + "unsupported-operation", + "missing-attribute", + "bad-argument-type" + ], + "completed_phases": ["0.1", "0.2"], + "in_progress_phases": ["0.3"], + "pending_phases": ["1", "2", "3", "4", "5"] + }, + "approvals": { + "requirements": { + "generated": true, + "approved": true + }, + "design": { + "generated": true, + "approved": true + }, + "tasks": { + "generated": true, + "approved": true + }, + "implementation": { + "generated": false, + "approved": false + }, + "documentation": { + "generated": false, + "approved": false + } + }, + "ready_for_implementation": true, + "implementation_status": "in_progress", + "documentation_status": "pending", + "specification_status": "active", + "success_criteria": { + "error_reduction_target": 90, + "phase_completion": "all_phases", + "code_quality": "maintained", + "test_coverage": "preserved" + }, + "metrics": { + "total_errors_baseline": 920, + "current_errors": 718, + "errors_by_type": { + "unsupported-operation": 59, + "unbound-name": 37, + "missing-attribute": 416, + "bad-argument-type": 206 + }, + "progress_tracking": { + "phase_0_1_complete": true, + "phase_0_2_complete": true, + "phase_0_3_in_progress": true, + "phase_1_pending": true, + "phase_2_pending": true, + "phase_3_pending": true, + "phase_4_pending": true, + "phase_5_pending": true + } + } +} \ No newline at end of file 
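Because the aggregate counters and the per-type breakdown in `spec.json` can drift apart as fixes land, a small consistency check is useful when updating the file. The following is a minimal sketch based on the field names in the spec.json shown above; the script itself is hypothetical and not part of the spec.

```python
import json
from pathlib import Path


def check_spec(path: str = ".kiro/specs/pyrefly-type-error-reduction/spec.json") -> None:
    """Verify that aggregate error counters match the per-type breakdown."""
    spec = json.loads(Path(path).read_text())
    metrics = spec["metrics"]

    # The per-type breakdown should sum to the current error count.
    by_type_total = sum(metrics["errors_by_type"].values())
    assert by_type_total == metrics["current_errors"], (
        f"errors_by_type sums to {by_type_total}, expected {metrics['current_errors']}"
    )

    # The recorded reduction percentage should follow from the baseline.
    baseline = metrics["total_errors_baseline"]
    expected_pct = round(100 * (baseline - metrics["current_errors"]) / baseline)
    recorded_pct = spec["progress"]["error_reduction"]["reduction_percentage"]
    # Allow one point of slack for rounding differences.
    assert abs(expected_pct - recorded_pct) <= 1, (
        f"reduction_percentage {recorded_pct} does not match computed {expected_pct}"
    )


if __name__ == "__main__":
    check_spec()
    print("spec.json metrics are consistent")
```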
diff --git a/.kiro/specs/pyrefly-type-error-reduction/tasks.md b/.kiro/specs/pyrefly-type-error-reduction/tasks.md new file mode 100644 index 000000000..39b78fd0b --- /dev/null +++ b/.kiro/specs/pyrefly-type-error-reduction/tasks.md @@ -0,0 +1,187 @@ +# Implementation Tasks + +## Project Overview +Systematic elimination of type errors in the Cryptofeed codebase through phased pyrefly rollout, focusing on critical runtime safety checks and progressive type safety improvements. The rollout follows engineering principles of START SMALL, SOLID, KISS, and YAGNI with atomic commits and measurable progress tracking. + +## Phase 0: Foundation Setup ✅ + +### Task 0.1: Pyrefly Configuration Infrastructure ✅ +- **Objective**: Establish baseline pyrefly configuration and error counting +- **Implementation**: + - Configure `pyproject.toml` with `[tool.pyrefly]` section + - Set up `project_excludes = ["gen/**/*.py"]` to exclude generated code + - Enable controlled error types: `unbound-name`, `unsupported-operation` + - Establish baseline error count: 117 errors (70 unsupported-operation + 47 unbound-name) +- **Status**: ✅ COMPLETED +- **Engineering Principles**: START SMALL, Controlled Rollout + +### Task 0.2: Core Safety Error Elimination ✅ +- **Objective**: Fix most critical runtime crash sources +- **Implementation**: + - Fixed 11 unsupported-operation errors (70 → 59, 16% reduction) + - Fixed 10 unbound-name errors (47 → 37, 21% reduction) + - Maintained runtime compatibility with no breaking changes + - Atomic commits for each error fix with descriptive messages +- **Status**: ✅ COMPLETED +- **Engineering Principles**: TDD, Atomic Commits, Zero Breaking Changes + +### Task 0.3: Extended Safety Checks 🚧 +- **Objective**: Enable and fix missing-attribute and bad-argument-type errors +- **Implementation**: + - Enable `missing-attribute` and `bad-argument-type` error types + - Fix 57 missing-attribute errors (416 → 359, 14% reduction) + - Maintain 206 bad-argument-type errors for next phase + - Focus on null safety guards and type conversions +- **Status**: 🚧 IN PROGRESS (57/416 missing-attribute errors fixed) +- **Engineering Principles**: Incremental Progress, Pattern-Based Fixes + +## Phase 1: Type Safety Core 📋 + +### Task 1.1: Variable Assignment Safety 📋 +- **Objective**: Eliminate bad-assignment errors for type-safe variable assignments +- **Implementation**: + - Enable `bad-assignment` error type + - Fix incompatible type assignments (e.g., float to int) + - Add proper type conversions where needed + - Maintain runtime behavior while improving type safety +- **Status**: 📋 PENDING +- **Engineering Principles**: Type Safety, Backward Compatibility + +### Task 1.2: Return Type Safety 📋 +- **Objective**: Ensure functions return correct types +- **Implementation**: + - Enable `bad-return` error type + - Fix function return type mismatches + - Update type annotations to match actual return values + - Preserve existing API contracts +- **Status**: 📋 PENDING +- **Engineering Principles**: Contract Consistency, API Stability + +## Phase 2: Data Access Safety 📋 + +### Task 2.1: Iteration Safety 📋 +- **Objective**: Prevent iteration over non-iterable objects +- **Implementation**: + - Enable `not-iterable` error type + - Add null checks before iteration + - Convert data structures to iterables where appropriate + - Ensure collection safety throughout codebase +- **Status**: 📋 PENDING +- **Engineering Principles**: Null Safety, Data Structure Validation + +## Phase 3: Function Contracts 📋 + +### Task 3.1: 
Function Signature Consistency 📋 +- **Objective**: Ensure function parameter contracts are consistent +- **Implementation**: + - Enable `bad-function-definition` error type + - Fix parameter mismatch issues + - Align function signatures with base class expectations + - Maintain API compatibility +- **Status**: 📋 PENDING +- **Engineering Principles**: Interface Consistency, Inheritance Safety + +## Phase 4: Inheritance Safety 📋 + +### Task 4.1: Method Override Safety 📋 +- **Objective**: Ensure method overrides match base class signatures +- **Implementation**: + - Enable `bad-override` error type + - Fix method signature mismatches in inheritance hierarchies + - Update parameter names and types to match base classes + - Preserve polymorphic behavior +- **Status**: 📋 PENDING +- **Engineering Principles**: Liskov Substitution, Polymorphism + +### Task 4.2: Parameter Name Consistency 📋 +- **Objective**: Ensure parameter names match across inheritance hierarchies +- **Implementation**: + - Enable `bad-param-name-override` error type + - Fix parameter name mismatches in overridden methods + - Align naming conventions with base class contracts + - Maintain code readability +- **Status**: 📋 PENDING +- **Engineering Principles**: Naming Consistency, Code Clarity + +## Phase 5: Advanced Types 📋 + +### Task 5.1: Overload Resolution 📋 +- **Objective**: Ensure function overloads are properly resolvable +- **Implementation**: + - Enable `no-matching-overload` error type + - Fix ambiguous function call resolutions + - Add type hints to disambiguate overloads + - Optimize for common usage patterns +- **Status**: 📋 PENDING +- **Engineering Principles**: Type System Completeness, API Usability + +## Quality Assurance Tasks + +### Task QA.1: Regression Testing 📋 +- **Objective**: Ensure fixes don't break existing functionality +- **Implementation**: + - Run full test suite after each error fix batch + - Verify runtime behavior remains unchanged + - Check performance impact of type safety improvements + - Validate against existing integration tests +- **Status**: 📋 CONTINUOUS +- **Engineering Principles**: Quality Gates, Continuous Validation + +### Task QA.2: Progress Tracking 📋 +- **Objective**: Maintain accurate metrics and reporting +- **Implementation**: + - Update error counts after each fix batch + - Track progress against phase targets + - Document error patterns and solutions + - Generate rollout status reports +- **Status**: 📋 CONTINUOUS +- **Engineering Principles**: Transparency, Measurable Progress + +### Task QA.3: Code Review Standards 📋 +- **Objective**: Maintain code quality during type safety improvements +- **Implementation**: + - Review all type fixes for readability + - Ensure null checks don't obscure logic flow + - Validate that type conversions preserve semantics + - Check for consistent error handling patterns +- **Status**: 📋 CONTINUOUS +- **Engineering Principles**: Code Quality, Maintainability + +## Success Criteria Validation + +### Error Reduction Milestones +- **Phase 0.1**: unsupported-operation (70→59), unbound-name (47→37) ✅ +- **Phase 0.2**: Enable missing-attribute/bad-argument-type ✅ +- **Phase 0.3**: missing-attribute (416→359) 🚧 +- **Phase 0 Complete**: Total errors <50 (57% reduction from baseline) +- **Phase 1 Complete**: bad-assignment, bad-return errors eliminated +- **Phase 2 Complete**: not-iterable errors eliminated +- **Phase 3 Complete**: bad-function-definition errors eliminated +- **Phase 4 Complete**: bad-override, bad-param-name-override errors eliminated 
+- **Phase 5 Complete**: no-matching-overload errors eliminated + +### Quality Metrics +- **Zero Runtime Regressions**: All existing tests pass +- **Code Maintainability**: Type safety doesn't reduce readability +- **Performance Neutral**: No significant performance degradation +- **API Stability**: Public interfaces remain unchanged + +## Implementation Notes + +### Error Fix Patterns +1. **Null Safety**: `if obj is not None: obj.attribute` +2. **Type Conversion**: `int(float_value)` or `str(tuple_data)` +3. **Collection Checks**: `if data is not None: for item in data` +4. **Variable Typing**: Change declarations to match usage + +### Commit Standards +- **Atomic**: One error fix per commit +- **Descriptive**: Include error type and count reduction +- **Traceable**: Reference specific files and line numbers +- **Reversible**: Easy to identify and rollback if needed + +### Rollback Strategy +- **Configuration**: Disable error types in pyproject.toml +- **Branch**: Use feature branch for isolation +- **Incremental**: Rollback phases independently +- **Safe**: No impact on production deployments \ No newline at end of file diff --git a/.kiro/specs/shift-left-streaming-lakehouse/design.md b/.kiro/specs/shift-left-streaming-lakehouse/design.md new file mode 100644 index 000000000..c89efa49e --- /dev/null +++ b/.kiro/specs/shift-left-streaming-lakehouse/design.md @@ -0,0 +1,283 @@ +# Design Document: Shift Left Streaming Lakehouse Integration + +--- +**Document Length Guidelines: Max 1000 lines** + +**Purpose**: Provide sufficient detail to ensure implementation consistency across different implementers, preventing interpretation drift. +--- + +## Overview + +This feature integrates Cryptofeed with Confluent Schema Registry to "shift left" data quality and schema enforcement. By implementing strict schema validation at the ingestion source, we enable downstream consumers (like Flink and Iceberg) to reliably consume structured data without manual type conversion or schema inference. This initiative also introduces "v2" Protobuf schemas utilizing native types (`double`, `int64`) instead of strings, significantly improving serialization efficiency and query performance. + +### Goals +- **Schema Enforcement**: Prevent "bad data" from entering the data lake by validating messages against a central registry. +- **Native Types**: Reduce message size and compute overhead by using native Protobuf types (v2 schemas). +- **Seamless Integration**: Support Flink/Iceberg streaming patterns via standard Confluent Wire Format. +- **Zero Downtime**: Enable parallel v1 (legacy) and v2 (registry) production during migration. + +### Non-Goals +- **Backfilling**: Migrating historical v1 data to v2 format is out of scope. +- **Complex Schema Evolution**: We will target `BACKWARD` compatibility; complex schema migrations are manual. + +## Architecture + +### High-Level Architecture + +The architecture introduces a `SchemaRegistry` component into the `KafkaCallback` pipeline. When enabled, the callback consults the registry to validate schemas and obtain Schema IDs, which are embedded into the message payload using the Confluent Wire Format. + +```mermaid +graph TD + subgraph "Cryptofeed Ingestion" + DS[Data Source] -->|Raw Data| FH[FeedHandler] + FH -->|Normalized Object| KC[KafkaCallback] + + subgraph "KafkaCallback Pipeline" + KC -->|1. Select Converter| CONV[Protobuf Converter] + CONV -->|2. Serialize| PROTO[Protobuf Message] + + PROTO -->|3. 
Register/Get ID| SR_CLIENT[SchemaRegistry Client] + SR_CLIENT -.->|HTTP/REST| CSR((Confluent Registry)) + + SR_CLIENT -->|4. Embed ID| WIRE[Wire Format Bytes] + WIRE -->|5. Produce| KP[Kafka Producer] + end + end + + KP -->|Topic: trades-v2| KAFKA{Kafka Cluster} + + subgraph "Streaming Lakehouse" + KAFKA -->|Consume| FLINK[Flink Job] + FLINK -.->|Fetch Schema| CSR + FLINK -->|Write| ICEBERG[(Iceberg Table)] + end +``` + +### Technology Alignment +- **Schema Registry Client**: Existing implementation in `cryptofeed.backends.kafka_schema`, which uses `requests` for registry interaction. +- **Protobuf v2**: New `.proto` definitions in `proto/cryptofeed/normalized/v2/` using `syntax = "proto3"`. +- **Producer**: Existing `KafkaProducer` (wrapping `confluent-kafka`) updated to support headers and binary payloads. + +## Key Design Decisions + +### 1. Asynchronous Registry Interaction +- **Decision**: Use `asyncio.to_thread` (or `run_in_executor`) for Schema Registry interactions within `KafkaCallback`. +- **Context**: The `SchemaRegistry` client uses `requests` (synchronous blocking I/O), while `KafkaCallback` runs on the `asyncio` event loop. Blocking the loop for HTTP calls (even with caching) introduces jitter and throughput drops on cache misses. +- **Alternatives**: + 1. Rewrite `SchemaRegistry` to use `aiohttp` (high effort, duplicates logic). + 2. Block the event loop (unacceptable for high-throughput feeds). +- **Selected Approach**: Wrap the synchronous `register_schema` and `get_schema_by_id` calls in `loop.run_in_executor`. +- **Trade-offs**: Slight overhead for thread context switching vs. blocking the main loop; mitigated by aggressive in-memory caching in the `SchemaRegistry` class. + +### 2. Dual Schema Versioning (v1 vs v2) +- **Decision**: Maintain parallel Protobuf definitions and converter logic for v1 (string-based) and v2 (native types). +- **Context**: We must support existing consumers relying on v1 while rolling out v2. +- **Selected Approach**: + - Create `proto/cryptofeed/normalized/v2/` for new schemas. + - Create `cryptofeed/backends/protobuf_helpers_v2.py` for v2 converters. + - `KafkaCallback` will select the appropriate converter based on configuration. +- **Rationale**: Cleanly separates legacy and new logic and allows safe A/B testing and gradual migration. + +### 3. Subject Naming Strategy +- **Decision**: Use `TopicNameStrategy` (`<topic>-value`) for Schema Registry subjects. +- **Context**: The Registry needs a stable identifier (Subject) for schema evolution. +- **Selected Approach**: The subject name will be `{topic_name}-value`. For example, if the topic is `cryptofeed.trades.v2`, the subject is `cryptofeed.trades.v2-value`. +- **Rationale**: Standard convention in the Kafka ecosystem; simplifies Flink/Connect integration. + +## Detailed Design + +### 1. Schema Registry Service (`cryptofeed.backends.kafka_schema`) + +The existing implementation is largely sufficient but requires verification of async usage compatibility. +- **Enhancement**: Ensure `SchemaRegistry` methods are thread-safe when accessed via `run_in_executor`. (The current `requests` usage is generally thread-safe; the `_schema_cache` update logic should be verified.) + +### 2. Protobuf v2 Implementation + +New `.proto` files will be created mirroring the v1 structure but with native types.
+ +**Example: Trade v2** +```protobuf +// proto/cryptofeed/normalized/v2/trade.proto +syntax = "proto3"; +package cryptofeed.normalized.v2; + +import "google/protobuf/timestamp.proto"; + +message Trade { + string exchange = 1; + string symbol = 2; + + enum Side { + SIDE_UNSPECIFIED = 0; + BUY = 1; + SELL = 2; + } + Side side = 3; + + string trade_id = 4; + + double price = 5; // Changed from string + double amount = 6; // Changed from string + + // Standardized Timestamp + google.protobuf.Timestamp timestamp = 7; // Changed from int64/float + + // Gap detection + uint64 sequence_number = 8; +} +``` +_(Field numbers follow the authoritative matrix in section 3b below.)_ + +### 3. KafkaCallback Updates (`cryptofeed/kafka_callback.py`) + +The `KafkaCallback` needs modification to support the "Schema Registry Mode". + +**Configuration Changes**: +- `kafka_config` will accept a `schema_registry` section (as defined in requirements). + +**Processing Flow (`_process_message`)**: +1. **Schema Resolution**: + If `schema_registry_enabled` is True: + - Determine Subject: `f"{topic}-value"` + - **Async Call**: `await loop.run_in_executor(None, self.schema_registry.register_schema, subject, schema_definition)` + - *Optimization*: Check local cache in `KafkaCallback` before dispatching to executor to avoid thread overhead for known schemas. + +2. **Serialization (v2)**: + - Call `protobuf_helpers_v2.serialize_to_protobuf(obj)` -> returns `bytes` (raw proto). + +3. **Framing**: + - Call `self.schema_registry.embed_schema_id_in_message(raw_bytes, schema_id)`. + +4. **Headers**: + - Add `schema_id` to headers (optional, but helpful for debugging). + - Set `content-type` to `application/vnd.confluent.protobuf`. + +### 3b. v2 Message Field Mapping (Authoritative) +- **Trade**: `price`/`amount` = `double`; `timestamp` = `google.protobuf.Timestamp`; `sequence_number` = `uint64` (reuse v1 field numbers where applicable; reserve any removed ids). +- **Ticker**: best bid/ask price & size = `double`; `timestamp` = `google.protobuf.Timestamp`; `sequence_number` = `uint64`. +- **Book (L2 snapshot/delta)**: per-level price/size = `double`; depth arrays remain repeated `double`; `timestamp` = `google.protobuf.Timestamp`; `sequence_number` = `uint64`. +- **Candle**: open/high/low/close/volume = `double`; close/end `timestamp` = `google.protobuf.Timestamp`; `sequence_number` = `uint64`. +- **Decimal fidelity rule**: If an exchange requires > 1e-9 precision, switch the affected numeric fields to `bytes` and add a message-level `int32 scale` describing quantization (per REQ-011); document the choice in this matrix and keep field numbers stable (see the quantization sketch below).
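To make the decimal fidelity rule above concrete, the sketch below shows one way to quantize a `Decimal` into a `bytes` + `scale` pair. It is a minimal illustration, assuming an Avro-style convention (unscaled integer = value × 10^scale) and big-endian two's-complement bytes; the chosen convention must still be recorded in the field matrix, and the helper names (`encode_decimal`, `decode_decimal`) are illustrative, not existing Cryptofeed APIs.

```python
from decimal import Decimal


def encode_decimal(value: Decimal, scale: int) -> bytes:
    """Quantize a Decimal into big-endian two's-complement bytes at a fixed scale."""
    unscaled = int(value.scaleb(scale).to_integral_value())
    length = max(1, (unscaled.bit_length() + 8) // 8)  # extra byte keeps room for the sign bit
    return unscaled.to_bytes(length, byteorder="big", signed=True)


def decode_decimal(raw: bytes, scale: int) -> Decimal:
    """Inverse of encode_decimal."""
    unscaled = int.from_bytes(raw, byteorder="big", signed=True)
    return Decimal(unscaled).scaleb(-scale)


# Example: a price with 9 decimal places survives the round trip exactly at scale 9.
price = Decimal("27123.456789012")
assert decode_decimal(encode_decimal(price, 9), 9) == price
```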
+ +#### Field Matrix (baseline, v1 field numbers reused where possible) +| Message | Field | Number | Default Type | Notes | +|---------|-------|--------|--------------|-------| +| Trade | exchange | 1 | string | unchanged | +| Trade | symbol | 2 | string | unchanged | +| Trade | side | 3 | enum | unchanged | +| Trade | trade_id | 4 | string | unchanged (v1 trade_id) | +| Trade | price | 5 | double | switch to bytes+scale if precision > 1e-9 | +| Trade | amount | 6 | double | switch to bytes+scale if precision > 1e-9 | +| Trade | timestamp | 7 | google.protobuf.Timestamp | standardized from int64 µs | +| Trade | sequence_number | 8 | uint64 | new for gap detection | +| Ticker | exchange | 1 | string | unchanged | +| Ticker | symbol | 2 | string | unchanged | +| Ticker | best_bid_price | 3 | double | reuses v1 bid slot | +| Ticker | best_ask_price | 4 | double | reuses v1 ask slot | +| Ticker | best_bid_size | 5 | double | new | +| Ticker | best_ask_size | 6 | double | new | +| Ticker | timestamp | 7 | google.protobuf.Timestamp | replaces optional int64 | +| Ticker | sequence_number| 8 | uint64 | new | +| Book | exchange | 1 | string | unchanged | +| Book | symbol | 2 | string | unchanged | +| Book | bids | 3 | repeated PriceLevelV2 | price/size = double | +| Book | asks | 4 | repeated PriceLevelV2 | price/size = double | +| Book | timestamp | 5 | google.protobuf.Timestamp | aligns with snapshots/deltas | +| Book | sequence_number| 6 | uint64 | from optional sequence | +| Book | checksum | 7 | string | retained | +| Candle | exchange | 1 | string | unchanged | +| Candle | symbol | 2 | string | unchanged | +| Candle | start | 3 | google.protobuf.Timestamp | was int64 µs | +| Candle | end | 4 | google.protobuf.Timestamp | was int64 µs | +| Candle | interval | 5 | string | unchanged | +| Candle | trades | 6 | uint64 | was optional int64 | +| Candle | open | 7 | double | switch to bytes+scale if precision-critical | +| Candle | close | 8 | double | | +| Candle | high | 9 | double | | +| Candle | low | 10 | double | | +| Candle | volume | 11 | double | | +| Candle | closed | 12 | bool | unchanged | +| Candle | timestamp | 13 | google.protobuf.Timestamp | close/end time | +| Candle | sequence_number | 14 | uint64 | new | + +> If any field toggles to `bytes`, add `int32 scale = 15;` at message level to avoid renumbering core fields; reserve the unused numbers from v1 to remain backward compatible. + +### 4. Converter Logic (`cryptofeed/backends/protobuf_helpers_v2.py`) + +New module mirroring `protobuf_helpers.py` but targeting v2 protos. + +```python +def trade_to_proto_v2(trade_obj) -> trade_v2_pb2.Trade: + proto = trade_v2_pb2.Trade() + proto.exchange = trade_obj.exchange + proto.symbol = trade_obj.symbol + proto.price = float(trade_obj.price) # Native double + proto.amount = float(trade_obj.amount) # Native double + + # Timestamp conversion + ts_seconds = int(trade_obj.timestamp) + ts_nanos = int((trade_obj.timestamp - ts_seconds) * 1_000_000_000) + proto.timestamp.seconds = ts_seconds + proto.timestamp.nanos = ts_nanos + + return proto +``` + +## Migration Strategy + +1. **Deploy v2 Schemas**: Publish v2 schemas to the registry (can be done automatically by the producer on startup/first message). +2. **Dual Production**: Configure `KafkaCallback` to produce to both `cryptofeed.trades` (v1, string) and `cryptofeed.trades.v2` (v2, native) if needed, or simply enable v2 on a new topic prefix. +3. **Consumer Migration**: Point Flink jobs to v2 topics. +4. 
**Deprecation**: Eventually decommission v1 topics. + +## Verification Plan + +1. **Unit Tests**: + - Verify `v2` serialization produces correct bytes. + - Verify `embed_schema_id_in_message` adds correct Magic Byte and ID. + - Mock `SchemaRegistry` to verify `KafkaCallback` interaction. + +2. **Integration Tests**: + - Spin up local Confluent Schema Registry (Docker). + - Run `KafkaCallback` with `schema_registry_enabled=True`. + - Produce messages. + - Verify schema is registered in Registry. + - Consume messages using `confluent-kafka` deserializer to verify end-to-end validity. + +## System Flows + +### Producer Flow with Schema Registry + +```mermaid +sequenceDiagram + participant Source as Data Source + participant Callback as KafkaCallback + participant Cache as Local Cache + participant Registry as Schema Registry + participant Kafka as Kafka Broker + + Source->>Callback: Trade(price="100.50", ...) + + rect rgb(240, 248, 255) + Note over Callback: Serialization Phase + Callback->>Callback: Convert to Proto v2 (native types) + end + + rect rgb(255, 250, 240) + Note over Callback: Schema Resolution + Callback->>Cache: Get Schema ID for "trades-value" + alt Cache Miss + Callback->>Registry: Register Schema (HTTP) + Registry-->>Callback: Schema ID (e.g., 42) + Callback->>Cache: Update Cache + else Cache Hit + Cache-->>Callback: Schema ID (42) + end + end + + rect rgb(240, 255, 240) + Note over Callback: Framing + Callback->>Callback: Prepend [MagicByte][ID=42] + end + + Callback->>Kafka: Produce(Topic, Bytes) +``` diff --git a/.kiro/specs/shift-left-streaming-lakehouse/requirements.md b/.kiro/specs/shift-left-streaming-lakehouse/requirements.md new file mode 100644 index 000000000..71ff4cf24 --- /dev/null +++ b/.kiro/specs/shift-left-streaming-lakehouse/requirements.md @@ -0,0 +1,59 @@ +# Requirements Document: Shift Left Streaming Lakehouse Integration + +## Project Description +This initiative aims to "shift left" the data quality and schema enforcement responsibilities to the ingestion layer (Cryptofeed). Currently, consumers receive raw Protobuf messages with string-based types and must handle schema validation and type conversion manually. By implementing Confluent Schema Registry integration, moving to v2 Protobuf schemas with native types, and enriching message context, we enable a seamless Flink -> Iceberg streaming lakehouse pattern. + +## Functional Requirements + +### Schema Registry Integration (Contract) +- **REQ-001**: WHEN the Kafka Producer initializes, THEN the system SHALL verify connectivity to the configured Confluent Schema Registry. +- **REQ-002**: WHEN publishing a message, IF the schema is not registered, THEN the system SHALL register the Protobuf schema version with the Registry. +- **REQ-003**: WHEN publishing a message, THEN the system SHALL serialize the payload using the Confluent Wire Format (Magic Byte + Schema ID + Payload). +- **REQ-004**: WHERE the Schema Registry is unavailable, THEN the system SHALL fallback to a configurable error handling strategy (buffer or fail-fast). + +### Native v2 Types (Compute) +- **REQ-005**: WHEN generating v2 Protobuf schemas, THEN the system SHALL use `double` or `bytes` for numeric fields (Price, Amount) instead of `string`. +- **REQ-006**: WHEN transforming internal data structures to v2 Protobuf messages, THEN the system SHALL perform efficient type conversion (e.g., Decimal to double/bytes). 
+- **REQ-007**: IF a field represents a timestamp, THEN the system SHALL use `google.protobuf.Timestamp` or `int64` (nanoseconds) in the v2 schema. +- **REQ-011**: WHEN a field uses `bytes` to preserve Decimal fidelity, the schema SHALL also define a message-level `int32 scale` field documenting the exponent used during quantization; if `double` is chosen, the design MUST record that the field is lossy but acceptable for the data type. + +### Stream ID Context (Context) +- **REQ-008**: WHEN publishing a Kafka message, THEN the system SHALL include standard headers for `exchange`, `symbol`, `data_type`, and `schema_version`. +- **REQ-009**: WHEN constructing the Kafka record key, THEN the system SHALL use a consistent composite key (e.g., `<exchange>-<symbol>`) to ensure partition ordering. +- **REQ-010**: WHERE the data source provides a sequence number, THEN the system SHALL include it in the message payload to allow gap detection by consumers. + +## Non-Functional Requirements + +### Performance +- **NFR-001**: The overhead of Schema Registry lookups SHALL be minimized by caching Schema IDs locally (target: < 1ms overhead per message after cache warmup). +- **NFR-002**: Binary serialization with native types SHOULD result in a message size reduction of at least 30% compared to string-based v1 schemas. + +### Compatibility +- **NFR-003**: The system SHALL support parallel production of v1 (legacy) and v2 (schema-registry) topics during the migration phase. +- **NFR-004**: The v2 schemas SHALL follow Protobuf best practices to allow for forward and backward compatibility (e.g., reserved fields, no required fields). + +### Reliability +- **NFR-005**: The integration SHALL support standard Schema Registry authentication methods (Basic Auth, mTLS). + +## Implementation Plan + +### Phase 1: Schema Definition (v2) +- Define `v2` Protobuf schemas in `proto/cryptofeed/normalized/v2/`. +- Replace string-based numeric types with native types (`double` for float efficiency or `bytes` for decimal precision). +- Standardize timestamp fields. +- Produce a per-message field matrix (trade, ticker, book, candle) that records the exact type choice (`double` vs `bytes`), any shared `scale` field, and reserved field numbers inherited from v1 for backward compatibility. + +### Phase 2: Schema Registry Client +- Integrate `confluent-kafka` python client or compatible library. +- Implement a `SchemaRegistryService` within Cryptofeed to handle registration and ID caching. +- Add configuration options for Schema Registry URL and credentials. + +### Phase 3: Producer Update +- Update `KafkaCallback` to support a "Schema Registry Mode". +- Implement the serialization logic using the Schema Registry serializer. +- Inject standard headers (`exchange`, `symbol`, etc.) into the Kafka record. + +### Phase 4: Validation & Documentation +- Verify Flink compatibility by consuming v2 topics with a simple Flink job. +- Update `docs/consumer-integration-guide.md` with instructions for consuming v2 topics. +- Benchmark performance difference between v1 and v2. 
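As a rough illustration of how REQ-003, REQ-008, and REQ-009 combine at produce time, the sketch below builds the framed payload, composite record key, and standard headers. It assumes the magic-byte + 4-byte big-endian schema-id framing described in the design (Confluent's full Protobuf wire format additionally inserts message-index bytes after the schema id, omitted here); the helper names and the example exchange/symbol values are illustrative only.

```python
import struct

MAGIC_BYTE = b"\x00"  # Confluent wire-format marker


def frame_payload(schema_id: int, payload: bytes) -> bytes:
    """Prepend the magic byte and 4-byte big-endian schema id (REQ-003)."""
    return MAGIC_BYTE + struct.pack(">I", schema_id) + payload


def record_key(exchange: str, symbol: str) -> bytes:
    """Composite key so one market always lands on the same partition (REQ-009)."""
    return f"{exchange}-{symbol}".encode()


def record_headers(exchange: str, symbol: str, data_type: str, schema_version: str) -> list[tuple[str, bytes]]:
    """Standard context headers carried on every record (REQ-008)."""
    return [
        ("exchange", exchange.encode()),
        ("symbol", symbol.encode()),
        ("data_type", data_type.encode()),
        ("schema_version", schema_version.encode()),
    ]


# Example: framing a serialized v2 Trade destined for a v2 topic.
framed = frame_payload(42, b"<serialized protobuf bytes>")
assert framed[:5] == b"\x00" + (42).to_bytes(4, "big")
key = record_key("COINBASE", "BTC-USD")
headers = record_headers("COINBASE", "BTC-USD", "trades", "v2")
```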
diff --git a/.kiro/specs/shift-left-streaming-lakehouse/spec.json b/.kiro/specs/shift-left-streaming-lakehouse/spec.json new file mode 100644 index 000000000..089aacece --- /dev/null +++ b/.kiro/specs/shift-left-streaming-lakehouse/spec.json @@ -0,0 +1,38 @@ +{ + "name": "shift-left-streaming-lakehouse", + "version": "0.0.1", + "status": "implementation-in-progress", + "created": "2025-11-20", + "updated": "2025-11-21", + "description": "Implement Confluent Schema Registry integration in KafkaCallback (Contract), create v2 Protobuf schemas with native double/bytes types (Compute), and align message headers/keys for Flink/Iceberg compatibility (Context). Unblocks the Flink -> Iceberg pattern.", + "scope": "ingestion_layer", + "phases": { + "requirements": { + "status": "complete", + "completed": "2025-11-20", + "file": "requirements.md" + }, + "design": { + "status": "complete", + "completed": "2025-11-20", + "file": "design.md" + }, + "tasks": { + "status": "complete", + "completed": "2025-11-21", + "file": "tasks.md" + }, + "implementation": { + "status": "in_progress", + "started": "2025-11-21", + "file": "tasks.md" + } + }, + "dependencies": { + "required": [ + "market-data-kafka-producer", + "normalized-data-schema-crypto" + ] + }, + "tags": ["kafka", "schema-registry", "protobuf", "flink", "iceberg", "lakehouse"] +} diff --git a/.kiro/specs/shift-left-streaming-lakehouse/tasks.md b/.kiro/specs/shift-left-streaming-lakehouse/tasks.md new file mode 100644 index 000000000..c9bac2103 --- /dev/null +++ b/.kiro/specs/shift-left-streaming-lakehouse/tasks.md @@ -0,0 +1,48 @@ +# Implementation Tasks: Shift Left Streaming Lakehouse Integration + + - [x] 1. Define v2 Protobuf Schemas (Phase 1) + - Create `proto/cryptofeed/normalized/v2/` directory structure + - Define `trade.proto` with native types (`double`, `int64`) and `google.protobuf.Timestamp` + - Define `ticker.proto`, `book.proto`, and `candle.proto` with consistent native type patterns + - Configure `syntax = "proto3"` and proper package names in all files + - Add `sequence_number` field to all message types for gap detection + - Add a per-message field matrix section documenting chosen numeric type (`double` vs `bytes`) and, if `bytes`, the shared `scale` field per REQ-011; reserve any v1 field numbers that are not reused + - Author the field matrix in `proto/cryptofeed/normalized/v2/README.md` and keep it in sync with `.proto` definitions + - Add a launch decision table marking which fields (and exchanges, if applicable) will use `bytes+scale` at Day 1; otherwise default to `double` + - Fix and document the `scale` field number (use `15` across all messages when present) and mark any unused numbers as reserved in the `.proto` files + - Run `buf lint proto/cryptofeed/normalized/v2` to ensure schema hygiene + - _Requirements: REQ-005, REQ-007, REQ-010, NFR-004_ + + - [x] 2. Implement v2 Protobuf Helpers (Phase 2) + - Create `cryptofeed/backends/protobuf_helpers_v2.py` module + - Implement `trade_to_proto_v2` function with `Decimal` to `float` casting + - Implement timestamp conversion helper to populate `google.protobuf.Timestamp` + - Implement conversion functions for Ticker, Book, and Candle types + - Add unit tests for value precision and timestamp accuracy + - _Requirements: REQ-006, REQ-005, REQ-007_ + + - [x] 3. 
Enhance Schema Registry Client (Phase 3) + - Verify `cryptofeed.backends.kafka_schema.SchemaRegistry` thread-safety for async execution + - Enhance `_schema_cache` to ensure atomic updates or thread-safe access + - Verify support for Basic Auth and mTLS in the underlying request configuration + - _Requirements: REQ-001, NFR-001, NFR-005_ + + - [x] 4. Integrate Registry in KafkaCallback (Phase 3) + - Update `cryptofeed/kafka_callback.py` to parse `schema_registry` configuration + - Implement `_get_schema_id` using `loop.run_in_executor` for async registry operations + - Implement Confluent Wire Format framing (Magic Byte + Schema ID + Payload) + - Integrate `protobuf_helpers_v2` for serialization when registry mode is active + - Implement error handling strategy (buffer/fail) for registry unavailability + - _Requirements: REQ-002, REQ-003, REQ-004, NFR-001_ + + - [x] 5. Implement Context & Dual Production (Phase 3) + - Add standard headers (`exchange`, `symbol`, `data_type`, `schema_version`) to Kafka records + - Implement composite key generation (e.g., `<exchange>-<symbol>`) for partition ordering + - Add logic to support dual production to v1 (legacy) and v2 (registry) topics simultaneously + - _Requirements: REQ-008, REQ-009, NFR-003_ + + - [x] 6. Verification & Documentation (Phase 4) + - Create end-to-end integration test using a mock or local Schema Registry + - Implement benchmark script to measure v1 vs v2 message size reduction (>30%) + - Update `docs/consumer-integration-guide.md` with v2 consumption examples + - _Requirements: NFR-002_ diff --git a/CLAUDE.md b/CLAUDE.md index c5af374ce..924095129 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,472 +1,45 @@ -# Cryptofeed Engineering Principles & AI Development Guide - -## Active Specifications - -Detailed status available in [`docs/specs/SPEC_STATUS.md`](docs/specs/SPEC_STATUS.md). Refer to `AGENTS.md` for overview of available agent workflows and command usage. 
- -### ✅ Completed Specifications -- `proxy-system-complete`: ✅ COMPLETED (Jan 22, 2025) - Full proxy system implementation with transparent HTTP/SOCKS proxy support, consolidated documentation, 40 passing tests - - **Implementation**: Core proxy system in `cryptofeed/proxy.py` with connection integration - - **Testing**: 28 unit tests + 12 integration tests (all passing) - - **Documentation**: `docs/proxy/README.md`, `docs/proxy/technical-specification.md`, `docs/proxy/user-guide.md`, `docs/proxy/architecture.md` - - **Test Command**: `pytest tests/unit/test_proxy_mvp.py tests/integration/test_proxy_integration.py -v` - -- `normalized-data-schema-crypto`: ✅ COMPLETE (Oct 20, 2025) - Phase 1 (v0.1.0) baseline schemas ready for production release - - **Phase 1 (v0.1.0)**: 14/14 tasks complete, 46/46 tests passing, ready to merge and publish - - **Phase 3 (Governance)**: 3/3 tasks complete, 42/42 tests passing, infrastructure ready - - **Overall**: 68% complete (17/25 tasks), 119/119 tests passing, approved for merge - - **Status**: Awaiting merge to main, then publication to Buf registry - - **Documentation**: `docs/specs/normalized-data-schema/status.md` - -- `ccxt-generic-pro-exchange`: ✅ COMPLETE (Oct 26, 2025) - Generic CCXT/CCXT-Pro abstraction for long-tail exchanges - - **Implementation**: 1,612 LOC across 11 modules, 66 test files, 8/8 tasks complete - - **Status**: Production ready, requires documentation update - - **Next Step**: Create production integration guide and configuration examples - -- `backpack-exchange-integration`: ✅ COMPLETE (Oct 26, 2025) - Native Cryptofeed Backpack connector with ED25519 auth - - **Implementation**: 1,503 LOC across 11 modules, 59 test files, 10/10 tasks complete - - **Approach**: Native Cryptofeed (not CCXT-based), exceptional quality (5/5 review score) - - **Status**: Production ready, native integration guide pending - - **Next Step**: Create native integration guide and ED25519 troubleshooting documentation - -- `protobuf-callback-serialization`: ✅ COMPLETE (Nov 2, 2025) - Backend-only binary serialization for data feed callbacks - - **Scope**: Protobuf serialization for 14 data types, BackendCallback integration with Kafka/Redis/ZMQ support - - **Implementation**: 484 LOC in `cryptofeed/backends/protobuf_helpers.py`, 6 atomic commits - - **Status**: PRODUCTION READY - Backend-only minimal implementation (500 LOC total) - - **Key Achievement**: All protobuf logic consolidated in backends/, serializers/ and proto_wrappers/ deleted - - **Testing**: 144+ tests passing, backward compatible (JSON default) - - **Performance**: 2.1µs latency, 539k msg/s throughput, 63% smaller messages - - **Next Step**: Merge to main, unblock market-data-kafka-producer - -- `market-data-kafka-producer`: ✅ COMPLETE (Nov 13, 2025) - High-performance Kafka producer for protobuf-serialized market data with Phase 5 production execution plan - - **Scope**: Kafka backend integration, topic management, exactly-once semantics, monitoring. Storage (Iceberg/DuckDB) delegated to consumers. 
- - **Implementation**: 1,754 LOC in `cryptofeed/kafka_callback.py` and `cryptofeed/backends/kafka.py` - - **Status**: ✅ PHASE 5 EXECUTION COMPLETE - Production-ready for immediate deployment - - **Phase 1-4 (Core)**: 1,754 LOC, 628+ tests passing (100% pass rate), 7-8/10 code quality - - **Phase 5 (Production Execution)**: 282 tests created, 261 passing (92.6%), 21 skipped (Kafka cluster), 0 failing - - **Key Achievements**: - - ✅ Consolidated topics (O(20)) as default, per-symbol (O(10K)) as option - - ✅ 4 partition strategies (Composite, Symbol, Exchange, RoundRobin) with factory pattern - - ✅ Message headers with routing metadata (exchange, symbol, data_type, schema_version) - - ✅ Exactly-once semantics via idempotent producer + broker deduplication - - ✅ Comprehensive error handling with exception boundaries (no silent failures) - - ✅ Legacy backend (cryptofeed/backends/kafka.py) marked deprecated with migration guidance - - ✅ 7-phase comprehensive review (status, requirements, design, gap analysis, implementation, documentation, code quality) - - ✅ 4 atomic commits with Phase 5 execution materials merged to master - - ✅ 10 measurable success criteria defined and validated (message loss zero, lag <5s, error <0.1%, latency p99 <5ms, throughput ≥100k msg/s, data integrity 100%, monitoring functional, rollback <5min, topic count O(20), headers 100%) - - ✅ Complete team handoff package (roles, responsibilities, escalation procedures) - - ✅ Consumer migration templates (Flink, Python async, Custom minimal) - - ✅ Grafana monitoring dashboard (8 panels) + alert rules (8 rules) - - ✅ Per-exchange migration procedure with automation framework - - **Testing**: 628+ tests (Phase 1-4: 346 unit + 18 integration + 32 performance; Phase 5: 282 tests across 9 tasks) - - **Code Quality**: 7-8/10 (post-critical fixes), performance 9.9/10 - - **Documentation**: Comprehensive (5,867+ specification lines + 3,847 test code lines) - - Design (1,270 lines), requirements (304 lines), tasks (979 lines) - - Phase 5 execution materials: 4-week timeline, task specifications, quick reference, visual timeline, operational runbook, team handoff - - User guides: 7 comprehensive guides (162 KB) + consumer templates - - **Atomic Commits** (Phase 5 Execution): 3197624e (spec), 70f7f575 (materials), f8753f35 (handoff), merged to master - - **Risk Assessment**: LOW (0 blockers, 5 identified risks with mitigations) - - **Confidence Level**: HIGH (95%) - - **Next Step**: Teams can now execute Phase 5 production migration following PHASE_5_EXECUTION_PLAN.md (4-week Blue-Green cutover) - -### 🚧 In Progress Specifications -(None - all active specs have either completed or are awaiting approval) - -### 📋 Planning Phase -- `unified-exchange-feed-architecture`: Design generated (Oct 20, 2025) - Unify native and CCXT integrations behind shared contracts - - **Status**: Design generated but NOT YET approved, blocks task generation - - **Dependencies**: CCXT generic and Backpack specs (in progress) - - **Next Step**: Review and approve design before proceeding - -- `cryptofeed-quixstreams-source`: Initialized (Nov 14, 2025) - CryptofeedSource for QuixStreams streaming framework - - **Status**: Specification initialized, requirements generation pending - - **Purpose**: Seamless integration of Cryptofeed's Kafka producer with QuixStreams, enabling real-time market data analytics - - **Data Types**: Consumes all 14 protobuf message types (trade, ticker, orderbook, etc.) 
- - **Dependencies**: market-data-kafka-producer (COMPLETE), protobuf-callback-serialization (COMPLETE), normalized-data-schema-crypto (COMPLETE) - - **Timeline**: 4 weeks to production-ready (Phase 1: Core, Phase 2: Error handling, Phase 3: Monitoring, Phase 4: Production) - - **Next Step**: Generate requirements using `/kiro:spec-requirements cryptofeed-quixstreams-source` - ---- - -## Architecture: Ingestion Layer - -Cryptofeed is positioned as a pure data ingestion layer. Storage and analytics are delegated to downstream consumers. - -### Dependency Flow - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Cryptofeed Ingestion Layer (IN-SCOPE) │ -│ │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ -│ │ Exchange │───▶│ Normalized │───▶│ Protobuf │ │ -│ │ Connectors │ │ Data Schema │ │ Serialization│ │ -│ └──────────────┘ └──────────────┘ └──────┬───────┘ │ -│ │ │ -└───────────────────────────────────────────────────┼──────────┘ - ▼ - ┌──────────────────┐ - │ Kafka Topics │ - │ (Protobuf msgs) │ - └────────┬─────────┘ - │ - ┌─────────────┬──────────────────┬──────────────┬──────────────┬──────────────┐ - ▼ ▼ ▼ ▼ ▼ ▼ - ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ - │ Flink │ │QuixStreams│ │ DuckDB │ │ Custom │ │ Iceberg │ │ Spark │ - │ → Iceberg│ │CryptofeedSrc │ Consumer │ │ Consumer │ │ Direct │ │ → Parquet│ - └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ - - Consumer Responsibility (OUT-OF-SCOPE): - - Read Kafka topics - - Deserialize protobuf (CryptofeedSource handles for QuixStreams) - - Implement storage (Iceberg, Parquet, DuckDB) - - Implement analytics (aggregations, queries) - - Implement retention policies -``` - -### Specifications Alignment - -| Spec | Phase | Scope | Boundary | -|------|-------|-------|----------| -| **Spec 0** | Complete | Protobuf schemas (.proto files) | Schema definition | -| **Spec 1** | In Progress | Serialization (`to_proto()` methods) | Kafka message production | -| **Spec 3** | Initialized | Kafka producer integration | Kafka topic publication | -| **Consumer** | External | Storage, analytics, retention | Everything after Kafka | - -**Key Principle**: Cryptofeed stops at Kafka. Consumers handle everything downstream. 
- -### ⏸️ Paused/Disabled Specifications -- `quixstreams-integration`: Replaced by `cryptofeed-quixstreams-source` (Nov 14, 2025) - - **Original Status**: Disabled (Oct 31, 2025) - Stream processing delegated to consumers - - **Rationale**: Consumers can implement QuixStreams, Flink, Spark independently - - **Evolution**: Reconsidered as consumer integration pattern - now initializing as `cryptofeed-quixstreams-source` in Planning Phase - - **New Approach**: CryptofeedSource handles protobuf deserialization within consumer layer, not ingestion layer - - **Dependencies**: Leverages protobuf schemas from `protobuf-callback-serialization` and market data from `market-data-kafka-producer` - -- `cryptofeed-lakehouse-architecture`: Disabled (user request) - Data lakehouse architecture with real-time ingestion and analytics - - **Status**: Can be reactivated anytime, all phases (requirements, design, tasks) prepared and approved - - **Dependencies**: Can leverage normalized-data-schema-crypto once merged - -- `proxy-pool-system`: Disabled (paused) - Proxy pool management and rotation (extends proxy-system-complete) - - **Status**: Requirements, design, tasks all approved, awaiting external service roadmap clarification - - **Note**: Related to external-proxy-service spec - -- `external-proxy-service`: Disabled (deferred) - Service-oriented proxy management with external service delegation - - **Status**: High priority, 4-6 weeks effort, awaiting proxy roadmap realignment - - **Note**: Depends on proxy-pool-system alignment - -## Core Engineering Principles - -### Ingestion Layer Only (Separation of Concerns) -- **Scope**: Cryptofeed focuses exclusively on data ingestion and normalization -- **Producer Role**: Publish protobuf-serialized messages to Kafka topics -- **Consumer Responsibility**: Downstream consumers implement storage, analytics, and persistence -- **Storage Agnostic**: No opinions on lakehouse technology (Apache Iceberg, DuckDB, Parquet, etc.) -- **Query Independence**: Query engines (Flink, Spark, Trino, DuckDB) are consumer choices -- **Benefits**: Clear separation of concerns, flexible storage backends, reduced maintenance burden - -### SOLID Principles -- **Single Responsibility**: Each class/module has one reason to change -- **Open/Closed**: Open for extension, closed for modification -- **Liskov Substitution**: Derived classes must be substitutable for base classes -- **Interface Segregation**: Clients shouldn't depend on interfaces they don't use -- **Dependency Inversion**: Depend on abstractions, not concretions - -### KISS (Keep It Simple, Stupid) -- Prefer well-scoped conventional commits (feat:, fix:, chore:, etc.) 
to keep history searchable -- Document behavioral changes in the subject; leave refactors/docs/tests as chore/test/docs prefixes -- Avoid multi-purpose commits—split when scope spans unrelated areas -- Tie commits to spec/task IDs when available for traceability - -### KISS (Keep It Simple, Stupid) -- Prefer simple solutions over complex ones -- Avoid premature optimization -- Write code that is easy to understand and maintain -- Minimize cognitive load for future developers - -### Conventional Commits -- Use `feat:`, `fix:`, `chore:`, `docs:`, etc., to label intent and surface change type quickly -- Keep commit scope tight—one functional concern per commit, split unrelated work -- Reference spec/task IDs when available to maintain traceability -- Describe the user-facing behavior change in the subject; reserve details for the body if needed - -### DRY (Don't Repeat Yourself) -- Extract common functionality into reusable components -- Use configuration over duplication -- Share metadata/transport logic across derived feeds -- Avoid duplicated rate limit logic - -### YAGNI (You Aren't Gonna Need It) -- Implement only what's needed now -- Defer features until they're actually required -- Keep configuration surface minimal -- Avoid building for hypothetical future requirements - -### FRs Over NFRs -- Deliver functional requirements before tuning non-functional concerns -- Capture NFR gaps as follow-up work instead of blocking feature delivery -- Align prioritization with user impact, revisiting NFRs once core behavior ships -- Treat performance, resiliency, and compliance targets as iterative enhancements unless explicitly critical - -### Compound Engineering with Parallel Work Streams -- **Decompose Outcomes:** Split large initiatives into discrete, value-focused streams that can progress independently without blocking shared milestones. -- **Bounded Interfaces:** Define clear contracts (APIs, schema versions, specs) so parallel teams can integrate asynchronously with minimal coordination overhead. -- **Synchronization Cadence:** Establish short, recurring integration checkpoints to surface cross-stream risks early while preserving autonomous execution between checkpoints. -- **Shared Context Hubs:** Maintain living documents (specs, ADRs, dashboards) that aggregate decisions and status across streams to avoid redundant alignment meetings. -- **Risk Balancing:** Pair high-complexity streams with stabilization or hardening tracks to ensure compound delivery doesn’t sacrifice reliability. -- **Capacity Guardrails:** Reserve buffer capacity for emergent interdependencies or support needs, preventing one stream’s blockers from derailing overall delivery. 
- -## Development Standards - -### NO MOCKS -- Use real implementations with test fixtures -- Prefer integration tests over heavily mocked unit tests -- Test against actual exchange APIs when possible -- Use ccxt sandbox or permissive endpoints for testing - -### NO LEGACY -- Remove deprecated code aggressively -- Don't maintain backward compatibility for internal APIs -- Upgrade dependencies regularly -- Clean architecture without legacy workarounds - -### NO COMPATIBILITY -- Target latest Python versions -- Use modern language features -- Don't support outdated exchange API versions -- Break APIs when it improves design - -### START SMALL -- Begin with MVP implementations -- Support minimal viable feature set first -- Add complexity only when justified -- Iterative development over big bang releases - -### CONSISTENT NAMING WITHOUT PREFIXES -- Use clear, descriptive names -- Avoid Hungarian notation or type prefixes -- Consistent verb tenses (get/set, fetch/push) -- Domain-specific terminology over generic names - -## Agentic Coding Best Practices - -### Research-Plan-Execute Workflow -1. **Research Phase**: Read relevant files, understand context -2. **Planning Phase**: Outline solution architecture -3. **Execution Phase**: Implement with continuous verification -4. **Validation Phase**: Test and verify implementation - -### Test-Driven Development (TDD) -- Write tests first based on expected behavior -- Run tests to confirm they fail -- Implement minimal code to pass tests -- Refactor without changing test behavior -- Never modify tests to fit implementation - -### Context Engineering -- Maintain project context in CLAUDE.md -- Use specific, actionable instructions -- Provide file paths and screenshots for UI work -- Reference existing patterns and conventions -- Clear context between major tasks - -### Iterative Development -- Make small, verifiable changes -- Commit frequently with descriptive messages -- Use subagents for complex verification tasks -- Review code changes continuously -- Maintain clean git history - -## Context Engineering Principles - -### Information Architecture -- **Prioritize by Relevance**: Most important information first -- **Logical Categorization**: Group related context together -- **Progressive Detail**: Start essential, add layers gradually -- **Clear Relationships**: Show dependencies and connections - -### Dynamic Context Systems -- **Runtime Context**: Generate context on-demand for tasks -- **State Management**: Track conversation and project state -- **Memory Integration**: Combine short-term and long-term knowledge -- **Tool Integration**: Provide relevant tool and API context - -### Context Optimization -- **Precision Over Volume**: Quality information over quantity -- **Format Consistency**: Structured, scannable information -- **Relevance Filtering**: Include only task-relevant context -- **Context Window Management**: Efficient use of available space - -## Cryptofeed-Specific Guidelines - -### Exchange Integration -- Use ccxt for standardized exchange APIs -- Follow existing emitter/queue patterns -- Implement proper rate limiting and backoff -- Handle regional restrictions with proxy support - -### Data Normalization -- Convert timestamps to consistent float seconds -- Use Decimal for price/quantity precision -- Preserve sequence numbers for gap detection -- Normalize symbols via ccxt helpers - -### Error Handling -- Surface HTTP errors with actionable messages -- Provide fallback modes (REST-only, alternative endpoints) -- Log warnings for 
experimental features -- Implement graceful degradation - -### Configuration -- Use YAML configuration files -- Support environment variable interpolation -- Provide clear examples and documentation -- Allow per-deployment customization - -### Architecture Patterns -``` -CcxtGenericFeed - ├─ CcxtMetadataCache → ccxt.exchange.load_markets() - ├─ CcxtRestTransport → ccxt.async_support.exchange.fetch_*() - └─ CcxtWsTransport → ccxt.pro.exchange.watch_*() - ↳ CcxtEmitter → existing BackendQueue/Metrics -``` - -## Testing Strategy - -### Unit Testing -- Mock ccxt transports for isolated testing -- Test symbol normalization and data transformation -- Verify queue integration and error handling -- Assert configuration parsing and validation - -### Integration Testing -- Test against live exchange APIs (sandbox when available) -- Verify trade/L2 callback sequences -- Test with actual proxy configurations -- Record sample payloads for regression testing - -### Regression Testing -- Maintain docker-compose test harnesses -- Test across ccxt version updates -- Verify backward compatibility of configurations -- Automated testing in CI/CD pipeline - -## Common Commands - -### Development -```bash -# Run tests -python -m pytest tests/ -v - -# Code quality gate (smells + complexity) -pyscn check --max-complexity 15 cryptofeed - -# Type checking -mypy cryptofeed/ - -# Linting -ruff check cryptofeed/ -ruff format cryptofeed/ - -# Install development dependencies -pip install -e ".[dev]" -``` - -### Exchange Testing -```bash -# Test specific exchange integration -python -m pytest tests/integration/test_backpack.py -v - -# Run with live data (requires credentials) -BACKPACK_API_KEY=xxx python examples/backpack_live.py -``` - -### Documentation -```bash -# Build docs -cd docs && make html - -# Serve docs locally -cd docs/_build/html && python -m http.server 8000 -``` - -## AI Development Workflow - -### Task Initialization -1. Read this CLAUDE.md file for context -2. Examine relevant specification files in `docs/specs/` -3. Review existing implementation patterns -4. Plan approach using established principles - -### Implementation Process -1. Write tests first (TDD approach) -2. Implement minimal viable solution -3. Iterate with continuous testing -4. Refactor for clarity and maintainability -5. Document configuration and usage - -### Quality Assurance -1. Run full test suite -2. Check type annotations -3. Verify code formatting -4. Test with real exchange data -5. 
Update documentation as needed - -### Code Review Checklist -- [ ] Follows SOLID principles -- [ ] Implements TDD approach -- [ ] No mocks in production code -- [ ] Consistent naming conventions -- [ ] Proper error handling -- [ ] Type annotations present -- [ ] Tests cover edge cases -- [ ] Documentation updated -- [ ] No legacy compatibility code -- [ ] Configuration examples provided - -## Project Structure - -``` -cryptofeed/ -├── adapters/ # ccxt integration adapters -├── exchanges/ # exchange-specific implementations -├── defines.py # constants and enums -├── types.py # type definitions -└── utils.py # utility functions - -docs/ -├── specs/ # detailed specifications -├── examples/ # usage examples -└── api/ # API documentation - -tests/ -├── unit/ # isolated unit tests -├── integration/ # live exchange tests -└── fixtures/ # test data and mocks -``` - -## Performance Considerations - -### Memory Management -- Use slots for data classes -- Implement proper cleanup in transports -- Monitor memory usage in long-running feeds -- Use generators for large data streams - -### Network Optimization -- Implement connection pooling -- Use persistent WebSocket connections -- Batch REST API requests when possible -- Implement proper rate limiting - -### Data Processing -- Use Decimal for financial calculations -- Minimize data copying in hot paths -- Implement efficient order book management -- Cache metadata to reduce API calls - ---- - -*This document serves as the primary context for AI-assisted development in the Cryptofeed project. Update regularly as patterns and practices evolve.* +# AI-DLC and Spec-Driven Development + +Kiro-style Spec Driven Development implementation on AI-DLC (AI Development Life Cycle) + +## Project Context + +### Paths +- Steering: `.kiro/steering/` +- Specs: `.kiro/specs/` + +### Steering vs Specification + +**Steering** (`.kiro/steering/`) - Guide AI with project-wide rules and context +**Specs** (`.kiro/specs/`) - Formalize development process for individual features + +### Active Specifications +- Check `.kiro/specs/` for active specifications +- Use `/kiro:spec-status [feature-name]` to check progress + +## Development Guidelines +- Think in English, generate responses in English. All Markdown content written to project files (e.g., requirements.md, design.md, tasks.md, research.md, validation reports) MUST be written in the target language configured for this specification (see spec.json.language). 
+ +## Minimal Workflow +- Phase 0 (optional): `/kiro:steering`, `/kiro:steering-custom` +- Phase 1 (Specification): + - `/kiro:spec-init "description"` + - `/kiro:spec-requirements {feature}` + - `/kiro:validate-gap {feature}` (optional: for existing codebase) + - `/kiro:spec-design {feature} [-y]` + - `/kiro:validate-design {feature}` (optional: design review) + - `/kiro:spec-tasks {feature} [-y]` +- Phase 2 (Implementation): `/kiro:spec-impl {feature} [tasks]` + - `/kiro:validate-impl {feature}` (optional: after implementation) +- Progress check: `/kiro:spec-status {feature}` (use anytime) + +## Development Rules +- 3-phase approval workflow: Requirements → Design → Tasks → Implementation +- Human review required each phase; use `-y` only for intentional fast-track +- Keep steering current and verify alignment with `/kiro:spec-status` +- Follow the user's instructions precisely, and within that scope act autonomously: gather the necessary context and complete the requested work end-to-end in this run, asking questions only when essential information is missing or the instructions are critically ambiguous. + +## Steering Configuration +- Load entire `.kiro/steering/` as project memory +- Default files: `product.md`, `tech.md`, `structure.md` +- Custom files are supported (managed via `/kiro:steering-custom`) diff --git a/README.md b/README.md index 38d8d9e50..6c9b737f6 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ pytest tests/integration/test_live_*.py -v -m live_proxy # Live tests (26 tests - 🌍 Proxy routing validation (HTTP + WebSocket) - ✅ Live exchange testing (Binance, Hyperliquid, Backpack) -**Documentation**: See [docs/e2e/](docs/e2e/) for detailed guides +**Documentation**: See [docs/deliverables/](docs/deliverables/) for detailed guides For an example of a containerized application using cryptofeed to store data to a backend, please see [Cryptostore](https://github.com/bmoscon/cryptostore). diff --git a/cryptofeed/backends/arctic.py b/cryptofeed/backends/arctic.py index db549a15c..68b79d01d 100644 --- a/cryptofeed/backends/arctic.py +++ b/cryptofeed/backends/arctic.py @@ -1,4 +1,4 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions @@ -6,16 +6,42 @@ Book backends are intentionally left out here - Arctic cannot handle high throughput data like book data. Arctic is best used for writing large datasets in batches. -''' +""" + import arctic import pandas as pd +from abc import ABC, abstractmethod from cryptofeed.backends.backend import BackendCallback -from cryptofeed.defines import BALANCES, CANDLES, FILLS, FUNDING, OPEN_INTEREST, ORDER_INFO, TICKER, TRADES, LIQUIDATIONS, TRANSACTIONS +from cryptofeed.defines import ( + BALANCES, + CANDLES, + FILLS, + FUNDING, + OPEN_INTEREST, + ORDER_INFO, + TICKER, + TRADES, + LIQUIDATIONS, + TRANSACTIONS, +) class ArcticCallback: - def __init__(self, library, host='127.0.0.1', key=None, none_to=None, numeric_type=float, quota=0, ssl=False, **kwargs): + # Default key - subclasses should override + default_key = "unknown" + + def __init__( + self, + library, + host="127.0.0.1", + key=None, + none_to=None, + numeric_type=float, + quota=0, + ssl=False, + **kwargs, + ): """ library: str arctic library. Will be created if does not exist. 
@@ -33,7 +59,7 @@ def __init__(self, library, host='127.0.0.1', key=None, none_to=None, numeric_ty """ con = arctic.Arctic(host, ssl=ssl) if library not in con.list_libraries(): - lib_type = kwargs.get('lib_type', arctic.VERSION_STORE) + lib_type = kwargs.get("lib_type", arctic.VERSION_STORE) con.initialize_library(library, lib_type=lib_type) con.set_quota(library, quota) self.lib = con[library] @@ -43,12 +69,12 @@ def __init__(self, library, host='127.0.0.1', key=None, none_to=None, numeric_ty async def write(self, data): df = pd.DataFrame({key: [value] for key, value in data.items()}) - df['date'] = pd.to_datetime(df.timestamp, unit='s') - df['receipt_timestamp'] = pd.to_datetime(df.receipt_timestamp, unit='s') - df.set_index(['date'], inplace=True) - if 'type' in df and df.type.isna().any(): - df.drop(columns=['type'], inplace=True) - df.drop(columns=['timestamp'], inplace=True) + df["date"] = pd.to_datetime(df.timestamp, unit="s") + df["receipt_timestamp"] = pd.to_datetime(df.receipt_timestamp, unit="s") + df.set_index(["date"], inplace=True) + if "type" in df and df.type.isna().any(): + df.drop(columns=["type"], inplace=True) + df.drop(columns=["timestamp"], inplace=True) self.lib.append(self.key, df, upsert=True) diff --git a/cryptofeed/backends/backend.py b/cryptofeed/backends/backend.py index a8ab5d13e..ed0b9632c 100644 --- a/cryptofeed/backends/backend.py +++ b/cryptofeed/backends/backend.py @@ -1,35 +1,40 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. -''' +""" + import asyncio import logging from asyncio.queues import Queue from multiprocessing import Pipe, Process from contextlib import asynccontextmanager +from typing import Union, cast +from abc import ABC, abstractmethod from cryptofeed.backends.protobuf_helpers import ( serialize_to_protobuf, ) -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") -SHUTDOWN_SENTINEL = 'STOP' +SHUTDOWN_SENTINEL = "STOP" class BackendQueue: def start(self, loop: asyncio.AbstractEventLoop, multiprocess=False): - if hasattr(self, 'started') and self.started: + if hasattr(self, "started") and self.started: # prevent a backend callback from starting more than 1 writer and creating more than 1 queue return self.multiprocess = multiprocess if self.multiprocess: self.queue = Pipe(duplex=False) - self.worker = Process(target=BackendQueue.worker, args=(self.writer,), daemon=True) - self.worker.start() + self.worker = Process( + target=BackendQueue.worker, args=(self.writer,), daemon=True + ) + cast(Process, self.worker).start() else: self.queue = Queue() self.worker = loop.create_task(self.writer()) @@ -38,9 +43,9 @@ def start(self, loop: asyncio.AbstractEventLoop, multiprocess=False): async def stop(self): if self.multiprocess: self.queue[1].send(SHUTDOWN_SENTINEL) - self.worker.join() + cast(Process, self.worker).join() else: - await self.queue.put(SHUTDOWN_SENTINEL) + await cast(Queue, self.queue).put(SHUTDOWN_SENTINEL) self.running = False @staticmethod @@ -58,7 +63,7 @@ async def write(self, data): if self.multiprocess: self.queue[1].send(data) else: - await self.queue.put(data) + await cast(Queue, self.queue).put(data) @asynccontextmanager async def read_queue(self) -> list: @@ -70,19 +75,20 @@ async def read_queue(self) -> list: else: yield [msg] else: - current_depth = self.queue.qsize() + queue = cast(Queue, self.queue) + current_depth = queue.qsize() if current_depth == 0: - update = await self.queue.get() 
+ update = await queue.get() if update == SHUTDOWN_SENTINEL: yield [] else: yield [update] - self.queue.task_done() + queue.task_done() else: ret = [] count = 0 while current_depth > count: - update = await self.queue.get() + update = await queue.get() count += 1 if update == SHUTDOWN_SENTINEL: self.running = False @@ -92,10 +98,10 @@ async def read_queue(self) -> list: yield ret for _ in range(count): - self.queue.task_done() + queue.task_done() -class BackendCallback: +class BackendCallback(ABC): """ Base class for backend callbacks with pluggable serialization support. @@ -115,10 +121,20 @@ class BackendCallback: _serialization_log_state: tuple[str, str] | None = None _serialization_locked: bool = False + def __init__(self, numeric_type=float, none_to=None): + """Initialize backend callback with serialization parameters.""" + self.numeric_type = numeric_type + self.none_to = none_to + + @abstractmethod + async def write(self, data): + """Write data to the backend. Must be implemented by subclasses.""" + pass + def set_serialization_format(self, format_name: str | None) -> None: """Persist an explicit serialization format override for this callback.""" - if getattr(self, '_serialization_locked', False): + if getattr(self, "_serialization_locked", False): if format_name is None and self._explicit_serialization_format is None: return if format_name is not None: @@ -139,7 +155,7 @@ def set_serialization_format(self, format_name: str | None) -> None: def _validate_format(format_name: str) -> str: """Validate and normalize serialization format.""" normalized = format_name.lower().strip() - if normalized not in ('json', 'protobuf'): + if normalized not in ("json", "protobuf"): raise ValueError( f"Invalid serialization format '{format_name}'. " f"Valid formats: json, protobuf" @@ -151,8 +167,10 @@ def _get_format_from_env() -> str | None: """Get serialization format from environment variable.""" import os - env_value = os.environ.get('CRYPTOFEED_SERIALIZATION_FORMAT') - deprecated_value = os.environ.get('CRYPTOFEED_CALLBACK_FORMAT') if env_value is None else None + env_value = os.environ.get("CRYPTOFEED_SERIALIZATION_FORMAT") + deprecated_value = ( + os.environ.get("CRYPTOFEED_CALLBACK_FORMAT") if env_value is None else None + ) if env_value: return BackendCallback._validate_format(env_value) @@ -168,22 +186,22 @@ def _get_format_from_env() -> str | None: def serialization_format(self) -> str: """Active serialization format after applying env overrides.""" - preferred = getattr(self, '_explicit_serialization_format', None) + preferred = getattr(self, "_explicit_serialization_format", None) env_value = self._get_format_from_env() if env_value is not None: resolved = env_value - source = 'env' + source = "env" elif preferred is not None: resolved = preferred - source = 'explicit' + source = "explicit" else: - resolved = 'json' - source = 'default' + resolved = "json" + source = "default" - if getattr(self, '_serialization_log_state', None) != (source, resolved): + if getattr(self, "_serialization_log_state", None) != (source, resolved): LOG.info( - '%s: serialization_format=%s (source=%s)', + "%s: serialization_format=%s (source=%s)", self.__class__.__name__, resolved, source, @@ -196,15 +214,15 @@ def _build_dict_payload(self, dtype, receipt_timestamp: float) -> dict: """Normalize data objects into dictionaries for JSON/backward paths.""" data = dtype.to_dict(numeric_type=self.numeric_type, none_to=self.none_to) - if not getattr(dtype, 'timestamp', None): - data['timestamp'] = receipt_timestamp 
- data['receipt_timestamp'] = receipt_timestamp + if not getattr(dtype, "timestamp", None): + data["timestamp"] = receipt_timestamp + data["receipt_timestamp"] = receipt_timestamp return data async def __call__(self, dtype, receipt_timestamp: float): """Default implementation: emit JSON-compatible dictionaries or protobuf.""" - if self.serialization_format == 'protobuf': + if self.serialization_format == "protobuf": # Protobuf serialization: use consolidated helpers from backends payload = serialize_to_protobuf(dtype) else: @@ -215,28 +233,48 @@ async def __call__(self, dtype, receipt_timestamp: float): class BackendBookCallback(BackendCallback): + def __init__( + self, + snapshots_only=False, + snapshot_interval=1000, + numeric_type=float, + none_to=None, + ): + """Initialize book callback with snapshot parameters.""" + super().__init__(numeric_type=numeric_type, none_to=none_to) + self.snapshots_only = snapshots_only + self.snapshot_interval = snapshot_interval + self.snapshot_count = {} + async def _write_snapshot(self, book, receipt_timestamp: float): data = book.to_dict(numeric_type=self.numeric_type, none_to=self.none_to) - del data['delta'] + del data["delta"] if not book.timestamp: - data['timestamp'] = receipt_timestamp - data['receipt_timestamp'] = receipt_timestamp + data["timestamp"] = receipt_timestamp + data["receipt_timestamp"] = receipt_timestamp await self.write(data) async def __call__(self, book, receipt_timestamp: float): if self.snapshots_only: await self._write_snapshot(book, receipt_timestamp) else: - data = book.to_dict(delta=book.delta is not None, numeric_type=self.numeric_type, none_to=self.none_to) + data = book.to_dict( + delta=book.delta is not None, + numeric_type=self.numeric_type, + none_to=self.none_to, + ) if not book.timestamp: - data['timestamp'] = receipt_timestamp - data['receipt_timestamp'] = receipt_timestamp + data["timestamp"] = receipt_timestamp + data["receipt_timestamp"] = receipt_timestamp if book.delta is None: - del data['delta'] + del data["delta"] else: self.snapshot_count[book.symbol] += 1 await self.write(data) - if self.snapshot_interval <= self.snapshot_count[book.symbol] and book.delta: + if ( + self.snapshot_interval <= self.snapshot_count[book.symbol] + and book.delta + ): await self._write_snapshot(book, receipt_timestamp) self.snapshot_count[book.symbol] = 0 diff --git a/cryptofeed/backends/gcppubsub.py b/cryptofeed/backends/gcppubsub.py index 9bc6ae9f8..e7c286f74 100644 --- a/cryptofeed/backends/gcppubsub.py +++ b/cryptofeed/backends/gcppubsub.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. -''' +""" + from collections import defaultdict import os import io @@ -25,10 +26,19 @@ class GCPPubSubCallback: - def __init__(self, topic: Optional[str] = None, key: Optional[str] = None, - service_file: Optional[Union[str, IO[AnyStr]]] = None, - ordering_key: Optional[Union[str, io.IOBase]] = None, numeric_type=float, none_to=None): - ''' + # Default key - subclasses should override + default_key = "unknown" + + def __init__( + self, + topic: Optional[str] = None, + key: Optional[str] = None, + service_file: Optional[Union[str, IO[AnyStr]]] = None, + ordering_key: Optional[Union[str, io.IOBase]] = None, + numeric_type=float, + none_to=None, + ): + """ Backend using Google Cloud Platform Pub/Sub. Use requires an account with Google Cloud Platform. Free tier allows 10GB messages per month. 
@@ -53,12 +63,12 @@ def __init__(self, topic: Optional[str] = None, key: Optional[str] = None, if messages have the same ordering key and you publish the messages to the same region, subscribers can receive the messages in order https://cloud.google.com/pubsub/docs/publisher#using_ordering_keys - ''' + """ self.key = key or self.default_key self.ordering_key = ordering_key self.numeric_type = numeric_type self.none_to = none_to - self.topic = topic or f'cryptofeed-{self.key}' + self.topic = topic or f"cryptofeed-{self.key}" self.topic_path = self.get_topic() self.service_file = service_file self.session = None @@ -66,7 +76,7 @@ def __init__(self, topic: Optional[str] = None, key: Optional[str] = None, def get_topic(self): publisher = pubsub_v1.PublisherClient() - project_id = os.getenv('GCP_PROJECT') + project_id = os.getenv("GCP_PROJECT") topic_path = PublisherClient.topic_path(project_id, self.topic) try: publisher.create_topic(request={"name": topic_path}) @@ -89,26 +99,26 @@ async def get_client(self): return self.client async def write(self, data: dict): - ''' + """ Publish message. For filtering, "feed" and "symbol" are added as attributes. https://cloud.google.com/pubsub/docs/filtering - ''' + """ client = await self.get_client() payload = json.dumps(data).encode() - message = PubsubMessage(payload, feed=data['exchange'], symbol=data['symbol']) + message = PubsubMessage(payload, feed=data["exchange"], symbol=data["symbol"]) await client.publish(self.topic_path, [message]) class TradeGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" class FundingGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class BookGCPPubSub(GCPPubSubCallback, BackendBookCallback): - default_key = 'book' + default_key = "book" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): self.snapshots_only = snapshots_only @@ -118,32 +128,32 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs class TickerGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class OpenInterestGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class LiquidationsGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'liquidations' + default_key = "liquidations" class CandlesGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" class OrderInfoGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class TransactionsGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" class BalancesGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class FillsGCPPubSub(GCPPubSubCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" diff --git a/cryptofeed/backends/http.py b/cryptofeed/backends/http.py index 8198126b3..ea44630ad 100644 --- a/cryptofeed/backends/http.py +++ b/cryptofeed/backends/http.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + import logging import aiohttp @@ -11,7 +12,7 @@ from cryptofeed.backends.backend import BackendQueue -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class HTTPCallback(BackendQueue): @@ -24,6 +25,7 @@ async def http_write(self, data, headers=None): if not self.session or self.session.closed: self.session = aiohttp.ClientSession() + assert self.session is not None async with self.session.post(self.addr, data=data, headers=headers) as resp: if resp.status >= 400: error = await resp.text() diff --git a/cryptofeed/backends/influxdb.py b/cryptofeed/backends/influxdb.py index 4332af05b..2a955f810 100644 --- a/cryptofeed/backends/influxdb.py +++ b/cryptofeed/backends/influxdb.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. -''' +""" + from collections import defaultdict import logging @@ -13,11 +14,16 @@ from cryptofeed.backends.http import HTTPCallback from cryptofeed.defines import BID, ASK -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class InfluxCallback(HTTPCallback): - def __init__(self, addr: str, org: str, bucket: str, token: str, key=None, **kwargs): + # Default key - subclasses should override + default_key = "unknown" + + def __init__( + self, addr: str, org: str, bucket: str, token: str, key=None, **kwargs + ): """ Parent class for InfluxDB callbacks @@ -66,13 +72,13 @@ def __init__(self, addr: str, org: str, bucket: str, token: str, key=None, **kwa def format(self, data): ret = [] for key, value in data.items(): - if key in {'timestamp', 'exchange', 'symbol', 'receipt_timestamp'}: + if key in {"timestamp", "exchange", "symbol", "receipt_timestamp"}: continue if isinstance(value, str) or value is None: ret.append(f'{key}="{value}"') else: - ret.append(f'{key}={value}') - return ','.join(ret) + ret.append(f"{key}={value}") + return ",".join(ret) async def writer(self): while self.running: @@ -80,31 +86,36 @@ async def writer(self): for update in updates: d = self.format(update) timestamp = update["timestamp"] - timestamp_str = f',timestamp={timestamp}' if timestamp is not None else '' - - if 'interval' in update: - trades = f',trades={update["trades"]},' if update['trades'] else ',' - update = f'{self.key}-{update["exchange"]},symbol={update["symbol"]},interval={update["interval"]} start={update["start"]},stop={update["stop"]}{trades}open={update["open"]},close={update["close"]},high={update["high"]},low={update["low"]},volume={update["volume"]}{timestamp_str},receipt_timestamp={update["receipt_timestamp"]} {int(update["receipt_timestamp"] * 1000000)}' + timestamp_str = ( + f",timestamp={timestamp}" if timestamp is not None else "" + ) + + if "interval" in update: + trades = ( + f",trades={update['trades']}," if update["trades"] else "," + ) + update = f"{self.key}-{update['exchange']},symbol={update['symbol']},interval={update['interval']} start={update['start']},stop={update['stop']}{trades}open={update['open']},close={update['close']},high={update['high']},low={update['low']},volume={update['volume']}{timestamp_str},receipt_timestamp={update['receipt_timestamp']} {int(update['receipt_timestamp'] * 1000000)}" else: - update = f'{self.key}-{update["exchange"]},symbol={update["symbol"]} {d}{timestamp_str},receipt_timestamp={update["receipt_timestamp"]} {int(update["receipt_timestamp"] * 1000000)}' + update = f"{self.key}-{update['exchange']},symbol={update['symbol']} 
{d}{timestamp_str},receipt_timestamp={update['receipt_timestamp']} {int(update['receipt_timestamp'] * 1000000)}" await self.http_write(update, headers=self.headers) - await self.session.close() + if self.session: + await self.session.close() class TradeInflux(InfluxCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" def format(self, data): return f'side="{data["side"]}",price={data["price"]},amount={data["amount"]},id="{str(data["id"])}",type="{str(data["type"])}"' class FundingInflux(InfluxCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class BookInflux(InfluxCallback, BackendBookCallback): - default_key = 'book' + default_key = "book" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): self.snapshots_only = snapshots_only @@ -113,8 +124,8 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs super().__init__(*args, **kwargs) def format(self, data): - delta = 'delta' in data - book = data['book'] if not delta else data['delta'] + delta = "delta" in data + book = data["book"] if not delta else data["delta"] bids = json.dumps(book[BID]) asks = json.dumps(book[ASK]) @@ -122,32 +133,32 @@ def format(self, data): class TickerInflux(InfluxCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class OpenInterestInflux(InfluxCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class LiquidationsInflux(InfluxCallback, BackendCallback): - default_key = 'liquidations' + default_key = "liquidations" class CandlesInflux(InfluxCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" class OrderInfoInflux(InfluxCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class TransactionsInflux(InfluxCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" class BalancesInflux(InfluxCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class FillsInflux(InfluxCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" diff --git a/cryptofeed/backends/kafka.py b/cryptofeed/backends/kafka.py index e7f47aeea..dabe47e1b 100644 --- a/cryptofeed/backends/kafka.py +++ b/cryptofeed/backends/kafka.py @@ -1,4 +1,4 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions @@ -31,7 +31,8 @@ callback = KafkaCallback(kafka_config=config, serialization_format='protobuf') This legacy module will be removed in a future release. -''' +""" + from collections import defaultdict import asyncio import logging @@ -39,12 +40,20 @@ from typing import Optional, ByteString from aiokafka import AIOKafkaProducer -from aiokafka.errors import RequestTimedOutError, KafkaConnectionError, NodeNotReadyError +from aiokafka.errors import ( + RequestTimedOutError, + KafkaConnectionError, + NodeNotReadyError, +) from cryptofeed.json_utils import json -from cryptofeed.backends.backend import BackendBookCallback, BackendCallback, BackendQueue +from cryptofeed.backends.backend import ( + BackendBookCallback, + BackendCallback, + BackendQueue, +) -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") # Issue deprecation warning when module is imported warnings.warn( @@ -53,12 +62,19 @@ "HeaderEnricher, and enhanced error handling. 
" "See module docstring for migration guide.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) class KafkaCallback(BackendQueue): - def __init__(self, key=None, serialization_format=None, numeric_type=float, none_to=None, **kwargs): + def __init__( + self, + key=None, + serialization_format=None, + numeric_type=float, + none_to=None, + **kwargs, + ): """ You can pass configuration options to AIOKafkaProducer as keyword arguments. (either individual kwargs, an unpacked dictionary `**config_dict`, or both) @@ -96,28 +112,36 @@ def _default_serializer(self, to_bytes: dict | str) -> ByteString: elif isinstance(to_bytes, str): return to_bytes.encode() else: - raise TypeError(f'{type(to_bytes)} is not a valid Serialization type') + raise TypeError(f"{type(to_bytes)} is not a valid Serialization type") async def _connect(self): if not self.producer: loop = asyncio.get_event_loop() try: - config_keys = ', '.join([k for k in self.producer_config.keys()]) - LOG.info(f'{self.__class__.__name__}: Configuring AIOKafka with the following parameters: {config_keys}') + config_keys = ", ".join([k for k in self.producer_config.keys()]) + LOG.info( + f"{self.__class__.__name__}: Configuring AIOKafka with the following parameters: {config_keys}" + ) self.producer = AIOKafkaProducer(**self.producer_config, loop=loop) # Quit if invalid config option passed to AIOKafka except (TypeError, ValueError) as e: - LOG.error(f'{self.__class__.__name__}: Invalid AIOKafka configuration: {e.args}{chr(10)}See https://aiokafka.readthedocs.io/en/stable/api.html#aiokafka.AIOKafkaProducer for list of configuration options') + LOG.error( + f"{self.__class__.__name__}: Invalid AIOKafka configuration: {e.args}{chr(10)}See https://aiokafka.readthedocs.io/en/stable/api.html#aiokafka.AIOKafkaProducer for list of configuration options" + ) raise SystemExit else: while not self.running: try: await self.producer.start() except KafkaConnectionError: - LOG.error(f'{self.__class__.__name__}: Unable to bootstrap from host(s)') + LOG.error( + f"{self.__class__.__name__}: Unable to bootstrap from host(s)" + ) await asyncio.sleep(10) else: - LOG.info(f'{self.__class__.__name__}: "{self.producer.client._client_id}" connected to cluster containing {len(self.producer.client.cluster.brokers())} broker(s)') + LOG.info( + f'{self.__class__.__name__}: "{self.producer.client._client_id}" connected to cluster containing {len(self.producer.client.cluster.brokers())} broker(s)' + ) self.running = True def _default_serializer(self, to_bytes: dict | str) -> ByteString: @@ -128,13 +152,13 @@ def _default_serializer(self, to_bytes: dict | str) -> ByteString: elif isinstance(to_bytes, bytes): return to_bytes else: - raise TypeError(f'{type(to_bytes)} is not a valid Serialization type') + raise TypeError(f"{type(to_bytes)} is not a valid Serialization type") def topic(self, data: dict | bytes) -> str: """Determine topic based on data format and metadata.""" if isinstance(data, bytes): # Protobuf: use data type for hierarchical topic - data_type = getattr(self, 'protobuf_data_type', self.key) + data_type = getattr(self, "protobuf_data_type", self.key) return f"cryptofeed.market.{data_type}.protobuf" # JSON: use key, exchange, symbol for backward compatibility @@ -146,9 +170,9 @@ def topic(self, data: dict | bytes) -> str: def partition_key(self, data: dict | bytes) -> Optional[bytes]: """Get partition key from symbol when available.""" if isinstance(data, dict): - symbol = data.get('symbol') + symbol = data.get("symbol") if symbol: - return 
str(symbol).encode('utf-8') + return str(symbol).encode("utf-8") return None def partition(self, data: dict | bytes) -> Optional[int]: @@ -156,6 +180,7 @@ def partition(self, data: dict | bytes) -> Optional[int]: async def writer(self): await self._connect() + assert self.producer is not None while self.running: async with self.read_queue() as updates: for index in range(len(updates)): @@ -164,78 +189,93 @@ async def writer(self): # Extract key - use symbol from dict or default to key if isinstance(message, dict): - raw_key = message.get('symbol') or self.key + raw_key = message.get("symbol") or self.key else: raw_key = self.key - key_serializer = self.producer_config.get('key_serializer') + key_serializer = self.producer_config.get("key_serializer") if key_serializer: key = raw_key else: key = self._default_serializer(raw_key) # Serialize value based on type - value_serializer = self.producer_config.get('value_serializer') + value_serializer = self.producer_config.get("value_serializer") if isinstance(message, bytes): # Protobuf: already serialized value = message if not value_serializer else message else: # JSON: serialize dict to bytes - value = message if value_serializer else self._default_serializer(message) + value = ( + message + if value_serializer + else self._default_serializer(message) + ) partition = self.partition(message) try: - send_future = await self.producer.send(topic, value, key, partition) + send_future = await self.producer.send( + topic, value, key, partition + ) await send_future except RequestTimedOutError: - LOG.error(f'{self.__class__.__name__}: No response received from server within {self.producer._request_timeout_ms} ms. Messages may not have been delivered') + LOG.error( + f"{self.__class__.__name__}: No response received from server within {self.producer._request_timeout_ms} ms. Messages may not have been delivered" + ) except NodeNotReadyError: - LOG.error(f'{self.__class__.__name__}: Node not ready') + LOG.error(f"{self.__class__.__name__}: Node not ready") except Exception as e: - LOG.info(f'{self.__class__.__name__}: Encountered an error:{chr(10)}{e}') - LOG.info(f"{self.__class__.__name__}: sending last messages and closing connection '{self.producer.client._client_id}'") + LOG.info( + f"{self.__class__.__name__}: Encountered an error:{chr(10)}{e}" + ) + LOG.info( + f"{self.__class__.__name__}: sending last messages and closing connection '{self.producer.client._client_id}'" + ) await self.producer.stop() class TradeKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'trades' - protobuf_data_type = 'trades' + + default_key = "trades" + protobuf_data_type = "trades" def __init__(self, *args, **kwargs): warnings.warn( "TradeKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class FundingKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'funding' - protobuf_data_type = 'funding' + + default_key = "funding" + protobuf_data_type = "funding" def __init__(self, *args, **kwargs): warnings.warn( "FundingKafka is deprecated. 
Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class BookKafka(KafkaCallback, BackendBookCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'book' - protobuf_data_type = 'orderbook' + + default_key = "book" + protobuf_data_type = "orderbook" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): warnings.warn( "BookKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) self.snapshots_only = snapshots_only self.snapshot_interval = snapshot_interval @@ -245,111 +285,119 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs class TickerKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'ticker' - protobuf_data_type = 'ticker' + + default_key = "ticker" + protobuf_data_type = "ticker" def __init__(self, *args, **kwargs): warnings.warn( "TickerKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class OpenInterestKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'open_interest' - protobuf_data_type = 'open_interest' + + default_key = "open_interest" + protobuf_data_type = "open_interest" def __init__(self, *args, **kwargs): warnings.warn( "OpenInterestKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class LiquidationsKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'liquidations' - protobuf_data_type = 'liquidation' + + default_key = "liquidations" + protobuf_data_type = "liquidation" def __init__(self, *args, **kwargs): warnings.warn( "LiquidationsKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class CandlesKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'candles' - protobuf_data_type = 'candles' + + default_key = "candles" + protobuf_data_type = "candles" def __init__(self, *args, **kwargs): warnings.warn( "CandlesKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class OrderInfoKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'order_info' - protobuf_data_type = 'order_info' + + default_key = "order_info" + protobuf_data_type = "order_info" def __init__(self, *args, **kwargs): warnings.warn( "OrderInfoKafka is deprecated. 
Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class TransactionsKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'transactions' - protobuf_data_type = 'transactions' + + default_key = "transactions" + protobuf_data_type = "transactions" def __init__(self, *args, **kwargs): warnings.warn( "TransactionsKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class BalancesKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'balances' - protobuf_data_type = 'balances' + + default_key = "balances" + protobuf_data_type = "balances" def __init__(self, *args, **kwargs): warnings.warn( "BalancesKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) class FillsKafka(KafkaCallback, BackendCallback): """DEPRECATED: Use cryptofeed.kafka_callback.KafkaCallback instead.""" - default_key = 'fills' - protobuf_data_type = 'fills' + + default_key = "fills" + protobuf_data_type = "fills" def __init__(self, *args, **kwargs): warnings.warn( "FillsKafka is deprecated. Use cryptofeed.kafka_callback.KafkaCallback instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) super().__init__(*args, **kwargs) diff --git a/cryptofeed/backends/kafka_metrics.py b/cryptofeed/backends/kafka_metrics.py index 386574acf..6e3945354 100644 --- a/cryptofeed/backends/kafka_metrics.py +++ b/cryptofeed/backends/kafka_metrics.py @@ -60,6 +60,7 @@ def _import_prometheus(self) -> None: """Lazy import prometheus_client to avoid hard dependency.""" try: from prometheus_client import Counter, Histogram, Gauge, REGISTRY + self.Counter = Counter self.Histogram = Histogram self.Gauge = Gauge @@ -75,8 +76,9 @@ def _ensure_prometheus(self) -> bool: return False return self.enabled - def _create_counter(self, name: str, documentation: str, - labelnames: List[str]) -> Any: + def _create_counter( + self, name: str, documentation: str, labelnames: List[str] + ) -> Any: """Create a Prometheus counter metric.""" if not self._ensure_prometheus(): return self._NoOpMetric() @@ -87,15 +89,19 @@ def _create_counter(self, name: str, documentation: str, # Metric already exists, retrieve it return self.REGISTRY._names_to_collectors.get(name) - def _create_histogram(self, name: str, documentation: str, - labelnames: List[str], - buckets: Optional[tuple] = None) -> Any: + def _create_histogram( + self, + name: str, + documentation: str, + labelnames: List[str], + buckets: Optional[tuple] = None, + ) -> Any: """Create a Prometheus histogram metric.""" if not self._ensure_prometheus(): return self._NoOpMetric() try: - kwargs = {"labelnames": labelnames} + kwargs: Dict[str, Any] = {"labelnames": labelnames} if buckets: kwargs["buckets"] = buckets return self.Histogram(name, documentation, **kwargs) @@ -103,8 +109,9 @@ def _create_histogram(self, name: str, documentation: str, # Metric already exists, retrieve it return self.REGISTRY._names_to_collectors.get(name) - def _create_gauge(self, name: str, documentation: str, - labelnames: List[str]) -> Any: + def _create_gauge( + self, name: str, documentation: str, labelnames: List[str] + ) -> Any: """Create a Prometheus gauge metric.""" if not 
self._ensure_prometheus(): return self._NoOpMetric() @@ -117,6 +124,7 @@ def _create_gauge(self, name: str, documentation: str, class _NoOpMetric: """No-op metric for when Prometheus is unavailable.""" + def labels(self, **kwargs) -> _NoOpMetric: return self @@ -139,7 +147,7 @@ def create_producer_metrics(self) -> None: self.messages_produced_total = self._create_counter( "messages_produced_total", "Total number of messages successfully produced to Kafka", - ["exchange", "symbol", "data_type", "partition_strategy"] + ["exchange", "symbol", "data_type", "partition_strategy"], ) # Histogram: produce_latency_seconds (buckets: 1ms, 5ms, 10ms, 50ms, 100ms, 500ms, 1s) @@ -147,21 +155,21 @@ def create_producer_metrics(self) -> None: "produce_latency_seconds", "Latency of message production from callback to broker acknowledgment", ["exchange", "data_type"], - buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0) + buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0), ) # Counter: produce_errors_total self.produce_errors_total = self._create_counter( "produce_errors_total", "Total number of produce errors", - ["exchange", "data_type", "error_type"] + ["exchange", "data_type", "error_type"], ) # Gauge: producer_buffer_usage_bytes self.producer_buffer_usage_bytes = self._create_gauge( "producer_buffer_usage_bytes", "Current bytes in producer buffer waiting for transmission", - ["producer_id"] + ["producer_id"], ) # ======================================================================== @@ -175,21 +183,21 @@ def create_kafka_metrics(self) -> None: "kafka_broker_latency_seconds", "Latency to Kafka broker for various operations", ["broker_id", "operation"], - buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0) + buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0), ) # Gauge: kafka_partition_lag_records self.kafka_partition_lag_records = self._create_gauge( "kafka_partition_lag_records", "Number of records behind in partition (consumer lag)", - ["partition"] + ["partition"], ) # Gauge: kafka_buffer_utilization_percent self.kafka_buffer_utilization_percent = self._create_gauge( "kafka_buffer_utilization_percent", "Percentage of producer buffer pool currently in use", - ["producer_id"] + ["producer_id"], ) # ======================================================================== @@ -203,7 +211,7 @@ def create_serialization_metrics(self) -> None: "message_size_bytes", "Distribution of serialized message sizes in bytes", ["data_type", "compression_enabled"], - buckets=(100, 250, 500, 1000, 2500, 5000, 10000) + buckets=(100, 250, 500, 1000, 2500, 5000, 10000), ) # Histogram: serialization_latency_seconds @@ -211,7 +219,7 @@ def create_serialization_metrics(self) -> None: "serialization_latency_seconds", "Time taken to serialize message to protobuf format", ["data_type"], - buckets=(0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01) + buckets=(0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01), ) def initialize(self) -> None: @@ -229,8 +237,9 @@ def initialize(self) -> None: # Metric Recording Methods # ======================================================================== - def record_message_produced(self, exchange: str, symbol: str, - data_type: str, partition_strategy: str) -> None: + def record_message_produced( + self, exchange: str, symbol: str, data_type: str, partition_strategy: str + ) -> None: """Record a successfully produced message.""" if not self.enabled: return @@ -239,34 +248,33 @@ def record_message_produced(self, exchange: str, symbol: str, exchange=exchange, symbol=symbol, 
data_type=data_type, - partition_strategy=partition_strategy + partition_strategy=partition_strategy, ).inc() except Exception as e: LOG.debug(f"Error recording message produced metric: {e}") - def record_produce_latency(self, latency_seconds: float, - exchange: str, data_type: str) -> None: + def record_produce_latency( + self, latency_seconds: float, exchange: str, data_type: str + ) -> None: """Record message produce latency.""" if not self.enabled: return try: self.produce_latency_seconds.labels( - exchange=exchange, - data_type=data_type + exchange=exchange, data_type=data_type ).observe(latency_seconds) except Exception as e: LOG.debug(f"Error recording produce latency metric: {e}") - def record_produce_error(self, exchange: str, data_type: str, - error_type: str) -> None: + def record_produce_error( + self, exchange: str, data_type: str, error_type: str + ) -> None: """Record a produce error.""" if not self.enabled: return try: self.produce_errors_total.labels( - exchange=exchange, - data_type=data_type, - error_type=error_type + exchange=exchange, data_type=data_type, error_type=error_type ).inc() except Exception as e: LOG.debug(f"Error recording produce error metric: {e}") @@ -276,21 +284,21 @@ def record_buffer_usage(self, bytes_used: float) -> None: if not self.enabled: return try: - self.producer_buffer_usage_bytes.labels( - producer_id=self.producer_id - ).set(bytes_used) + self.producer_buffer_usage_bytes.labels(producer_id=self.producer_id).set( + bytes_used + ) except Exception as e: LOG.debug(f"Error recording buffer usage metric: {e}") - def record_broker_latency(self, latency_seconds: float, - broker_id: str, operation: str) -> None: + def record_broker_latency( + self, latency_seconds: float, broker_id: str, operation: str + ) -> None: """Record Kafka broker latency.""" if not self.enabled: return try: self.kafka_broker_latency_seconds.labels( - broker_id=broker_id, - operation=operation + broker_id=broker_id, operation=operation ).observe(latency_seconds) except Exception as e: LOG.debug(f"Error recording broker latency metric: {e}") @@ -300,9 +308,9 @@ def record_partition_lag(self, lag_records: int, partition: int) -> None: if not self.enabled: return try: - self.kafka_partition_lag_records.labels( - partition=str(partition) - ).set(lag_records) + self.kafka_partition_lag_records.labels(partition=str(partition)).set( + lag_records + ) except Exception as e: LOG.debug(f"Error recording partition lag metric: {e}") @@ -317,28 +325,29 @@ def record_buffer_utilization(self, percent: float) -> None: except Exception as e: LOG.debug(f"Error recording buffer utilization metric: {e}") - def record_message_size(self, size_bytes: int, data_type: str, - compression_enabled: bool) -> None: + def record_message_size( + self, size_bytes: int, data_type: str, compression_enabled: bool + ) -> None: """Record serialized message size.""" if not self.enabled: return try: self.message_size_bytes.labels( - data_type=data_type, - compression_enabled=str(compression_enabled) + data_type=data_type, compression_enabled=str(compression_enabled) ).observe(size_bytes) except Exception as e: LOG.debug(f"Error recording message size metric: {e}") - def record_serialization_latency(self, latency_seconds: float, - data_type: str) -> None: + def record_serialization_latency( + self, latency_seconds: float, data_type: str + ) -> None: """Record message serialization latency.""" if not self.enabled: return try: - self.serialization_latency_seconds.labels( - data_type=data_type - 
).observe(latency_seconds) + self.serialization_latency_seconds.labels(data_type=data_type).observe( + latency_seconds + ) except Exception as e: LOG.debug(f"Error recording serialization latency metric: {e}") @@ -348,6 +357,7 @@ def record_serialization_latency(self, latency_seconds: float, def producer_method(self, func: Callable) -> Callable: """Decorator to measure latency of producer methods.""" + def wrapper(*args: Any, **kwargs: Any) -> Any: start_time = time.time() try: @@ -357,11 +367,15 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: elapsed = time.time() - start_time # Log latency but don't record to metrics (to avoid overhead) if elapsed > 0.01: # Log only if > 10ms - LOG.debug(f"Producer method {func.__name__} took {elapsed*1000:.2f}ms") + LOG.debug( + f"Producer method {func.__name__} took {elapsed * 1000:.2f}ms" + ) + return wrapper def track_produce_latency(self, exchange: str, data_type: str) -> Callable: """Decorator factory to track message produce latency.""" + def decorator(func: Callable) -> Callable: def wrapper(*args: Any, **kwargs: Any) -> Any: start_time = time.time() @@ -371,7 +385,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: finally: elapsed = time.time() - start_time self.record_produce_latency(elapsed, exchange, data_type) + return wrapper + return decorator @@ -383,8 +399,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: _global_metrics_exporter: Optional[PrometheusMetricsExporter] = None -def get_metrics_exporter(producer_id: str = "default", - enabled: bool = True) -> PrometheusMetricsExporter: +def get_metrics_exporter( + producer_id: str = "default", enabled: bool = True +) -> PrometheusMetricsExporter: """Get or create the global metrics exporter instance. Args: diff --git a/cryptofeed/backends/kafka_schema.py b/cryptofeed/backends/kafka_schema.py index 799ce00f8..fe3286ee4 100644 --- a/cryptofeed/backends/kafka_schema.py +++ b/cryptofeed/backends/kafka_schema.py @@ -15,6 +15,7 @@ import logging import struct from abc import ABC, abstractmethod +import threading from enum import Enum from functools import lru_cache from typing import Dict, Optional, Any, Tuple @@ -93,6 +94,22 @@ class SchemaRegistryConfig(BaseModel): default=CompatibilityMode.BACKWARD, description="Schema compatibility mode", ) + tls_client_cert: Optional[str] = Field( + default=None, + description="Path to client certificate for mTLS", + ) + tls_client_key: Optional[str] = Field( + default=None, + description="Path to client private key for mTLS", + ) + tls_ca: Optional[str] = Field( + default=None, + description="CA bundle path for TLS verification", + ) + verify: bool = Field( + default=True, + description="Verify TLS certificates (set False for local dev only)", + ) cache_size: int = Field(default=1000, description="Schema cache size") cache_ttl_seconds: int = Field( default=3600, description="Schema cache TTL in seconds" @@ -354,6 +371,12 @@ def __init__(self, config: SchemaRegistryConfig): self._auth = HTTPBasicAuth(config.username, config.password) # Schema cache: {schema_id: schema_dict} self._schema_cache: Dict[int, Dict[str, Any]] = {} + self._cache_lock = threading.RLock() + self._verify = config.tls_ca if config.tls_ca else config.verify + if config.tls_client_cert and config.tls_client_key: + self._cert = (config.tls_client_cert, config.tls_client_key) + else: + self._cert = None def register_schema( self, @@ -386,6 +409,8 @@ def register_schema( url, json=payload, auth=self._auth, + verify=self._verify, + cert=self._cert, timeout=30, ) @@ -439,9 +464,11 @@ def 
get_schema_by_id(self, schema_id: int) -> Dict[str, Any]: SchemaNotFoundError: If schema not found """ # Check cache first - if schema_id in self._schema_cache: + with self._cache_lock: + cached = self._schema_cache.get(schema_id) + if cached is not None: self.logger.debug(f"Retrieved cached schema for schema_id={schema_id}") - return self._schema_cache[schema_id] + return cached url = urljoin(self.config.url, f"/schemas/ids/{schema_id}") @@ -449,13 +476,16 @@ def get_schema_by_id(self, schema_id: int) -> Dict[str, Any]: response = requests.get( url, auth=self._auth, + verify=self._verify, + cert=self._cert, timeout=30, ) if response.status_code == 200: data = response.json() # Cache the schema - self._schema_cache[schema_id] = data + with self._cache_lock: + self._schema_cache[schema_id] = data self.logger.debug(f"Retrieved schema for schema_id={schema_id}") return data @@ -492,6 +522,8 @@ def get_schema_by_version( response = requests.get( url, auth=self._auth, + verify=self._verify, + cert=self._cert, timeout=30, ) @@ -542,6 +574,8 @@ def check_compatibility( url, json=payload, auth=self._auth, + verify=self._verify, + cert=self._cert, timeout=30, ) @@ -582,6 +616,8 @@ def set_compatibility_mode(self, subject: str, mode: str) -> None: url, json=payload, auth=self._auth, + verify=self._verify, + cert=self._cert, timeout=30, ) diff --git a/cryptofeed/backends/mongo.py b/cryptofeed/backends/mongo.py index c813d7871..f6395e791 100644 --- a/cryptofeed/backends/mongo.py +++ b/cryptofeed/backends/mongo.py @@ -1,20 +1,37 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. -''' +""" + from collections import defaultdict from datetime import timezone, datetime as dt import bson import motor.motor_asyncio -from cryptofeed.backends.backend import BackendBookCallback, BackendCallback, BackendQueue +from cryptofeed.backends.backend import ( + BackendBookCallback, + BackendCallback, + BackendQueue, +) class MongoCallback(BackendQueue): - def __init__(self, db, host='127.0.0.1', port=27017, key=None, none_to=None, numeric_type=str, **kwargs): + # Default key - subclasses should override + default_key = "unknown" + + def __init__( + self, + db, + host="127.0.0.1", + port=27017, + key=None, + none_to=None, + numeric_type=str, + **kwargs, + ): self.host = host self.port = port self.db = db @@ -29,25 +46,51 @@ async def writer(self): while self.running: async with self.read_queue() as updates: for index in range(len(updates)): - updates[index]['timestamp'] = dt.fromtimestamp(updates[index]['timestamp'], tz=timezone.utc) if updates[index]['timestamp'] else None - updates[index]['receipt_timestamp'] = dt.fromtimestamp(updates[index]['receipt_timestamp'], tz=timezone.utc) if updates[index]['receipt_timestamp'] else None - - if 'book' in updates[index]: - updates[index] = {'exchange': updates[index]['exchange'], 'symbol': updates[index]['symbol'], 'timestamp': updates[index]['timestamp'], 'receipt_timestamp': updates[index]['receipt_timestamp'], 'delta': 'delta' in updates[index], 'bid': bson.BSON.encode(updates[index]['book']['bid'] if 'delta' not in updates[index] else updates[index]['delta']['bid']), 'ask': bson.BSON.encode(updates[index]['book']['ask'] if 'delta' not in updates[index] else updates[index]['delta']['ask'])} + updates[index]["timestamp"] = ( + dt.fromtimestamp(updates[index]["timestamp"], tz=timezone.utc) + if updates[index]["timestamp"] + else None + ) + 
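# Illustrative sketch (hypothetical usage of the SchemaRegistryConfig TLS fields added
# in kafka_schema.py above). The registry URL and certificate paths are placeholders;
# with tls_ca set, the client forwards it to requests as verify=, and the cert/key pair
# is forwarded as cert=(tls_client_cert, tls_client_key).
from cryptofeed.backends.kafka_schema import SchemaRegistryConfig

registry_config = SchemaRegistryConfig(
    url="https://schema-registry.example.com:8081",  # placeholder endpoint
    tls_ca="/etc/ssl/certs/registry-ca.pem",         # CA bundle, becomes requests' verify=
    tls_client_cert="/etc/ssl/certs/client.pem",     # client certificate for mTLS
    tls_client_key="/etc/ssl/private/client.key",    # client private key for mTLS
)
# Leaving tls_ca unset falls back to the boolean `verify` flag (default True);
# verify=False is intended for local development only.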
updates[index]["receipt_timestamp"] = ( + dt.fromtimestamp( + updates[index]["receipt_timestamp"], tz=timezone.utc + ) + if updates[index]["receipt_timestamp"] + else None + ) + + if "book" in updates[index]: + updates[index] = { + "exchange": updates[index]["exchange"], + "symbol": updates[index]["symbol"], + "timestamp": updates[index]["timestamp"], + "receipt_timestamp": updates[index]["receipt_timestamp"], + "delta": "delta" in updates[index], + "bid": bson.BSON.encode( + updates[index]["book"]["bid"] + if "delta" not in updates[index] + else updates[index]["delta"]["bid"] + ), + "ask": bson.BSON.encode( + updates[index]["book"]["ask"] + if "delta" not in updates[index] + else updates[index]["delta"]["ask"] + ), + } await db[self.collection].insert_many(updates) class TradeMongo(MongoCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" class FundingMongo(MongoCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class BookMongo(MongoCallback, BackendBookCallback): - default_key = 'book' + default_key = "book" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): self.snapshots_only = snapshots_only @@ -57,32 +100,32 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs class TickerMongo(MongoCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class OpenInterestMongo(MongoCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class LiquidationsMongo(MongoCallback, BackendCallback): - default_key = 'liquidations' + default_key = "liquidations" class CandlesMongo(MongoCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" class OrderInfoMongo(MongoCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class TransactionsMongo(MongoCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" class BalancesMongo(MongoCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class FillsMongo(MongoCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" diff --git a/cryptofeed/backends/postgres.py b/cryptofeed/backends/postgres.py index 00c3f3a99..f598fcc05 100644 --- a/cryptofeed/backends/postgres.py +++ b/cryptofeed/backends/postgres.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + from collections import defaultdict from datetime import datetime as dt from typing import Tuple @@ -11,12 +12,39 @@ import asyncpg from cryptofeed.json_utils import json -from cryptofeed.backends.backend import BackendBookCallback, BackendCallback, BackendQueue -from cryptofeed.defines import CANDLES, FUNDING, OPEN_INTEREST, TICKER, TRADES, LIQUIDATIONS, INDEX +from cryptofeed.backends.backend import ( + BackendBookCallback, + BackendCallback, + BackendQueue, +) +from cryptofeed.defines import ( + CANDLES, + FUNDING, + OPEN_INTEREST, + TICKER, + TRADES, + LIQUIDATIONS, + INDEX, +) class PostgresCallback(BackendQueue): - def __init__(self, host='127.0.0.1', user=None, pw=None, db=None, port=None, table=None, custom_columns: dict = None, none_to=None, numeric_type=float, **kwargs): + # Default table - subclasses should override + default_table = "unknown" + + def __init__( + self, + host="127.0.0.1", + user=None, + pw=None, + db=None, + port=None, + table=None, + custom_columns: dict = None, + none_to=None, + numeric_type=float, + **kwargs, + ): """ host: str Database host address @@ -45,12 +73,22 @@ def __init__(self, host='127.0.0.1', user=None, pw=None, db=None, port=None, tab self.port = port # Parse INSERT statement with user-specified column names # Performed at init to avoid repeated list joins - self.insert_statement = f"INSERT INTO {self.table} ({','.join([v for v in self.custom_columns.values()])}) VALUES " if custom_columns else None + self.insert_statement = ( + f"INSERT INTO {self.table} ({','.join([v for v in self.custom_columns.values()])}) VALUES " + if custom_columns + else None + ) self.running = True async def _connect(self): if self.conn is None: - self.conn = await asyncpg.connect(user=self.user, password=self.pw, database=self.db, host=self.host, port=self.port) + self.conn = await asyncpg.connect( + user=self.user, + password=self.pw, + database=self.db, + host=self.host, + port=self.port, + ) def format(self, data: Tuple): feed = data[0] @@ -62,21 +100,25 @@ def format(self, data: Tuple): return f"(DEFAULT,'{timestamp}','{receipt_timestamp}','{feed}','{symbol}','{json.dumps(data)}')" def _custom_format(self, data: Tuple): - d = { **data[4], **{ - 'exchange': data[0], - 'symbol': data[1], - 'timestamp': data[2], - 'receipt': data[3], - } + "exchange": data[0], + "symbol": data[1], + "timestamp": data[2], + "receipt": data[3], + }, } # Cross-ref data dict with user column names from custom_columns dict, inserting NULL if requested data point not present - sequence_gen = (d[field] if d[field] else 'NULL' for field in self.custom_columns.keys()) + sequence_gen = ( + d[field] if d[field] else "NULL" for field in self.custom_columns.keys() + ) # Iterate through the generator and surround everything except floats and NULL in single quotes - sql_string = ','.join(str(s) if isinstance(s, float) or s == 'NULL' else "'" + str(s) + "'" for s in sequence_gen) + sql_string = ",".join( + str(s) if isinstance(s, float) or s == "NULL" else "'" + str(s) + "'" + for s in sequence_gen + ) return f"({sql_string})" async def writer(self): @@ -85,21 +127,29 @@ async def writer(self): if len(updates) > 0: batch = [] for data in updates: - ts = dt.utcfromtimestamp(data['timestamp']) if data['timestamp'] else None - rts = dt.utcfromtimestamp(data['receipt_timestamp']) - batch.append((data['exchange'], data['symbol'], ts, rts, data)) + ts = ( + dt.utcfromtimestamp(data["timestamp"]) + if data["timestamp"] + else None + ) + rts = dt.utcfromtimestamp(data["receipt_timestamp"]) + 
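# Illustrative sketch: how the custom_columns mapping handled above becomes an INSERT
# prefix. Keys are cryptofeed field names, values are database column names; the table
# and column names below are hypothetical.
custom_columns = {"exchange": "exchange", "symbol": "symbol", "timestamp": "ts",
                  "price": "px", "amount": "qty"}
table = "trades_custom"  # hypothetical table name
insert_statement = f"INSERT INTO {table} ({','.join(custom_columns.values())}) VALUES "
print(insert_statement)  # INSERT INTO trades_custom (exchange,symbol,ts,px,qty) VALUES
# _custom_format() then renders each update as a parenthesised value tuple ordered by
# custom_columns.keys(), substituting NULL for any field missing from the update.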
batch.append((data["exchange"], data["symbol"], ts, rts, data)) await self.write_batch(batch) async def write_batch(self, updates: list): await self._connect() - args_str = ','.join([self.format(u) for u in updates]) + assert self.conn is not None + args_str = ",".join([self.format(u) for u in updates]) async with self.conn.transaction(): try: if self.custom_columns: + assert self.insert_statement is not None await self.conn.execute(self.insert_statement + args_str) else: - await self.conn.execute(f"INSERT INTO {self.table} VALUES {args_str}") + await self.conn.execute( + f"INSERT INTO {self.table} VALUES {args_str}" + ) except asyncpg.UniqueViolationError: # when restarting a subscription, some exchanges will re-publish a few messages @@ -114,8 +164,8 @@ def format(self, data: Tuple): return self._custom_format(data) else: exchange, symbol, timestamp, receipt, data = data - id = f"'{data['id']}'" if data['id'] else 'NULL' - otype = f"'{data['type']}'" if data['type'] else 'NULL' + id = f"'{data['id']}'" if data["id"] else "NULL" + otype = f"'{data['type']}'" if data["type"] else "NULL" return f"(DEFAULT,'{timestamp}','{receipt}','{exchange}','{symbol}','{data['side']}',{data['amount']},{data['price']},{id},{otype})" @@ -124,12 +174,18 @@ class FundingPostgres(PostgresCallback, BackendCallback): def format(self, data: Tuple): if self.custom_columns: - if data[4]['next_funding_time']: - data[4]['next_funding_time'] = dt.utcfromtimestamp(data[4]['next_funding_time']) + if data[4]["next_funding_time"]: + data[4]["next_funding_time"] = dt.utcfromtimestamp( + data[4]["next_funding_time"] + ) return self._custom_format(data) else: exchange, symbol, timestamp, receipt, data = data - ts = dt.utcfromtimestamp(data['next_funding_time']) if data['next_funding_time'] else 'NULL' + ts = ( + dt.utcfromtimestamp(data["next_funding_time"]) + if data["next_funding_time"] + else "NULL" + ) return f"(DEFAULT,'{timestamp}','{receipt}','{exchange}','{symbol}',{data['mark_price'] if data['mark_price'] else 'NULL'},{data['rate']},'{ts}',{data['predicted_rate']})" @@ -178,7 +234,7 @@ def format(self, data: Tuple): class BookPostgres(PostgresCallback, BackendBookCallback): - default_table = 'book' + default_table = "book" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): self.snapshots_only = snapshots_only @@ -188,10 +244,10 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs def format(self, data: Tuple): if self.custom_columns: - if 'book' in data[4]: - data[4]['data'] = json.dumps({'snapshot': data[4]['book']}) + if "book" in data[4]: + data[4]["data"] = json.dumps({"snapshot": data[4]["book"]}) else: - data[4]['data'] = json.dumps({'delta': data[4]['delta']}) + data[4]["data"] = json.dumps({"delta": data[4]["delta"]}) return self._custom_format(data) else: feed = data[0] @@ -199,10 +255,10 @@ def format(self, data: Tuple): timestamp = data[2] receipt_timestamp = data[3] data = data[4] - if 'book' in data: - data = {'snapshot': data['book']} + if "book" in data: + data = {"snapshot": data["book"]} else: - data = {'delta': data['delta']} + data = {"delta": data["delta"]} return f"(DEFAULT,'{timestamp}','{receipt_timestamp}','{feed}','{symbol}','{json.dumps(data)}')" @@ -212,12 +268,12 @@ class CandlesPostgres(PostgresCallback, BackendCallback): def format(self, data: Tuple): if self.custom_columns: - data[4]['start'] = dt.utcfromtimestamp(data[4]['start']) - data[4]['stop'] = dt.utcfromtimestamp(data[4]['stop']) + data[4]["start"] = 
dt.utcfromtimestamp(data[4]["start"]) + data[4]["stop"] = dt.utcfromtimestamp(data[4]["stop"]) return self._custom_format(data) else: exchange, symbol, timestamp, receipt, data = data - open_ts = dt.utcfromtimestamp(data['start']) - close_ts = dt.utcfromtimestamp(data['stop']) + open_ts = dt.utcfromtimestamp(data["start"]) + close_ts = dt.utcfromtimestamp(data["stop"]) return f"(DEFAULT,'{timestamp}','{receipt}','{exchange}','{symbol}','{open_ts}','{close_ts}','{data['interval']}',{data['trades'] if data['trades'] is not None else 'NULL'},{data['open']},{data['close']},{data['high']},{data['low']},{data['volume']},{data['closed'] if data['closed'] else 'NULL'})" diff --git a/cryptofeed/backends/protobuf_helpers_v2.py b/cryptofeed/backends/protobuf_helpers_v2.py new file mode 100644 index 000000000..02e1734b6 --- /dev/null +++ b/cryptofeed/backends/protobuf_helpers_v2.py @@ -0,0 +1,258 @@ +"""Protobuf v2 serialization helpers (native numeric types). + +These helpers mirror ``cryptofeed.backends.protobuf_helpers`` but target the +normalized v2 schemas that use native numeric types (`double`, `uint64`) and +`google.protobuf.Timestamp`. They are schema-registry friendly and avoid the +string-based decimal encoding used in v1. +""" + +from __future__ import annotations + +import math +from decimal import Decimal +from typing import Any, Callable, Dict + +from google.protobuf import timestamp_pb2 + +from cryptofeed.exceptions import ProtobufEncodeError, SerializationError + +try: + from gen.python.cryptofeed.normalized.v2 import trade_pb2 as trade_v2_pb2 + from gen.python.cryptofeed.normalized.v2 import ticker_pb2 as ticker_v2_pb2 + from gen.python.cryptofeed.normalized.v2 import order_book_pb2 as order_book_v2_pb2 + from gen.python.cryptofeed.normalized.v2 import candle_pb2 as candle_v2_pb2 +except ImportError as exc: # pragma: no cover - import guarded by tests + raise ImportError( + "Missing generated v2 protobuf bindings. Run 'buf generate proto/' first." 
+ ) from exc + + +_DEFAULT_SCHEMA_VERSION = "v2" + + +def _to_timestamp_proto(value: Any) -> timestamp_pb2.Timestamp: + """Convert float/int/Decimal seconds to Timestamp.""" + + proto_ts = timestamp_pb2.Timestamp() + if value is None: + return proto_ts + + seconds = int(math.floor(float(value))) + nanos = int(round((float(value) - seconds) * 1_000_000_000)) + + proto_ts.seconds = seconds + proto_ts.nanos = nanos + return proto_ts + + +def _to_double(value: Any) -> float: + """Lossy conversion that accepts Decimal/str/float/int.""" + if value is None: + return 0.0 + if isinstance(value, Decimal): + return float(value) # intentional lossy conversion per v2 spec + return float(value) + + +def trade_to_proto_v2(trade_obj) -> trade_v2_pb2.Trade: + proto = trade_v2_pb2.Trade() + + proto.exchange = getattr(trade_obj, "exchange", "") or "" + proto.symbol = getattr(trade_obj, "symbol", "") or "" + + side = getattr(trade_obj, "side", None) + if side: + if str(side).lower() == "buy": + proto.side = trade_v2_pb2.Trade.SIDE_BUY + elif str(side).lower() == "sell": + proto.side = trade_v2_pb2.Trade.SIDE_SELL + else: + proto.side = trade_v2_pb2.Trade.SIDE_UNSPECIFIED + + trade_id = getattr(trade_obj, "id", None) or getattr(trade_obj, "trade_id", None) + if trade_id is not None: + proto.trade_id = str(trade_id) + + price = getattr(trade_obj, "price", None) + amount = getattr(trade_obj, "amount", None) + proto.price = _to_double(price) + proto.amount = _to_double(amount) + + timestamp_val = getattr(trade_obj, "timestamp", None) + proto.timestamp.CopyFrom(_to_timestamp_proto(timestamp_val)) + + seq = getattr(trade_obj, "sequence_number", None) + if seq is not None: + proto.sequence_number = int(seq) + + # scale is kept at default 0 unless explicitly provided by callers who + # opt into bytes+scale semantics in the future. 
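# Illustrative sketch of the seconds/nanos split performed by _to_timestamp_proto() and
# the intentionally lossy Decimal handling in _to_double(); standard library only, so it
# runs without the generated v2 bindings. The epoch value is arbitrary.
import math
from decimal import Decimal

epoch = 1_700_000_000.123456                           # float seconds, as cryptofeed provides
seconds = int(math.floor(epoch))                       # 1700000000
nanos = int(round((epoch - seconds) * 1_000_000_000))  # ~123456000, limited by float precision
print(seconds, nanos)

print(float(Decimal("0.1")))  # nearest IEEE-754 double; lossy by design in the v2 schemas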
+ if hasattr(trade_obj, "scale") and getattr(trade_obj, "scale") is not None: + proto.scale = int(getattr(trade_obj, "scale")) + + return proto + + +def ticker_to_proto_v2(ticker_obj) -> ticker_v2_pb2.Ticker: + proto = ticker_v2_pb2.Ticker() + proto.exchange = getattr(ticker_obj, "exchange", "") or "" + proto.symbol = getattr(ticker_obj, "symbol", "") or "" + + proto.best_bid_price = _to_double(getattr(ticker_obj, "bid", None) or getattr(ticker_obj, "best_bid", None)) + proto.best_ask_price = _to_double(getattr(ticker_obj, "ask", None) or getattr(ticker_obj, "best_ask", None)) + proto.best_bid_size = _to_double(getattr(ticker_obj, "bid_size", None) or getattr(ticker_obj, "best_bid_size", None)) + proto.best_ask_size = _to_double(getattr(ticker_obj, "ask_size", None) or getattr(ticker_obj, "best_ask_size", None)) + + proto.timestamp.CopyFrom(_to_timestamp_proto(getattr(ticker_obj, "timestamp", None))) + + seq = getattr(ticker_obj, "sequence_number", None) + if seq is not None: + proto.sequence_number = int(seq) + + if hasattr(ticker_obj, "scale") and getattr(ticker_obj, "scale") is not None: + proto.scale = int(getattr(ticker_obj, "scale")) + + return proto + + +def orderbook_to_proto_v2(book_obj) -> order_book_v2_pb2.OrderBook: + proto = order_book_v2_pb2.OrderBook() + proto.exchange = getattr(book_obj, "exchange", "") or "" + proto.symbol = getattr(book_obj, "symbol", "") or "" + + bids = getattr(book_obj, "bids", None) or {} + asks = getattr(book_obj, "asks", None) or {} + + for price, qty in getattr(bids, "items", bids.items)(): + level = proto.bids.add() + level.price = _to_double(price) + level.quantity = _to_double(qty) + + for price, qty in getattr(asks, "items", asks.items)(): + level = proto.asks.add() + level.price = _to_double(price) + level.quantity = _to_double(qty) + + proto.timestamp.CopyFrom(_to_timestamp_proto(getattr(book_obj, "timestamp", None))) + + seq = getattr(book_obj, "sequence_number", None) + if seq is not None: + proto.sequence_number = int(seq) + + checksum = getattr(book_obj, "checksum", None) + if checksum is not None: + proto.checksum = str(checksum) + + if hasattr(book_obj, "scale") and getattr(book_obj, "scale") is not None: + proto.scale = int(getattr(book_obj, "scale")) + + return proto + + +def candle_to_proto_v2(candle_obj) -> candle_v2_pb2.Candle: + proto = candle_v2_pb2.Candle() + proto.exchange = getattr(candle_obj, "exchange", "") or "" + proto.symbol = getattr(candle_obj, "symbol", "") or "" + + proto.start.CopyFrom(_to_timestamp_proto(getattr(candle_obj, "start", getattr(candle_obj, "open_time", None)))) + proto.end.CopyFrom(_to_timestamp_proto(getattr(candle_obj, "end", getattr(candle_obj, "stop", None)))) + + interval = getattr(candle_obj, "interval", None) + if interval is not None: + proto.interval = str(interval) + + trades = getattr(candle_obj, "trades", None) + if trades is not None: + proto.trades = int(trades) + + proto.open = _to_double(getattr(candle_obj, "open", None)) + proto.close = _to_double(getattr(candle_obj, "close", None)) + proto.high = _to_double(getattr(candle_obj, "high", None)) + proto.low = _to_double(getattr(candle_obj, "low", None)) + proto.volume = _to_double(getattr(candle_obj, "volume", None)) + + closed = getattr(candle_obj, "closed", None) + if closed is not None: + proto.closed = bool(closed) + + proto.timestamp.CopyFrom(_to_timestamp_proto(getattr(candle_obj, "timestamp", None))) + + seq = getattr(candle_obj, "sequence_number", None) + if seq is not None: + proto.sequence_number = int(seq) + + if 
hasattr(candle_obj, "scale") and getattr(candle_obj, "scale") is not None: + proto.scale = int(getattr(candle_obj, "scale")) + + return proto + + +_CONVERTER_MAP: Dict[str, Callable[[Any], Any]] = { + "Trade": trade_to_proto_v2, + "Ticker": ticker_to_proto_v2, + "OrderBook": orderbook_to_proto_v2, + "Candle": candle_to_proto_v2, + # common alternates in the codebase + "L2Book": orderbook_to_proto_v2, +} + + +def get_converter_v2(type_name: str) -> Callable[[Any], Any] | None: + normalized = type_name.lstrip("_") + return _CONVERTER_MAP.get(normalized) + + +def _ensure_message(proto_obj, type_name: str, source: str): + from google.protobuf.message import Message + + if not isinstance(proto_obj, Message): # pragma: no cover - defensive guard + raise ProtobufEncodeError( + "Converter did not return a protobuf Message", + data_type=type_name, + schema_version=_DEFAULT_SCHEMA_VERSION, + schema_name=source, + ) + return proto_obj + + +def serialize_to_protobuf_v2(obj: Any) -> bytes: + type_name = type(obj).__name__ + + converter = get_converter_v2(type_name) + if not converter: + raise SerializationError( + "No protobuf v2 converter registered for data type.", + data_type=type_name, + ) + + try: + proto_msg = converter(obj) + except Exception as exc: + raise ProtobufEncodeError( + "Converter raised an exception", + data_type=type_name, + schema_name=type_name, + schema_version=_DEFAULT_SCHEMA_VERSION, + ) from exc + + proto_msg = _ensure_message(proto_msg, type_name, "converter") + + try: + return proto_msg.SerializeToString() + except Exception as exc: # pragma: no cover - defensive guard + raise ProtobufEncodeError( + "SerializeToString() failed", + data_type=type_name, + schema_name=type(proto_msg).__name__, + schema_version=_DEFAULT_SCHEMA_VERSION, + ) from exc + + +__all__ = [ + "trade_to_proto_v2", + "ticker_to_proto_v2", + "orderbook_to_proto_v2", + "candle_to_proto_v2", + "get_converter_v2", + "serialize_to_protobuf_v2", +] diff --git a/cryptofeed/backends/quasardb.py b/cryptofeed/backends/quasardb.py index bfe213613..25f804335 100644 --- a/cryptofeed/backends/quasardb.py +++ b/cryptofeed/backends/quasardb.py @@ -6,14 +6,35 @@ class QuasarCallback(BackendCallback): - def __init__(self, uri="qdb://127.0.0.1:2836", username: str = "", private_key: str = "", public_key: str = "", none_to=None, shard_size: timedelta = timedelta(minutes=15)): + # Default attributes - subclasses should override + table_prefix = "unknown" + query = None + + def _create_query(self): + """Subclasses should override this method""" + pass + + def __init__( + self, + uri="qdb://127.0.0.1:2836", + username: str = "", + private_key: str = "", + public_key: str = "", + none_to=None, + shard_size: timedelta = timedelta(minutes=15), + ): self.numeric_type = float self.table = "" self.running = True self.none_to = none_to self.shard_size = self._get_str_timedelta(shard_size) - pool.initialize(uri=uri, user_name=username, user_private_key=private_key, cluster_public_key=public_key) + pool.initialize( + uri=uri, + user_name=username, + user_private_key=private_key, + cluster_public_key=public_key, + ) def _get_str_timedelta(self, delta: timedelta): # calculate the number of hours, minutes, and remaining seconds from timedelta, return it in correct format for query @@ -22,11 +43,18 @@ def _get_str_timedelta(self, delta: timedelta): return f"{int(hours)}hour {int(minutes)}min {int(seconds)}s" def format(self, data: dict): - data['timestamp'] = np.datetime64(datetime.utcfromtimestamp(data['timestamp']), 'ns') - 
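# Illustrative sketch: serialize_to_protobuf_v2() dispatches on type(obj).__name__, so any
# class named "Trade" exposing the attributes read by trade_to_proto_v2() will serialize.
# The toy class below is hypothetical; real callers pass cryptofeed Trade objects, and the
# generated v2 bindings must be present for the import to succeed.
from decimal import Decimal
from cryptofeed.backends.protobuf_helpers_v2 import serialize_to_protobuf_v2

class Trade:  # stand-in carrying only the attributes the converter reads
    exchange = "COINBASE"
    symbol = "BTC-USD"
    side = "buy"
    id = "12345"
    price = Decimal("50000.25")
    amount = Decimal("0.01")
    timestamp = 1_700_000_000.5
    sequence_number = None
    scale = None

payload = serialize_to_protobuf_v2(Trade())  # bytes, usable as a Kafka or Redis payload
print(len(payload))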
data['receipt_timestamp'] = np.datetime64(datetime.utcfromtimestamp(data['receipt_timestamp']), 'ns') - data['timestamp'], data['receipt_timestamp'] = data['receipt_timestamp'], data['timestamp'] - index = data['timestamp'] - data.pop('timestamp') + data["timestamp"] = np.datetime64( + datetime.utcfromtimestamp(data["timestamp"]), "ns" + ) + data["receipt_timestamp"] = np.datetime64( + datetime.utcfromtimestamp(data["receipt_timestamp"]), "ns" + ) + data["timestamp"], data["receipt_timestamp"] = ( + data["receipt_timestamp"], + data["timestamp"], + ) + index = data["timestamp"] + data.pop("timestamp") return index, data def _set_table_name(self, data: dict): @@ -53,7 +81,14 @@ async def write(self, data: dict): # write to table, if table doesnt exist it will be created with specified shard_size value with pool.instance().connect() as conn: self._create_table(conn) - qdbnp.write_arrays(np_array, conn, conn.table(self.table), index=idx, fast=True, _async=True) + qdbnp.write_arrays( + np_array, + conn, + conn.table(self.table), + index=idx, + fast=True, + _async=True, + ) class TickerQuasar(QuasarCallback): @@ -75,9 +110,9 @@ class CandlesQuasar(QuasarCallback): def format(self, data: dict): index, data = super().format(data) - data['start'] = datetime.utcfromtimestamp(data['start']) - data['stop'] = datetime.utcfromtimestamp(data['stop']) - data['closed'] = int(data['closed']) + data["start"] = datetime.utcfromtimestamp(data["start"]) + data["stop"] = datetime.utcfromtimestamp(data["stop"]) + data["closed"] = int(data["closed"]) return index, data def _create_query(self): @@ -89,7 +124,7 @@ class FundingQuasar(QuasarCallback): def format(self, data: dict): index, data = super().format(data) - data['next_funding_time'] = datetime.utcfromtimestamp(data['next_funding_time']) + data["next_funding_time"] = datetime.utcfromtimestamp(data["next_funding_time"]) return index, data def _create_query(self): @@ -102,24 +137,24 @@ class BookQuasar(QuasarCallback): def format(self, data: dict): index, data = super().format(data) # store only best bid and best ask - if not data['book']: + if not data["book"]: best_bid = max(data["delta"]["bid"], key=lambda x: x[0]) best_ask = min(data["delta"]["ask"], key=lambda x: x[0]) - data['best_bid_price'] = best_bid[0] - data['best_bid_amount'] = best_bid[1] - data['best_ask_price'] = best_ask[0] - data['best_ask_amount'] = best_ask[1] - data.pop('delta') + data["best_bid_price"] = best_bid[0] + data["best_bid_amount"] = best_bid[1] + data["best_ask_price"] = best_ask[0] + data["best_ask_amount"] = best_ask[1] + data.pop("delta") else: best_bid = max(data["book"]["bid"].keys()) best_ask = min(data["book"]["ask"].keys()) - data['best_bid_price'] = best_bid - data['best_bid_amount'] = data["book"]["bid"][best_bid] - data['best_ask_price'] = best_ask - data['best_ask_amount'] = data["book"]["ask"][best_ask] - data.pop('book') + data["best_bid_price"] = best_bid + data["best_bid_amount"] = data["book"]["bid"][best_bid] + data["best_ask_price"] = best_ask + data["best_ask_amount"] = data["book"]["ask"][best_ask] + data.pop("book") return index, data def _create_query(self): diff --git a/cryptofeed/backends/quest.py b/cryptofeed/backends/quest.py index aa11a9d3b..f11497d7d 100644 --- a/cryptofeed/backends/quest.py +++ b/cryptofeed/backends/quest.py @@ -1,20 +1,24 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + import logging from cryptofeed.backends.backend import BackendCallback from cryptofeed.backends.socket import SocketCallback -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class QuestCallback(SocketCallback): - def __init__(self, host='127.0.0.1', port=9009, key=None, **kwargs): + # Default key - subclasses should override + default_key = "unknown" + + def __init__(self, host="127.0.0.1", port=9009, key=None, **kwargs): super().__init__(f"tcp://{host}", port=port, **kwargs) self.key = key if key else self.default_key self.numeric_type = float @@ -24,6 +28,7 @@ def __init__(self, host='127.0.0.1', port=9009, key=None, **kwargs): async def writer(self): while self.running: await self.connect() + assert self.conn is not None async with self.read_queue() as updates: update = "\n".join(updates) + "\n" self.conn.write(update.encode()) @@ -32,91 +37,116 @@ async def write(self, data): d = self.format(data) timestamp = data["timestamp"] received_timestamp_int = int(data["receipt_timestamp"] * 1_000_000) - timestamp_int = int(timestamp * 1_000_000_000) if timestamp is not None else received_timestamp_int * 1000 - update = f'{self.key}-{data["exchange"]},symbol={data["symbol"]} {d},receipt_timestamp={received_timestamp_int}t {timestamp_int}' + timestamp_int = ( + int(timestamp * 1_000_000_000) + if timestamp is not None + else received_timestamp_int * 1000 + ) + update = f"{self.key}-{data['exchange']},symbol={data['symbol']} {d},receipt_timestamp={received_timestamp_int}t {timestamp_int}" await self.queue.put(update) def format(self, data): ret = [] for key, value in data.items(): - if key in {'timestamp', 'exchange', 'symbol', 'receipt_timestamp'}: + if key in {"timestamp", "exchange", "symbol", "receipt_timestamp"}: continue if isinstance(value, str): ret.append(f'{key}="{value}"') else: - ret.append(f'{key}={value}') - return ','.join(ret) + ret.append(f"{key}={value}") + return ",".join(ret) class TradeQuest(QuestCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" async def write(self, data): timestamp = data["timestamp"] received_timestamp_int = int(data["receipt_timestamp"] * 1_000_000) - id_field = f'id={data["id"]}i,' if data["id"] is not None else '' - timestamp_int = int(timestamp * 1_000_000_000) if timestamp is not None else received_timestamp_int * 1000 + id_field = f"id={data['id']}i," if data["id"] is not None else "" + timestamp_int = ( + int(timestamp * 1_000_000_000) + if timestamp is not None + else received_timestamp_int * 1000 + ) update = ( - f'{self.key}-{data["exchange"]},symbol={data["symbol"]},side={data["side"]},type={data["type"]} ' - f'price={data["price"]},amount={data["amount"]},{id_field}receipt_timestamp={received_timestamp_int}t {timestamp_int}' + f"{self.key}-{data['exchange']},symbol={data['symbol']},side={data['side']},type={data['type']} " + f"price={data['price']},amount={data['amount']},{id_field}receipt_timestamp={received_timestamp_int}t {timestamp_int}" ) await self.queue.put(update) class FundingQuest(QuestCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class BookQuest(QuestCallback): - default_key = 'book' + default_key = "book" def __init__(self, *args, depth=10, **kwargs): super().__init__(*args, **kwargs) self.depth = depth async def __call__(self, book, receipt_timestamp: float): - vals = ','.join([f"bid_{i}_price={book.book.bids.index(i)[0]},bid_{i}_size={book.book.bids.index(i)[1]}" for i in range(self.depth)] + 
[f"ask_{i}_price={book.book.asks.index(i)[0]},ask_{i}_size={book.book.asks.index(i)[1]}" for i in range(self.depth)]) + vals = ",".join( + [ + f"bid_{i}_price={book.book.bids.index(i)[0]},bid_{i}_size={book.book.bids.index(i)[1]}" + for i in range(self.depth) + ] + + [ + f"ask_{i}_price={book.book.asks.index(i)[0]},ask_{i}_size={book.book.asks.index(i)[1]}" + for i in range(self.depth) + ] + ) timestamp = book.timestamp receipt_timestamp_int = int(receipt_timestamp * 1_000_000) - timestamp_int = int(timestamp * 1_000_000_000) if timestamp is not None else receipt_timestamp_int * 1000 - update = f'{self.key}-{book.exchange},symbol={book.symbol} {vals},receipt_timestamp={receipt_timestamp_int}t {timestamp_int}' + timestamp_int = ( + int(timestamp * 1_000_000_000) + if timestamp is not None + else receipt_timestamp_int * 1000 + ) + update = f"{self.key}-{book.exchange},symbol={book.symbol} {vals},receipt_timestamp={receipt_timestamp_int}t {timestamp_int}" await self.queue.put(update) class TickerQuest(QuestCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class OpenInterestQuest(QuestCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class LiquidationsQuest(QuestCallback, BackendCallback): - default_key = 'liquidations' + default_key = "liquidations" class CandlesQuest(QuestCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" async def write(self, data): timestamp = data["timestamp"] - timestamp_str = f',timestamp={int(timestamp * 1_000_000_000)}i' if timestamp is not None else '' - trades = f',trades={data["trades"]},' if data['trades'] else ',' - update = f'{self.key}-{data["exchange"]},symbol={data["symbol"]},interval={data["interval"]} start={data["start"]},stop={data["stop"]}{trades}open={data["open"]},close={data["close"]},high={data["high"]},low={data["low"]},volume={data["volume"]}{timestamp_str},receipt_timestamp={int(data["receipt_timestamp"]) * 1_000_000}t {int(data["receipt_timestamp"] * 1_000_000_000)}' + timestamp_str = ( + f",timestamp={int(timestamp * 1_000_000_000)}i" + if timestamp is not None + else "" + ) + trades = f",trades={data['trades']}," if data["trades"] else "," + update = f"{self.key}-{data['exchange']},symbol={data['symbol']},interval={data['interval']} start={data['start']},stop={data['stop']}{trades}open={data['open']},close={data['close']},high={data['high']},low={data['low']},volume={data['volume']}{timestamp_str},receipt_timestamp={int(data['receipt_timestamp']) * 1_000_000}t {int(data['receipt_timestamp'] * 1_000_000_000)}" await self.queue.put(update) class OrderInfoQuest(QuestCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class TransactionsQuest(QuestCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" class BalancesQuest(QuestCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class FillsQuest(QuestCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" diff --git a/cryptofeed/backends/rabbitmq.py b/cryptofeed/backends/rabbitmq.py index 9e0a39bce..80d0ffc3f 100644 --- a/cryptofeed/backends/rabbitmq.py +++ b/cryptofeed/backends/rabbitmq.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + import asyncio from collections import defaultdict @@ -14,7 +15,18 @@ class RabbitCallback: - def __init__(self, host='localhost', none_to=None, numeric_type=float, queue_name='cryptofeed', exchange_mode=False, exchange_name='amq.topic', exchange_type='topic', routing_key='cryptofeed', **kwargs): + def __init__( + self, + host="localhost", + none_to=None, + numeric_type=float, + queue_name="cryptofeed", + exchange_mode=False, + exchange_name="amq.topic", + exchange_type="topic", + routing_key="cryptofeed", + **kwargs, + ): """ Parameters ---------- @@ -44,30 +56,38 @@ def __init__(self, host='localhost', none_to=None, numeric_type=float, queue_nam async def connect(self): if not self.conn: if self.exchange_mode: - connection = await aio_pika.connect_robust(f"amqp://{self.host}", loop=asyncio.get_running_loop()) + connection = await aio_pika.connect_robust( + f"amqp://{self.host}", loop=asyncio.get_running_loop() + ) self.conn = await connection.channel() - self.conn = await self.conn.declare_exchange(self.exchange_name, self.exchange_type, durable=True, auto_delete=False) + self.conn = await self.conn.declare_exchange( + self.exchange_name, + self.exchange_type, + durable=True, + auto_delete=False, + ) else: - connection = await aio_pika.connect_robust(f"amqp://{self.host}", loop=asyncio.get_running_loop()) + connection = await aio_pika.connect_robust( + f"amqp://{self.host}", loop=asyncio.get_running_loop() + ) self.conn = await connection.channel() - await self.conn.declare_queue(self.queue_name, auto_delete=False, durable=True) + await self.conn.declare_queue( + self.queue_name, auto_delete=False, durable=True + ) async def write(self, data: dict): await self.connect() + assert self.conn is not None if self.exchange_mode: await self.conn.publish( - aio_pika.Message( - body=json.dumps(data).encode() - ), - routing_key=self.routing_key + aio_pika.Message(body=json.dumps(data).encode()), + routing_key=self.routing_key, ) else: await self.conn.default_exchange.publish( - aio_pika.Message( - body=json.dumps(data).encode() - ), - routing_key=self.routing_key + aio_pika.Message(body=json.dumps(data).encode()), + routing_key=self.routing_key, ) diff --git a/cryptofeed/backends/redis.py b/cryptofeed/backends/redis.py index 55bf0b4e4..d8d8c5917 100644 --- a/cryptofeed/backends/redis.py +++ b/cryptofeed/backends/redis.py @@ -1,28 +1,46 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. -''' +""" + from collections import defaultdict import base64 from redis import asyncio as aioredis from cryptofeed.json_utils import json -from cryptofeed.backends.backend import BackendBookCallback, BackendCallback, BackendQueue +from cryptofeed.backends.backend import ( + BackendBookCallback, + BackendCallback, + BackendQueue, +) class RedisCallback(BackendQueue): - def __init__(self, host='127.0.0.1', port=6379, socket=None, key=None, none_to='None', numeric_type=float, serialization_format=None, **kwargs): + # Default key - subclasses should override + default_key = "unknown" + + def __init__( + self, + host="127.0.0.1", + port=6379, + socket=None, + key=None, + none_to="None", + numeric_type=float, + serialization_format=None, + **kwargs, + ): """ setting key lets you override the prefix on the key used in redis. The defaults are related to the data being stored, i.e. 
trade, funding, etc """ - prefix = 'redis://' + prefix = "redis://" if socket: - prefix = 'unix://' + prefix = "unix://" port = None self.redis = f"{prefix}{host}" + f":{port}" if port else "" @@ -34,33 +52,33 @@ def __init__(self, host='127.0.0.1', port=6379, socket=None, key=None, none_to=' self.set_serialization_format(serialization_format) def _prepare_json_record(self, update: dict) -> dict: - if isinstance(update, dict) and update.get('format') == 'protobuf': - encoded = base64.b64encode(update['payload']).decode('ascii') + if isinstance(update, dict) and update.get("format") == "protobuf": + encoded = base64.b64encode(update["payload"]).decode("ascii") return { - 'format': 'protobuf', - 'content_type': update['content_type'], - 'metadata': update['metadata'], - 'payload_b64': encoded, + "format": "protobuf", + "content_type": update["content_type"], + "metadata": update["metadata"], + "payload_b64": encoded, } return update async def __call__(self, dtype, receipt_timestamp: float): # Handle protobuf format explicitly to wrap payload in dict - if self.serialization_format == 'protobuf': + if self.serialization_format == "protobuf": from cryptofeed.backends.protobuf_helpers import serialize_to_protobuf payload = serialize_to_protobuf(dtype) metadata = { - 'exchange': getattr(dtype, 'exchange', 'unknown'), - 'symbol': getattr(dtype, 'symbol', 'unknown'), - 'receipt_timestamp': receipt_timestamp, + "exchange": getattr(dtype, "exchange", "unknown"), + "symbol": getattr(dtype, "symbol", "unknown"), + "receipt_timestamp": receipt_timestamp, } update = { - 'format': 'protobuf', - 'content_type': 'application/x-protobuf', - 'metadata': metadata, - 'payload': payload, + "format": "protobuf", + "content_type": "application/x-protobuf", + "metadata": metadata, + "payload": payload, } await self.write(update) else: @@ -68,18 +86,27 @@ async def __call__(self, dtype, receipt_timestamp: float): await BackendCallback.__call__(self, dtype, receipt_timestamp) def _prepare_stream_record(self, update: dict) -> dict: - if isinstance(update, dict) and update.get('format') == 'protobuf': + if isinstance(update, dict) and update.get("format") == "protobuf": return { - 'format': 'protobuf', - 'content_type': update['content_type'], - 'metadata': json.dumps(update['metadata']), - 'payload': update['payload'], + "format": "protobuf", + "content_type": update["content_type"], + "metadata": json.dumps(update["metadata"]), + "payload": update["payload"], } return update class RedisZSetCallback(RedisCallback): - def __init__(self, host='127.0.0.1', port=6379, socket=None, key=None, numeric_type=float, score_key='timestamp', **kwargs): + def __init__( + self, + host="127.0.0.1", + port=6379, + socket=None, + key=None, + numeric_type=float, + score_key="timestamp", + **kwargs, + ): """ score_key: str the value at this key will be used to store the data in the ZSet in redis. The @@ -87,7 +114,14 @@ def __init__(self, host='127.0.0.1', port=6379, socket=None, key=None, numeric_t use this to change it. It must be a numeric value. 
""" self.score_key = score_key - super().__init__(host=host, port=port, socket=socket, key=key, numeric_type=numeric_type, **kwargs) + super().__init__( + host=host, + port=port, + socket=socket, + key=key, + numeric_type=numeric_type, + **kwargs, + ) async def writer(self): conn = aioredis.from_url(self.redis) @@ -98,8 +132,16 @@ async def writer(self): for update in updates: record = self._prepare_json_record(update) pipe = pipe.zadd( - f"{self.key}-{record['metadata']['exchange']}-{record['metadata']['symbol']}" if record.get('format') == 'protobuf' else f"{self.key}-{update['exchange']}-{update['symbol']}", - {json.dumps(record): (record['metadata']['receipt_timestamp'] if record.get('format') == 'protobuf' else update[self.score_key])}, + f"{self.key}-{record['metadata']['exchange']}-{record['metadata']['symbol']}" + if record.get("format") == "protobuf" + else f"{self.key}-{update['exchange']}-{update['symbol']}", + { + json.dumps(record): ( + record["metadata"]["receipt_timestamp"] + if record.get("format") == "protobuf" + else update[self.score_key] + ) + }, nx=True, ) await pipe.execute() @@ -116,19 +158,24 @@ async def writer(self): async with self.read_queue() as updates: async with conn.pipeline(transaction=False) as pipe: for update in updates: - if isinstance(update, dict) and update.get('format') == 'protobuf': + if ( + isinstance(update, dict) + and update.get("format") == "protobuf" + ): record = self._prepare_stream_record(update) - metadata = json.loads(record['metadata']) + metadata = json.loads(record["metadata"]) stream_key = f"{self.key}-{metadata['exchange']}-{metadata['symbol']}" else: record = update - stream_key = f"{self.key}-{update['exchange']}-{update['symbol']}" - if 'delta' in record: - record['delta'] = json.dumps(record['delta']) - elif 'book' in record: - record['book'] = json.dumps(record['book']) - elif 'closed' in record: - record['closed'] = str(record['closed']) + stream_key = ( + f"{self.key}-{update['exchange']}-{update['symbol']}" + ) + if "delta" in record: + record["delta"] = json.dumps(record["delta"]) + elif "book" in record: + record["book"] = json.dumps(record["book"]) + elif "closed" in record: + record["closed"] = str(record["closed"]) pipe = pipe.xadd(stream_key, record) await pipe.execute() @@ -138,7 +185,6 @@ async def writer(self): class RedisKeyCallback(RedisCallback): - async def writer(self): conn = aioredis.from_url(self.redis) @@ -147,8 +193,8 @@ async def writer(self): update = list(updates)[-1] if update: record = self._prepare_json_record(update) - if record.get('format') == 'protobuf': - metadata = record['metadata'] + if record.get("format") == "protobuf": + metadata = record["metadata"] key = f"{self.key}-{metadata['exchange']}-{metadata['symbol']}" else: key = f"{self.key}-{update['exchange']}-{update['symbol']}" @@ -159,25 +205,32 @@ async def writer(self): class TradeRedis(RedisZSetCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" class TradeStream(RedisStreamCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" class FundingRedis(RedisZSetCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class FundingStream(RedisStreamCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class BookRedis(RedisZSetCallback, BackendBookCallback): - default_key = 'book' - - def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, score_key='receipt_timestamp', **kwargs): + default_key = "book" + + def __init__( + self, 
+ *args, + snapshots_only=False, + snapshot_interval=1000, + score_key="receipt_timestamp", + **kwargs, + ): self.snapshots_only = snapshots_only self.snapshot_interval = snapshot_interval self.snapshot_count = defaultdict(int) @@ -185,7 +238,7 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, score_ke class BookStream(RedisStreamCallback, BackendBookCallback): - default_key = 'book' + default_key = "book" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): self.snapshots_only = snapshots_only @@ -195,74 +248,76 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs class BookSnapshotRedisKey(RedisKeyCallback, BackendBookCallback): - default_key = 'book' + default_key = "book" - def __init__(self, *args, snapshot_interval=1000, score_key='receipt_timestamp', **kwargs): - kwargs['snapshots_only'] = True + def __init__( + self, *args, snapshot_interval=1000, score_key="receipt_timestamp", **kwargs + ): + kwargs["snapshots_only"] = True self.snapshot_interval = snapshot_interval self.snapshot_count = defaultdict(int) super().__init__(*args, score_key=score_key, **kwargs) class TickerRedis(RedisZSetCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class TickerStream(RedisStreamCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class OpenInterestRedis(RedisZSetCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class OpenInterestStream(RedisStreamCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class LiquidationsRedis(RedisZSetCallback, BackendCallback): - default_key = 'liquidations' + default_key = "liquidations" class LiquidationsStream(RedisStreamCallback, BackendCallback): - default_key = 'liquidations' + default_key = "liquidations" class CandlesRedis(RedisZSetCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" class CandlesStream(RedisStreamCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" class OrderInfoRedis(RedisZSetCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class OrderInfoStream(RedisStreamCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class TransactionsRedis(RedisZSetCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" class TransactionsStream(RedisStreamCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" class BalancesRedis(RedisZSetCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class BalancesStream(RedisStreamCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class FillsRedis(RedisZSetCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" class FillsStream(RedisStreamCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" diff --git a/cryptofeed/backends/socket.py b/cryptofeed/backends/socket.py index 0c27638c0..0b7a753c0 100644 --- a/cryptofeed/backends/socket.py +++ b/cryptofeed/backends/socket.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + from collections import defaultdict import asyncio import logging @@ -11,10 +12,14 @@ from cryptofeed.json_utils import json -from cryptofeed.backends.backend import BackendQueue, BackendBookCallback, BackendCallback +from cryptofeed.backends.backend import ( + BackendQueue, + BackendBookCallback, + BackendCallback, +) -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class UDPProtocol: @@ -29,18 +34,32 @@ def datagram_received(self, data, addr): pass def error_received(self, exc): - LOG.error('UDP backend received exception: %s', exc) - self.transport.close() + LOG.error("UDP backend received exception: %s", exc) + if self.transport: + self.transport.close() self.transport = None def connection_lost(self, exc): - LOG.error('UDP backend connection lost: %s', exc) - self.transport.close() + LOG.error("UDP backend connection lost: %s", exc) + if self.transport: + self.transport.close() self.transport = None class SocketCallback(BackendQueue): - def __init__(self, addr: str, port=None, none_to=None, numeric_type=float, key=None, mtu=1400, **kwargs): + # Default key - subclasses should override + default_key = "unknown" + + def __init__( + self, + addr: str, + port=None, + none_to=None, + numeric_type=float, + key=None, + mtu=1400, + **kwargs, + ): """ Common parent class for all socket callbacks @@ -59,7 +78,7 @@ def __init__(self, addr: str, port=None, none_to=None, numeric_type=float, key=N MTU for UDP message size. Should be slightly less than actual MTU for overhead """ self.conn_type = addr[:6] - if self.conn_type not in {'tcp://', 'uds://', 'udp://'}: + if self.conn_type not in {"tcp://", "uds://", "udp://"}: raise ValueError("Invalid protocol specified for SocketCallback") self.conn = None self.protocol = None @@ -74,15 +93,22 @@ def __init__(self, addr: str, port=None, none_to=None, numeric_type=float, key=N async def writer(self): while self.running: await self.connect() + assert self.conn is not None async with self.read_queue() as updates: for update in updates: - data = {'type': self.key, 'data': update} + data = {"type": self.key, "data": update} data = json.dumps(data) - if self.conn_type == 'udp://': + if self.conn_type == "udp://": if len(update) > self.mtu: chunks = wrap(update, self.mtu) for chunk in chunks: - msg = json.dumps({'type': 'chunked', 'chunks': len(chunks), 'data': chunk}).encode() + msg = json.dumps( + { + "type": "chunked", + "chunks": len(chunks), + "data": chunk, + } + ).encode() self.conn.sendto(msg) else: self.conn.sendto(data.encode()) @@ -91,26 +117,29 @@ async def writer(self): async def connect(self): if not self.conn: - if self.conn_type == 'udp://': + if self.conn_type == "udp://": loop = asyncio.get_event_loop() self.conn, self.protocol = await loop.create_datagram_endpoint( - lambda: UDPProtocol(loop), remote_addr=(self.addr, self.port)) - elif self.conn_type == 'tcp://': - _, self.conn = await asyncio.open_connection(host=self.addr, port=self.port) - elif self.conn_type == 'uds://': + lambda: UDPProtocol(loop), remote_addr=(self.addr, self.port) + ) + elif self.conn_type == "tcp://": + _, self.conn = await asyncio.open_connection( + host=self.addr, port=self.port + ) + elif self.conn_type == "uds://": _, self.conn = await asyncio.open_unix_connection(path=self.addr) class TradeSocket(SocketCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" class FundingSocket(SocketCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class BookSocket(SocketCallback, 
BackendBookCallback): - default_key = 'book' + default_key = "book" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): self.snapshots_only = snapshots_only @@ -120,32 +149,32 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs class TickerSocket(SocketCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class OpenInterestSocket(SocketCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class LiquidationsSocket(SocketCallback, BackendCallback): - default_key = 'liquidations' + default_key = "liquidations" class CandlesSocket(SocketCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" class OrderInfoSocket(SocketCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class TransactionsSocket(SocketCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" class BalancesSocket(SocketCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class FillsSocket(SocketCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" diff --git a/cryptofeed/backends/zmq.py b/cryptofeed/backends/zmq.py index 1fdd612e9..339624417 100644 --- a/cryptofeed/backends/zmq.py +++ b/cryptofeed/backends/zmq.py @@ -1,20 +1,37 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. -''' +""" + from collections import defaultdict import zmq import zmq.asyncio from cryptofeed.json_utils import json -from cryptofeed.backends.backend import BackendQueue, BackendBookCallback, BackendCallback +from cryptofeed.backends.backend import ( + BackendQueue, + BackendBookCallback, + BackendCallback, +) class ZMQCallback(BackendQueue): - def __init__(self, host='127.0.0.1', port=5555, none_to=None, numeric_type=float, key=None, dynamic_key=True, **kwargs): + # Default key - subclasses should override + default_key = "unknown" + + def __init__( + self, + host="127.0.0.1", + port=5555, + none_to=None, + numeric_type=float, + key=None, + dynamic_key=True, + **kwargs, + ): self.url = "tcp://{}:{}".format(host, port) self.key = key if key else self.default_key self.numeric_type = numeric_type @@ -41,26 +58,26 @@ async def writer(self): # JSON: send as string with metadata if self.dynamic_key: - message = f'{update["exchange"]}-{self.key}-{update["symbol"]} {json.dumps(update)}' + message = f"{update['exchange']}-{self.key}-{update['symbol']} {json.dumps(update)}" else: - message = f'{self.key} {json.dumps(update)}' + message = f"{self.key} {json.dumps(update)}" await con.send_string(message) class TradeZMQ(ZMQCallback, BackendCallback): - default_key = 'trades' + default_key = "trades" class TickerZMQ(ZMQCallback, BackendCallback): - default_key = 'ticker' + default_key = "ticker" class FundingZMQ(ZMQCallback, BackendCallback): - default_key = 'funding' + default_key = "funding" class BookZMQ(ZMQCallback, BackendBookCallback): - default_key = 'book' + default_key = "book" def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs): self.snapshots_only = snapshots_only @@ -70,32 +87,32 @@ def __init__(self, *args, snapshots_only=False, snapshot_interval=1000, **kwargs class OpenInterestZMQ(ZMQCallback, BackendCallback): - default_key = 'open_interest' + default_key = "open_interest" class LiquidationsZMQ(ZMQCallback, BackendCallback): - default_key = 'liquidations' 
+ default_key = "liquidations" class CandlesZMQ(ZMQCallback, BackendCallback): - default_key = 'candles' + default_key = "candles" class BalancesZMQ(ZMQCallback, BackendCallback): - default_key = 'balances' + default_key = "balances" class PositionsZMQ(ZMQCallback, BackendCallback): - default_key = 'positions' + default_key = "positions" class OrderInfoZMQ(ZMQCallback, BackendCallback): - default_key = 'order_info' + default_key = "order_info" class FillsZMQ(ZMQCallback, BackendCallback): - default_key = 'fills' + default_key = "fills" class TransactionsZMQ(ZMQCallback, BackendCallback): - default_key = 'transactions' + default_key = "transactions" diff --git a/cryptofeed/connection.py b/cryptofeed/connection.py index 7104b7a03..82591eb2f 100644 --- a/cryptofeed/connection.py +++ b/cryptofeed/connection.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. -''' +""" + import logging import time import asyncio @@ -28,7 +29,7 @@ from cryptofeed.proxy import get_proxy_injector, log_proxy_usage -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class Connection: @@ -44,7 +45,9 @@ async def write(self, msg: str): class HTTPSync(Connection): def process_response(self, r, address, json=False, text=False, uuid=None): if self.raw_data_callback: - self.raw_data_callback.sync_callback(r.text, time.time(), str(uuid), endpoint=address) + self.raw_data_callback.sync_callback( + r.text, time.time(), str(uuid), endpoint=address + ) r.raise_for_status() if json: @@ -53,14 +56,24 @@ def process_response(self, r, address, json=False, text=False, uuid=None): return r.text return r - def read(self, address: str, params=None, headers=None, json=False, text=True, uuid=None): + def read( + self, address: str, params=None, headers=None, json=False, text=True, uuid=None + ): LOG.debug("HTTPSync: requesting data from %s", address) r = requests.get(address, headers=headers, params=params) return self.process_response(r, address, json=json, text=text, uuid=uuid) - def write(self, address: str, data=None, json=False, text=True, uuid=None, is_data_json=False): + def write( + self, + address: str, + data=None, + json=False, + text=True, + uuid=None, + is_data_json=False, + ): LOG.debug("HTTPSync: post to %s", address) - if (is_data_json): + if is_data_json: r = requests.post(address, json=data) else: r = requests.post(address, data=data) @@ -126,7 +139,7 @@ async def close(self): conn = self.conn self.conn = None await conn.close() - LOG.info('%s: closed connection %r', self.id, conn.__class__.__name__) + LOG.info("%s: closed connection %r", self.id, conn.__class__.__name__) class HTTPAsyncConn(AsyncConnection): @@ -139,7 +152,7 @@ def __init__(self, conn_id: str, proxy: StrOrURL = None, exchange_id: str = None exchange_id: str exchange identifier for proxy configuration """ - super().__init__(f'{conn_id}.http.{self.conn_count}') + super().__init__(f"{conn_id}.http.{self.conn_count}") self.proxy = proxy self._legacy_proxy = proxy self._current_proxy: Optional[StrOrURL] = None @@ -149,7 +162,7 @@ def __init__(self, conn_id: str, proxy: StrOrURL = None, exchange_id: str = None @property def is_open(self) -> bool: - return self.conn and not self.conn.closed + return self.conn and not self.conn.closed # type: ignore[attr-defined] def _handle_error(self, resp: ClientResponse, data: bytes): if resp.status != 200: @@ -160,16 +173,18 @@ def _handle_error(self, resp: 
ClientResponse, data: bytes): async def _open(self): if self.is_open: - LOG.warning('%s: HTTP session already created', self.id) + LOG.warning("%s: HTTP session already created", self.id) else: - LOG.debug('%s: create HTTP session', self.id) - + LOG.debug("%s: create HTTP session", self.id) + # Get proxy URL if configured through proxy system proxy_url = None release_proxy = self._proxy_release injector = get_proxy_injector() if injector and self.exchange_id: - proxy_url, release_proxy = injector.lease_proxy(self.exchange_id, 'http') + proxy_url, release_proxy = injector.lease_proxy( + self.exchange_id, "http" + ) if proxy_url is not None: proxy = proxy_url @@ -183,16 +198,18 @@ async def _open(self): self._proxy_release = release_proxy if proxy: - log_proxy_usage(transport='http', exchange_id=self.exchange_id, proxy_url=proxy) + log_proxy_usage( + transport="http", exchange_id=self.exchange_id, proxy_url=proxy + ) self._request_proxy_kwargs = {} if proxy: - scheme = (urlparse(proxy).scheme or '').lower() + scheme = (urlparse(proxy).scheme or "").lower() else: - scheme = '' + scheme = "" - if proxy and scheme in {'socks4', 'socks4a', 'socks5', 'socks5h'}: + if proxy and scheme in {"socks4", "socks4a", "socks5", "socks5h"}: try: from aiohttp_socks import ProxyConnector except ModuleNotFoundError as exc: @@ -214,7 +231,7 @@ async def _open(self): except Exception: release_proxy() raise - + self.sent = 0 self.received = 0 self.last_message = None @@ -229,15 +246,23 @@ async def close(self): self._proxy_release() self._proxy_release = lambda: None self._current_proxy = None - LOG.info('%s: closed connection %r', self.id, conn.__class__.__name__) - - async def read(self, address: str, header=None, params=None, return_headers=False, retry_count=0, retry_delay=60) -> str: + LOG.info("%s: closed connection %r", self.id, conn.__class__.__name__) + + async def read( + self, + address: str, + header=None, + params=None, + return_headers=False, + retry_count=0, + retry_delay=60, + ) -> str: if not self.is_open: await self._open() LOG.debug("%s: requesting data from %s", self.id, address) while True: - async with self.conn.get( + async with self.conn.get( # type: ignore[attr-defined] address, headers=header, params=params, @@ -247,9 +272,21 @@ async def read(self, address: str, header=None, params=None, return_headers=Fals self.last_message = time.time() self.received += 1 if self.raw_data_callback: - await self.raw_data_callback(data, self.last_message, self.id, endpoint=address, header=None if return_headers is False else dict(response.headers)) + await self.raw_data_callback( + data, + self.last_message, + self.id, + endpoint=address, + header=None + if return_headers is False + else dict(response.headers), + ) if response.status == 429 and retry_count: - LOG.warning("%s: encountered a rate limit for address %s, retrying in 60 seconds", self.id, address) + LOG.warning( + "%s: encountered a rate limit for address %s, retrying in 60 seconds", + self.id, + address, + ) retry_count -= 1 if retry_count < 0: self._handle_error(response, data) @@ -260,12 +297,14 @@ async def read(self, address: str, header=None, params=None, return_headers=Fals return data, response.headers return data - async def write(self, address: str, msg: str, header=None, retry_count=0, retry_delay=60) -> str: + async def write( + self, address: str, msg: str, header=None, retry_count=0, retry_delay=60 + ) -> str: if not self.is_open: await self._open() while True: - async with self.conn.post( + async with self.conn.post( # type: 
ignore[attr-defined] address, data=msg, headers=header, @@ -274,9 +313,15 @@ async def write(self, address: str, msg: str, header=None, retry_count=0, retry_ self.sent += 1 data = await response.read() if self.raw_data_callback: - await self.raw_data_callback(data, time.time(), self.id, send=address) + await self.raw_data_callback( + data, time.time(), self.id, send=address + ) if response.status == 429 and retry_count: - LOG.warning("%s: encountered a rate limit for address %s, retrying in 60 seconds", self.id, address) + LOG.warning( + "%s: encountered a rate limit for address %s, retrying in 60 seconds", + self.id, + address, + ) retry_count -= 1 if retry_count < 0: self._handle_error(response, data) @@ -285,12 +330,14 @@ async def write(self, address: str, msg: str, header=None, retry_count=0, retry_ self._handle_error(response, data) return data - async def delete(self, address: str, header=None, retry_count=0, retry_delay=60) -> str: + async def delete( + self, address: str, header=None, retry_count=0, retry_delay=60 + ) -> str: if not self.is_open: await self._open() while True: - async with self.conn.delete( + async with self.conn.delete( # type: ignore[attr-defined] address, headers=header, **self._request_proxy_kwargs, @@ -298,9 +345,15 @@ async def delete(self, address: str, header=None, retry_count=0, retry_delay=60) self.sent += 1 data = await response.read() if self.raw_data_callback: - await self.raw_data_callback(data, time.time(), self.id, send=address) + await self.raw_data_callback( + data, time.time(), self.id, send=address + ) if response.status == 429 and retry_count: - LOG.warning("%s: encountered a rate limit for address %s, retrying in 60 seconds", self.id, address) + LOG.warning( + "%s: encountered a rate limit for address %s, retrying in 60 seconds", + self.id, + address, + ) retry_count -= 1 if retry_count < 0: response.raise_for_status() @@ -311,8 +364,15 @@ async def delete(self, address: str, header=None, retry_count=0, retry_delay=60) class HTTPPoll(HTTPAsyncConn): - def __init__(self, address: Union[List, str], conn_id: str, delay: float = 60, sleep: float = 1, proxy: StrOrURL = None): - super().__init__(f'{conn_id}.http.{self.conn_count}', proxy) + def __init__( + self, + address: Union[List, str], + conn_id: str, + delay: float = 60, + sleep: float = 1, + proxy: StrOrURL = None, + ): + super().__init__(f"{conn_id}.http.{self.conn_count}", proxy) if isinstance(address, str): address = [address] self.address = address @@ -324,10 +384,10 @@ async def _read_address(self, address: str, header=None) -> str: LOG.debug("%s: polling %s", self.id, address) while True: if not self.is_open: - LOG.error('%s: connection closed in read()', self.id) + LOG.error("%s: connection closed in read()", self.id) raise ConnectionClosed - async with self.conn.get( + async with self.conn.get( # type: ignore[attr-defined] address, headers=header, **self._request_proxy_kwargs, @@ -336,11 +396,18 @@ async def _read_address(self, address: str, header=None) -> str: self.received += 1 self.last_message = time.time() if self.raw_data_callback: - await self.raw_data_callback(data, self.last_message, self.id, endpoint=address) + await self.raw_data_callback( + data, self.last_message, self.id, endpoint=address + ) if response.status != 429: response.raise_for_status() return data - LOG.warning("%s: encountered a rate limit for address %s, retrying in %f seconds", self.id, address, self.delay) + LOG.warning( + "%s: encountered a rate limit for address %s, retrying in %f seconds", + self.id, 
+ address, + self.delay, + ) await asyncio.sleep(self.delay) async def read(self, header=None) -> AsyncIterable[str]: @@ -364,7 +431,9 @@ async def _poll_address(self, address: str, header=None): await asyncio.sleep(self.sleep) async def read(self, header=None) -> AsyncIterable[str]: - tasks = asyncio.gather(*(self._poll_address(address, header) for address in self.address)) + tasks = asyncio.gather( + *(self._poll_address(address, header) for address in self.address) + ) try: while not tasks.done(): @@ -380,8 +449,15 @@ async def read(self, header=None) -> AsyncIterable[str]: class WSAsyncConn(AsyncConnection): - - def __init__(self, address: str, conn_id: str, authentication=None, subscription=None, exchange_id: str = None, **kwargs): + def __init__( + self, + address: str, + conn_id: str, + authentication=None, + subscription=None, + exchange_id: str = None, + **kwargs, + ): """ address: str the websocket address to connect to @@ -390,47 +466,59 @@ def __init__(self, address: str, conn_id: str, authentication=None, subscription authentication: Callable function pointer for authentication subscription: dict - optional connection information + optional connection information exchange_id: str exchange identifier for proxy configuration kwargs: passed into the websocket connection. """ if not address.startswith("wss://"): - raise ValueError(f'Invalid address, must be a wss address. Provided address is: {address!r}') + raise ValueError( + f"Invalid address, must be a wss address. Provided address is: {address!r}" + ) self.address = address self.exchange_id = exchange_id - super().__init__(f'{conn_id}.ws.{self.conn_count}', authentication=authentication, subscription=subscription) + super().__init__( + f"{conn_id}.ws.{self.conn_count}", + authentication=authentication, + subscription=subscription, + ) self.ws_kwargs = kwargs @property def is_open(self) -> bool: - return self.conn and not self.conn.state == State.CLOSED + return self.conn and not self.conn.state == State.CLOSED # type: ignore[attr-defined] async def _open(self): if self.is_open: - LOG.warning('%s: websocket already open', self.id) + LOG.warning("%s: websocket already open", self.id) else: - LOG.debug('%s: connecting to %s', self.id, self.address) + LOG.debug("%s: connecting to %s", self.id, self.address) if self.raw_data_callback: - await self.raw_data_callback(None, time.time(), self.id, connect=self.address) + await self.raw_data_callback( + None, time.time(), self.id, connect=self.address + ) if self.authentication: - self.address, self.ws_kwargs = await self.authentication(self.address, self.ws_kwargs) + self.address, self.ws_kwargs = await self.authentication( + self.address, self.ws_kwargs + ) # Use proxy injector if available injector = get_proxy_injector() if injector and self.exchange_id: - self.conn = await injector.create_websocket_connection(self.address, self.exchange_id, **self.ws_kwargs) + self.conn = await injector.create_websocket_connection( + self.address, self.exchange_id, **self.ws_kwargs + ) else: self.conn = await connect(self.address, **self.ws_kwargs) - + self.sent = 0 self.received = 0 self.last_message = None async def read(self) -> AsyncIterable: if not self.is_open: - LOG.error('%s: connection closed in read()', id(self)) + LOG.error("%s: connection closed in read()", id(self)) raise ConnectionClosed if self.raw_data_callback: async for data in self.conn: @@ -465,7 +553,12 @@ class WebsocketEndpoint: authentication: bool = None def __post_init__(self): - defaults = {'ping_interval': 10, 
'ping_timeout': None, 'max_size': None, 'max_queue': None} + defaults = { + "ping_interval": 10, + "ping_timeout": None, + "max_size": None, + "max_queue": None, + } if self.options: defaults.update(self.options) self.options = defaults @@ -481,12 +574,24 @@ def subscription_filter(self, sub: dict) -> dict: if not self.instrument_filter: ret[chan].extend(sub[chan]) else: - if self.instrument_filter[0] == 'TYPE': - ret[chan].extend([s for s in syms if str_to_symbol(s).type in self.instrument_filter[1]]) - elif self.instrument_filter[0] == 'QUOTE': - ret[chan].extend([s for s in syms if str_to_symbol(s).quote in self.instrument_filter[1]]) + if self.instrument_filter[0] == "TYPE": + ret[chan].extend( + [ + s + for s in syms + if str_to_symbol(s).type in self.instrument_filter[1] + ] + ) + elif self.instrument_filter[0] == "QUOTE": + ret[chan].extend( + [ + s + for s in syms + if str_to_symbol(s).quote in self.instrument_filter[1] + ] + ) else: - raise ValueError('Invalid instrument filter type specified') + raise ValueError("Invalid instrument filter type specified") return ret def get_address(self, sandbox=False): @@ -518,4 +623,6 @@ class RestEndpoint: def route(self, ep, sandbox=False): endpoint = self.routes.__getattribute__(ep) api = self.sandbox if sandbox and self.sandbox else self.address - return api + endpoint if isinstance(endpoint, str) else [api + e for e in endpoint] + return ( + api + endpoint if isinstance(endpoint, str) else [api + e for e in endpoint] + ) diff --git a/cryptofeed/connection_handler.py b/cryptofeed/connection_handler.py index 69e656127..a31475f35 100644 --- a/cryptofeed/connection_handler.py +++ b/cryptofeed/connection_handler.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + import asyncio import logging from socket import error as socket_error @@ -18,11 +19,23 @@ from cryptofeed.defines import HUOBI, HUOBI_DM, HUOBI_SWAP, OKCOIN, OKX -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class ConnectionHandler: - def __init__(self, conn: AsyncConnection, subscribe: Awaitable, handler: Awaitable, authenticate: Awaitable, retries: int, timeout=120, timeout_interval=30, exceptions=None, log_on_error=False, start_delay=0): + def __init__( + self, + conn: AsyncConnection, + subscribe: Awaitable, + handler: Awaitable, + authenticate: Awaitable, + retries: int, + timeout=120, + timeout_interval=30, + exceptions=None, + log_on_error=False, + start_delay=0, + ): self.conn = conn self.subscribe = subscribe self.handler = handler @@ -42,7 +55,10 @@ async def _watcher(self): while self.conn.is_open and self.running: if self.conn.last_message: if time.time() - self.conn.last_message > self.timeout: - LOG.warning("%s: received no messages within timeout, restarting connection", self.conn.uuid) + LOG.warning( + "%s: received no messages within timeout, restarting connection", + self.conn.uuid, + ) await self.conn.close() break await asyncio.sleep(self.timeout_interval) @@ -57,20 +73,36 @@ async def _create_connection(self): await self._establish_connection() retries = 0 delay = 1 - except (ConnectionClosed, ConnectionAbortedError, ConnectionResetError, socket_error) as exc: - await self._handle_retry(exc, delay, LOG.warning, include_exc_message=True) + except ( + ConnectionClosed, + ConnectionAbortedError, + ConnectionResetError, + socket_error, + ) as exc: + await self._handle_retry( + exc, delay, LOG.warning, include_exc_message=True + ) retries += 1 delay *= 2 except Exception as exc: # pragma: no cover - defensive - await self._handle_retry(exc, delay, LOG.error, include_exc_message=False) + await self._handle_retry( + exc, delay, LOG.error, include_exc_message=False + ) retries += 1 delay *= 2 if not self.running: - LOG.info('%s: terminate the connection handler because not running', self.conn.uuid) + LOG.info( + "%s: terminate the connection handler because not running", + self.conn.uuid, + ) return - LOG.error('%s: failed to reconnect after %d retries - exiting', self.conn.uuid, retries) + LOG.error( + "%s: failed to reconnect after %d retries - exiting", + self.conn.uuid, + retries, + ) raise ExhaustedRetries() def _within_retry_budget(self, retries: int) -> bool: @@ -85,13 +117,26 @@ async def _establish_connection(self) -> None: loop.create_task(self._watcher()) await self._handler(connection, self.handler) - async def _handle_retry(self, exc: Exception, delay: float, log_method, *, include_exc_message: bool) -> None: + async def _handle_retry( + self, exc: Exception, delay: float, log_method, *, include_exc_message: bool + ) -> None: if self._should_raise(exc): raise if include_exc_message: - log_method("%s: encountered connection issue %s - reconnecting in %.1f seconds...", self.conn.uuid, str(exc), delay, exc_info=True) + log_method( + "%s: encountered connection issue %s - reconnecting in %.1f seconds...", + self.conn.uuid, + str(exc), + delay, + exc_info=True, + ) else: - log_method("%s: encountered an exception, reconnecting in %.1f seconds", self.conn.uuid, delay, exc_info=True) + log_method( + "%s: encountered an exception, reconnecting in %.1f seconds", + self.conn.uuid, + delay, + exc_info=True, + ) await asyncio.sleep(delay) def _should_raise(self, exc: Exception) -> bool: @@ -99,11 +144,16 @@ def 
_should_raise(self, exc: Exception) -> bool: return False for ignored in self.exceptions: if isinstance(exc, ignored): - LOG.warning("%s: encountered exception %s, which is on the ignore list. Raising", self.conn.uuid, str(exc)) + LOG.warning( + "%s: encountered exception %s, which is on the ignore list. Raising", + self.conn.uuid, + str(exc), + ) return True return False async def _handler(self, connection, handler): + message = None try: async for message in connection.read(): if not self.running: @@ -114,11 +164,13 @@ async def _handler(self, connection, handler): if not self.running: return if self.log_on_error: + # message is guaranteed to be assigned from the async for loop + log_message = message # type: ignore[possibly-unbound] if connection.uuid in {HUOBI, HUOBI_DM, HUOBI_SWAP}: - message = zlib.decompress(message, 16 + zlib.MAX_WBITS) + log_message = zlib.decompress(message, 16 + zlib.MAX_WBITS) # type: ignore[possibly-unbound] elif connection.uuid in {OKCOIN, OKX}: - message = zlib.decompress(message, -15) - LOG.error("%s: error handling message %s", connection.uuid, message) + log_message = zlib.decompress(message, -15) # type: ignore[possibly-unbound] + LOG.error("%s: error handling message %s", connection.uuid, log_message) # exception will be logged with traceback when connection handler # retries the connection raise diff --git a/cryptofeed/exchanges/__init__.py b/cryptofeed/exchanges/__init__.py index ba7dea4b1..756d38a80 100644 --- a/cryptofeed/exchanges/__init__.py +++ b/cryptofeed/exchanges/__init__.py @@ -5,6 +5,8 @@ associated with this software. """ +from typing import Any, Dict + from cryptofeed.defines import ( ASCENDEX, ASCENDEX_FUTURES, @@ -94,7 +96,7 @@ from .shim_monitor import get_shim_usage as get_shim_usage # Maps string name to class name for use with config -EXCHANGE_MAP = { +EXCHANGE_MAP: Dict[str, Any] = { ASCENDEX: AscendEX, ASCENDEX_FUTURES: AscendEXFutures, BEQUANT: Bequant, diff --git a/cryptofeed/exchanges/ascendex.py b/cryptofeed/exchanges/ascendex.py index 44ce1d093..1e022d229 100644 --- a/cryptofeed/exchanges/ascendex.py +++ b/cryptofeed/exchanges/ascendex.py @@ -1,12 +1,18 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + from collections import defaultdict -from typing import Dict, Tuple -from cryptofeed.connection import AsyncConnection, RestEndpoint, Routes, WebsocketEndpoint +from typing import Dict, Tuple, cast +from cryptofeed.connection import ( + AsyncConnection, + RestEndpoint, + Routes, + WebsocketEndpoint, +) import logging from decimal import Decimal @@ -19,19 +25,34 @@ from cryptofeed.types import Trade, OrderBook -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class AscendEX(Feed): id = ASCENDEX - rest_endpoints = [RestEndpoint('https://ascendex.com', routes=Routes('/api/pro/v1/products'), sandbox='https://api-test.ascendex-sandbox.com')] + rest_endpoints = [ + RestEndpoint( + "https://ascendex.com", + routes=Routes("/api/pro/v1/products"), + sandbox="https://api-test.ascendex-sandbox.com", + ) + ] websocket_channels = { - L2_BOOK: 'depth:', - TRADES: 'trades:', + L2_BOOK: "depth:", + TRADES: "trades:", } # Docs, https://ascendex.github.io/ascendex-pro-api/#websocket-authentication # noinspection PyTypeChecker - websocket_endpoints = [WebsocketEndpoint('wss://ascendex.com/1/api/pro/v1/stream', channel_filter=(websocket_channels[L2_BOOK], websocket_channels[TRADES],), sandbox='wss://api-test.ascendex-sandbox.com/1/api/pro/v1/stream',)] + websocket_endpoints = [ + WebsocketEndpoint( + "wss://ascendex.com/1/api/pro/v1/stream", + channel_filter=( + websocket_channels[L2_BOOK], + websocket_channels[TRADES], + ), + sandbox="wss://api-test.ascendex-sandbox.com/1/api/pro/v1/stream", + ) + ] @classmethod def timestamp_normalize(cls, ts: float) -> float: @@ -42,13 +63,13 @@ def _parse_symbol_data(cls, data: dict) -> Tuple[Dict, Dict]: ret = {} info = defaultdict(dict) - for entry in data['data']: + for entry in data["data"]: # Only "Normal" status symbols are tradeable - if entry['status'] == 'Normal': - s = Symbol(entry['baseAsset'], entry['quoteAsset']) - ret[s.normalized] = entry['symbol'] - info['tick_size'][s.normalized] = entry['tickSize'] - info['instrument_type'][s.normalized] = s.type + if entry["status"] == "Normal": + s = Symbol(entry["baseAsset"], entry["quoteAsset"]) + ret[s.normalized] = entry["symbol"] + info["tick_size"][s.normalized] = entry["tickSize"] + info["instrument_type"][s.normalized] = s.type return ret, info @@ -70,35 +91,38 @@ async def _trade(self, msg: dict, timestamp: float): }] } """ - for trade in msg['data']: - t = Trade(self.id, - self.exchange_symbol_to_std_symbol(msg['symbol']), - SELL if trade['bm'] else BUY, - Decimal(trade['q']), - Decimal(trade['p']), - self.timestamp_normalize(trade['ts']), - raw=trade) + for trade in msg["data"]: + t = Trade( + self.id, + self.exchange_symbol_to_std_symbol(msg["symbol"]), + SELL if trade["bm"] else BUY, + Decimal(trade["q"]), + Decimal(trade["p"]), + self.timestamp_normalize(trade["ts"]), + raw=trade, + ) await self.callback(TRADES, t, timestamp) async def _book(self, msg: dict, timestamp: float): - sequence_number = msg['data']['seqnum'] - pair = self.exchange_symbol_to_std_symbol(msg['symbol']) + sequence_number = msg["data"]["seqnum"] + pair = self.exchange_symbol_to_std_symbol(msg["symbol"]) delta = {BID: [], ASK: []} - if msg['m'] == 'depth-snapshot': + if msg["m"] == "depth-snapshot": self.seq_no[pair] = sequence_number self._l2_book[pair] = OrderBook(self.id, pair, max_depth=self.max_depth) else: # ignore messages while we wait for the snapshot if self.seq_no[pair] is None: return - if self.seq_no[pair] + 1 != sequence_number: + current_seq = cast(int, self.seq_no[pair]) + if 
current_seq + 1 != sequence_number: raise MissingSequenceNumber self.seq_no[pair] = sequence_number - for side in ('bids', 'asks'): - for price, amount in msg['data'][side]: - s = BID if side == 'bids' else ASK + for side in ("bids", "asks"): + for price, amount in msg["data"][side]: + s = BID if side == "bids" else ASK price = Decimal(price) size = Decimal(amount) if size == 0: @@ -109,22 +133,30 @@ async def _book(self, msg: dict, timestamp: float): delta[s].append((price, size)) self._l2_book[pair].book[s][price] = size - await self.book_callback(L2_BOOK, self._l2_book[pair], timestamp, timestamp=self.timestamp_normalize(msg['data']['ts']), raw=msg, delta=delta if msg['m'] != 'depth-snapshot' else None, sequence_number=sequence_number) + await self.book_callback( + L2_BOOK, + self._l2_book[pair], + timestamp, + timestamp=self.timestamp_normalize(msg["data"]["ts"]), + raw=msg, + delta=delta if msg["m"] != "depth-snapshot" else None, + sequence_number=sequence_number, + ) async def message_handler(self, msg: str, conn, timestamp: float): - msg = json.loads(msg, parse_float=Decimal) - if 'm' in msg: - if msg['m'] == 'depth' or msg['m'] == 'depth-snapshot': + msg = json.loads(msg, parse_float=Decimal) + if "m" in msg: + if msg["m"] == "depth" or msg["m"] == "depth-snapshot": await self._book(msg, timestamp) - elif msg['m'] == 'trades': + elif msg["m"] == "trades": await self._trade(msg, timestamp) - elif msg['m'] == 'ping': + elif msg["m"] == "ping": await conn.write('{"op":"pong"}') - elif msg['m'] == 'connected': + elif msg["m"] == "connected": return - elif msg['m'] == 'sub': + elif msg["m"] == "sub": return else: LOG.warning("%s: Invalid message type %s", self.id, msg) @@ -141,9 +173,13 @@ async def subscribe(self, conn: AsyncConnection): if channel == "depth:": l2_pairs.extend(pairs) - message = {'op': 'sub', 'ch': channel + ','.join(pairs)} + message = {"op": "sub", "ch": channel + ",".join(pairs)} await conn.write(json.dumps(message)) for pair in l2_pairs: - message = {"op": "req", "action": "depth-snapshot", "args": {"symbol": pair}} + message = { + "op": "req", + "action": "depth-snapshot", + "args": {"symbol": pair}, + } await conn.write(json.dumps(message)) diff --git a/cryptofeed/exchanges/backpack/feed.py b/cryptofeed/exchanges/backpack/feed.py index 65d3c2762..9bf2eb49e 100644 --- a/cryptofeed/exchanges/backpack/feed.py +++ b/cryptofeed/exchanges/backpack/feed.py @@ -313,6 +313,7 @@ def is_open(self) -> bool: async def read(self): if self.session is None: await self._open() + assert self.session is not None while True: message = await self.session.read() yield message @@ -320,6 +321,7 @@ async def read(self): async def write(self, msg: str): if self.session is None: await self._open() + assert self.session is not None await self.session.send(json.loads(msg)) async def close(self): diff --git a/cryptofeed/exchanges/backpack/health.py b/cryptofeed/exchanges/backpack/health.py index 8af470c2f..a439fbfa5 100644 --- a/cryptofeed/exchanges/backpack/health.py +++ b/cryptofeed/exchanges/backpack/health.py @@ -1,9 +1,10 @@ """Health evaluation for the Backpack native feed.""" + from __future__ import annotations import time from dataclasses import dataclass -from typing import List +from typing import List, cast from .metrics import BackpackMetrics @@ -15,8 +16,10 @@ class BackpackHealthReport: metrics: dict -def evaluate_health(metrics: BackpackMetrics, *, max_snapshot_age: float = 60.0) -> BackpackHealthReport: - snapshot = metrics.snapshot() +def evaluate_health( + metrics: BackpackMetrics, *, max_snapshot_age: float = 60.0 +)
-> BackpackHealthReport: + snapshot = cast(dict, metrics.snapshot()) reasons: List[str] = [] healthy = True @@ -24,29 +27,34 @@ def evaluate_health(metrics: BackpackMetrics, *, max_snapshot_age: float = 60.0) healthy = False reasons.append("authentication failures detected") - if snapshot["ws_errors"] > 0: + ws_errors = cast(int, snapshot["ws_errors"]) + if ws_errors > 0: healthy = False reasons.append("websocket errors observed") - if snapshot["parser_errors"] > 0: + parser_errors = cast(int, snapshot["parser_errors"]) + if parser_errors > 0: healthy = False reasons.append("parser errors detected") last_snapshot = snapshot.get("last_snapshot_timestamp") if last_snapshot is not None: - age = time.time() - last_snapshot + last_snapshot_ts = cast(float, last_snapshot) + age = time.time() - last_snapshot_ts if age > max_snapshot_age: healthy = False reasons.append(f"order book snapshot stale ({int(age)}s)") last_message = snapshot.get("last_message_timestamp") if last_message is not None: - cadence = time.time() - last_message + last_message_ts = cast(float, last_message) + cadence = time.time() - last_message_ts if cadence > max_snapshot_age: healthy = False reasons.append(f"no messages received in {int(cadence)}s") - if snapshot["dropped_messages"] > 0: + dropped_messages = cast(int, snapshot["dropped_messages"]) + if dropped_messages > 0: healthy = False reasons.append("dropped websocket messages") diff --git a/cryptofeed/exchanges/backpack/rest.py b/cryptofeed/exchanges/backpack/rest.py index fb97e0111..3adcea098 100644 --- a/cryptofeed/exchanges/backpack/rest.py +++ b/cryptofeed/exchanges/backpack/rest.py @@ -1,4 +1,5 @@ """Backpack REST client built on cryptofeed HTTPAsyncConn.""" + from __future__ import annotations from dataclasses import dataclass @@ -33,7 +34,9 @@ class BackpackRestClient: def __init__(self, config: BackpackConfig, *, http_conn_factory=None) -> None: self._config = config - factory = http_conn_factory or (lambda: HTTPAsyncConn("backpack", exchange_id=config.exchange_id)) + factory = http_conn_factory or ( + lambda: HTTPAsyncConn("backpack", exchange_id=config.exchange_id) + ) self._conn: HTTPAsyncConn = factory() self._closed = False @@ -48,13 +51,17 @@ async def fetch_markets(self) -> Iterable[Dict[str, Any]]: text = await self._conn.read(url) try: data = json.loads(text) - except Exception as exc: # pragma: no cover - JSON backend may raise generic Exception types + except ( + Exception + ) as exc: # pragma: no cover - JSON backend may raise generic Exception types raise BackpackRestError(f"Unable to parse markets payload: {exc}") from exc if not isinstance(data, (list, tuple)): raise BackpackRestError("Markets endpoint returned unexpected payload") return data - async def fetch_order_book(self, *, native_symbol: str, depth: int = 50) -> BackpackOrderBookSnapshot: + async def fetch_order_book( + self, *, native_symbol: str, depth: int = 50 + ) -> BackpackOrderBookSnapshot: """Fetch an order book snapshot for the provided native Backpack symbol.""" url = f"{self._config.rest_endpoint}{self.L2_DEPTH_PATH}" params = {"symbol": native_symbol, "limit": depth} @@ -62,7 +69,9 @@ async def fetch_order_book(self, *, native_symbol: str, depth: int = 50) -> Back try: data = json.loads(text) except Exception as exc: # pragma: no cover - raise BackpackRestError(f"Unable to parse order book payload: {exc}") from exc + raise BackpackRestError( + f"Unable to parse order book payload: {exc}" + ) from exc if not isinstance(data, dict) or "bids" not in data or "asks" not in data: 
raise BackpackRestError("Malformed order book payload") @@ -75,13 +84,15 @@ async def fetch_order_book(self, *, native_symbol: str, depth: int = 50) -> Back timestamp_ms=data.get("timestamp"), ) - async def fetch_trades(self, *, native_symbol: str, limit: int = 100) -> Iterable[Dict[str, Any]]: + async def fetch_trades( + self, *, native_symbol: str, limit: int = 100 + ) -> Iterable[Dict[str, Any]]: """Fetch recent trades for the provided native Backpack symbol. - + Args: native_symbol: Native Backpack symbol (e.g., "BTC_USDC") limit: Maximum number of trades to fetch (default: 100, max: 1000) - + Returns: List of recent trades """ @@ -105,37 +116,35 @@ async def fetch_klines( interval: str = "1m", start_time: Optional[int] = None, end_time: Optional[int] = None, - limit: Optional[int] = None + limit: Optional[int] = None, ) -> Iterable[Dict[str, Any]]: """Fetch K-line/candle data for the provided native Backpack symbol. - + Args: native_symbol: Native Backpack symbol (e.g., "BTC_USDC") interval: Candle interval (1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w, 1month) start_time: Start timestamp in seconds (UTC) end_time: End timestamp in seconds (UTC), defaults to current time if not provided limit: Maximum number of candles to fetch - + Returns: List of K-line data """ url = f"{self._config.rest_endpoint}{self.KLINES_PATH}" - params = { - "symbol": native_symbol, - "interval": interval - } - + params = {"symbol": native_symbol, "interval": interval} + # API requires startTime to be present if start_time is None: # Default to 24 hours ago if not specified import time + start_time = int(time.time()) - 86400 - - params["startTime"] = start_time - + + params["startTime"] = str(start_time) + if end_time is not None: - params["endTime"] = end_time - + params["endTime"] = str(end_time) + text = await self._conn.read(url, params=params) try: data = json.loads(text) diff --git a/cryptofeed/exchanges/backpack/ws.py b/cryptofeed/exchanges/backpack/ws.py index 21de0cf29..5f2403c58 100644 --- a/cryptofeed/exchanges/backpack/ws.py +++ b/cryptofeed/exchanges/backpack/ws.py @@ -1,4 +1,5 @@ """Backpack WebSocket session abstraction leveraging cryptofeed WSAsyncConn.""" + from __future__ import annotations import asyncio @@ -52,7 +53,9 @@ def __init__( self._metrics = deps.metrics factory = deps.conn_factory or ( - lambda: WSAsyncConn(self._config.ws_endpoint, "backpack", exchange_id=config.exchange_id) + lambda: WSAsyncConn( + self._config.ws_endpoint, "backpack", exchange_id=config.exchange_id + ) ) self._conn = factory() @@ -79,7 +82,9 @@ def _resolve_dependencies( deps: Optional[BackpackWsDependencies], legacy_kwargs: dict ) -> BackpackWsDependencies: if deps is not None and legacy_kwargs: - raise ValueError("Provide either dependencies or legacy keyword arguments, not both.") + raise ValueError( + "Provide either dependencies or legacy keyword arguments, not both." 
+ ) if deps is not None: return deps @@ -191,10 +196,15 @@ async def _handle_auth_failure(self) -> None: self._last_auth_timestamp_us = None async def _send_auth(self) -> None: + assert self._auth_helper is not None try: timestamp = self._auth_helper._current_timestamp_us() - headers = self._auth_helper.build_headers(method="GET", path="/ws/auth", timestamp_us=timestamp) - except Exception as exc: # pragma: no cover - defensive, metrics capture auth failures + headers = self._auth_helper.build_headers( + method="GET", path="/ws/auth", timestamp_us=timestamp + ) + except ( + Exception + ) as exc: # pragma: no cover - defensive, metrics capture auth failures raise BackpackAuthError(str(exc)) from exc payload = { diff --git a/cryptofeed/exchanges/bitdotcom.py b/cryptofeed/exchanges/bitdotcom.py index c0a604c50..69ad4a323 100644 --- a/cryptofeed/exchanges/bitdotcom.py +++ b/cryptofeed/exchanges/bitdotcom.py @@ -1,6 +1,7 @@ -''' +""" Copyright (C) 2021 - STS Digital -''' +""" + import itertools import logging from decimal import Decimal @@ -11,37 +12,90 @@ import hmac from cryptofeed.json_utils import json -from cryptofeed.connection import AsyncConnection, RestEndpoint, Routes, WebsocketEndpoint - -from cryptofeed.defines import ASK, BALANCES, BID, BUY, BITDOTCOM, CANCELLED, FILLED, FILLS, FUTURES, L2_BOOK, LIMIT, MARKET, OPEN, OPTION, PENDING, PERPETUAL, SELL, SPOT, STOP_LIMIT, STOP_MARKET, TICKER, TRADES, ORDER_INFO, TRIGGER_LIMIT, TRIGGER_MARKET +from cryptofeed.connection import ( + AsyncConnection, + RestEndpoint, + Routes, + WebsocketEndpoint, +) + +from cryptofeed.defines import ( + ASK, + BALANCES, + BID, + BUY, + BITDOTCOM, + CANCELLED, + FILLED, + FILLS, + FUTURES, + L2_BOOK, + LIMIT, + MARKET, + OPEN, + OPTION, + PENDING, + PERPETUAL, + SELL, + SPOT, + STOP_LIMIT, + STOP_MARKET, + TICKER, + TRADES, + ORDER_INFO, + TRIGGER_LIMIT, + TRIGGER_MARKET, +) from cryptofeed.exceptions import MissingSequenceNumber from cryptofeed.feed import Feed from cryptofeed.symbols import Symbol, str_to_symbol from cryptofeed.types import Trade, Ticker, OrderBook, OrderInfo, Balance, Fill -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class BitDotCom(Feed): id = BITDOTCOM websocket_endpoints = [ - WebsocketEndpoint('wss://spot-ws.bit.com', instrument_filter=('TYPE', (SPOT,)), sandbox='wss://betaspot-ws.bitexch.dev'), - WebsocketEndpoint('wss://ws.bit.com', instrument_filter=('TYPE', (FUTURES, OPTION, PERPETUAL)), sandbox='wss://betaws.bitexch.dev'), + WebsocketEndpoint( + "wss://spot-ws.bit.com", + instrument_filter=("TYPE", (SPOT,)), + sandbox="wss://betaspot-ws.bitexch.dev", + ), + WebsocketEndpoint( + "wss://ws.bit.com", + instrument_filter=("TYPE", (FUTURES, OPTION, PERPETUAL)), + sandbox="wss://betaws.bitexch.dev", + ), ] rest_endpoints = [ - RestEndpoint('https://spot-api.bit.com', instrument_filter=('TYPE', (SPOT,)), sandbox='https://betaspot-api.bitexch.dev', routes=Routes('/spot/v1/instruments', authentication='/spot/v1/ws/auth')), - RestEndpoint('https://api.bit.com', instrument_filter=('TYPE', (OPTION, FUTURES, PERPETUAL)), sandbox='https://betaapi.bitexch.dev', routes=Routes('/linear/v1/instruments?currency={}&active=true', currencies=True, authentication='/v1/ws/auth')) + RestEndpoint( + "https://spot-api.bit.com", + instrument_filter=("TYPE", (SPOT,)), + sandbox="https://betaspot-api.bitexch.dev", + routes=Routes("/spot/v1/instruments", authentication="/spot/v1/ws/auth"), + ), + RestEndpoint( + "https://api.bit.com", + instrument_filter=("TYPE", (OPTION, 
FUTURES, PERPETUAL)), + sandbox="https://betaapi.bitexch.dev", + routes=Routes( + "/linear/v1/instruments?currency={}&active=true", + currencies=True, + authentication="/v1/ws/auth", + ), + ), ] websocket_channels = { - L2_BOOK: 'depth', - TRADES: 'trade', - TICKER: 'ticker', - ORDER_INFO: 'order', - BALANCES: 'account', - FILLS: 'user_trade', + L2_BOOK: "depth", + TRADES: "trade", + TICKER: "ticker", + ORDER_INFO: "order", + BALANCES: "account", + FILLS: "user_trade", # funding rates paid and received } request_limit = 10 @@ -53,8 +107,8 @@ def __init__(self, *args, **kwargs): @classmethod def _symbol_endpoint_prepare(cls, ep: RestEndpoint) -> str: if ep.routes.currencies: - return ep.route('instruments').format('USDT') - return ep.route('instruments') + return ep.route("instruments").format("USDT") + return ep.route("instruments") @classmethod def timestamp_normalize(cls, ts: float) -> float: @@ -66,34 +120,49 @@ def _parse_symbol_data(cls, data: list) -> Tuple[Dict, Dict]: info = defaultdict(dict) for entry in data: - if entry['code'] != 0: - raise ValueError('%s - Failed to collect instrument data - %s', cls.id, entry['message']) + if entry["code"] != 0: + raise ValueError( + "%s - Failed to collect instrument data - %s", + cls.id, + entry["message"], + ) - for mapping in entry['data']: - if 'category' in mapping: + for mapping in entry["data"]: + if "category" in mapping: expiry = None strike = None otype = None - if mapping['category'] == 'option': + if mapping["category"] == "option": stype = OPTION - strike = int(float(mapping['strike_price'])) - expiry = cls.timestamp_normalize(mapping['expiration_at']) - otype = mapping['option_type'] - elif mapping['category'] == 'future': - if 'PERPETUAL' in mapping['instrument_id']: + strike = int(float(mapping["strike_price"])) + expiry = cls.timestamp_normalize(mapping["expiration_at"]) + otype = mapping["option_type"] + elif mapping["category"] == "future": + if "PERPETUAL" in mapping["instrument_id"]: stype = PERPETUAL else: stype = FUTURES - expiry = cls.timestamp_normalize(mapping['expiration_at']) - - s = Symbol(mapping['base_currency'], mapping['quote_currency'], type=stype, option_type=otype, expiry_date=expiry, strike_price=strike) - ret[s.normalized] = mapping['instrument_id'] - info['instrument_type'][s.normalized] = stype + expiry = cls.timestamp_normalize(mapping["expiration_at"]) + else: + stype = SPOT # Default to spot for unknown categories + + s = Symbol( + mapping["base_currency"], + mapping["quote_currency"], + type=stype, + option_type=otype, + expiry_date=expiry, + strike_price=strike, + ) + ret[s.normalized] = mapping["instrument_id"] + info["instrument_type"][s.normalized] = stype else: # Spot - s = Symbol(mapping['base_currency'], mapping['quote_currency'], type=SPOT) - ret[s.normalized] = mapping['pair'] - info['instrument_type'][s.normalized] = SPOT + s = Symbol( + mapping["base_currency"], mapping["quote_currency"], type=SPOT + ) + ret[s.normalized] = mapping["pair"] + info["instrument_type"][s.normalized] = SPOT return ret, info @@ -113,12 +182,16 @@ def encode_list(self, item_list: list): for item in item_list: obj_val = self.encode_object(item) list_val.append(obj_val) - output = '&'.join(list_val) - return '[' + output + ']' + output = "&".join(list_val) + return "[" + output + "]" def get_signature(self, api_path: str, param_map: dict): - str_to_sign = api_path + '&' + self.encode_object(param_map) - return hmac.new(self.key_secret.encode('utf-8'), str_to_sign.encode('utf-8'), 
digestmod=hashlib.sha256, + ).hexdigest() def encode_object(self, param_map: dict): sorted_keys = sorted(param_map.keys()) @@ -127,35 +200,47 @@ def encode_object(self, param_map: dict): ret_list = [] for key in sorted_keys: val = param_map[key] if isinstance(val, list): list_val = self.encode_list(val) - ret_list.append(f'{key}={list_val}') + ret_list.append(f"{key}={list_val}") elif isinstance(val, dict): dict_val = self.encode_object(val) - ret_list.append(f'{key}={dict_val}') + ret_list.append(f"{key}={dict_val}") elif isinstance(val, bool): bool_val = str(val).lower() - ret_list.append(f'{key}={bool_val}') + ret_list.append(f"{key}={bool_val}") else: general_val = str(val) - ret_list.append(f'{key}={general_val}') + ret_list.append(f"{key}={general_val}") sorted_list = sorted(ret_list) - return '&'.join(sorted_list) + return "&".join(sorted_list) async def authenticate(self, connection: AsyncConnection): if not self.key_id or not self.key_secret: return - if any([self.is_authenticated_channel(self.exchange_channel_to_std(c)) for c in connection.subscription]): + if any( + [ + self.is_authenticated_channel(self.exchange_channel_to_std(c)) + for c in connection.subscription + ] + ): symbols = list(set(itertools.chain(*connection.subscription.values()))) sym = str_to_symbol(self.exchange_symbol_to_std_symbol(symbols[0])) for ep in self.rest_endpoints: if sym.type in ep.instrument_filter[1]: ts = int(round(time.time() * 1000)) - signature = self.get_signature(ep.routes.authentication, {'timestamp': ts}) - params = {'timestamp': ts, 'signature': signature} - ret = self.http_sync.read(ep.route('authentication', sandbox=self.sandbox), params=params, headers={'X-Bit-Access-Key': self.key_id}, json=True) - if ret['code'] != 0 or 'token' not in ret['data']: - LOG.warning('%s: authentication failed: %s', ret) - token = ret['data']['token'] + signature = self.get_signature( + ep.routes.authentication, {"timestamp": ts} + ) + params = {"timestamp": ts, "signature": signature} + ret = self.http_sync.read( + ep.route("authentication", sandbox=self.sandbox), + params=params, + headers={"X-Bit-Access-Key": self.key_id}, + json=True, + ) + if ret["code"] != 0 or "token" not in ret["data"]: + LOG.warning("%s: authentication failed: %s", self.id, ret) + token = ret["data"]["token"] self._auth_token = token return @@ -167,12 +252,14 @@ async def subscribe(self, connection: AsyncConnection): continue stype = str_to_symbol(self.exchange_symbol_to_std_symbol(symbols[0])).type msg = { - 'type': 'subscribe', - 'channels': [chan], - 'instruments' if stype in {PERPETUAL, FUTURES, OPTION} else 'pairs': symbols, + "type": "subscribe", + "channels": [chan], + "instruments" + if stype in {PERPETUAL, FUTURES, OPTION} + else "pairs": symbols, } if self.is_authenticated_channel(self.exchange_channel_to_std(chan)): - msg['token'] = self._auth_token + msg["token"] = self._auth_token await connection.write(json.dumps(msg)) async def _trade(self, data: dict, timestamp: float): @@ -192,19 +279,23 @@ async def _trade(self, data: dict, timestamp: float): }] } """ - for t in data['data']: - trade = Trade(self.id, - self.exchange_symbol_to_std_symbol(t.get('instrument_id') or t.get('pair')), - SELL if t['side'] == 'sell' else BUY, - Decimal(t['qty']), - Decimal(t['price']), - self.timestamp_normalize(t['created_at']), - id=t['trade_id'], - raw=t) + for t in data["data"]: + trade = Trade( +
self.id, + self.exchange_symbol_to_std_symbol( + t.get("instrument_id") or t.get("pair") + ), + SELL if t["side"] == "sell" else BUY, + Decimal(t["qty"]), + Decimal(t["price"]), + self.timestamp_normalize(t["created_at"]), + id=t["trade_id"], + raw=t, + ) await self.callback(TRADES, trade, timestamp) async def _book(self, data: dict, timestamp: float): - ''' + """ Snapshot { @@ -242,17 +333,19 @@ async def _book(self, data: dict, timestamp: float): ] } } - ''' - if data['data']['type'] == 'update': - pair = self.exchange_symbol_to_std_symbol(data['data'].get('instrument_id') or data['data'].get('pair')) - if data['data']['sequence'] != self._sequence_no[pair] + 1: + """ + if data["data"]["type"] == "update": + pair = self.exchange_symbol_to_std_symbol( + data["data"].get("instrument_id") or data["data"].get("pair") + ) + if data["data"]["sequence"] != self._sequence_no[pair] + 1: raise MissingSequenceNumber("Missing sequence number, restarting") - self._sequence_no[pair] = data['data']['sequence'] + self._sequence_no[pair] = data["data"]["sequence"] delta = {BID: [], ASK: []} - for side, price, amount in data['data']['changes']: - side = ASK if side == 'sell' else BID + for side, price, amount in data["data"]["changes"]: + side = ASK if side == "sell" else BID price = Decimal(price) amount = Decimal(amount) @@ -263,15 +356,44 @@ async def _book(self, data: dict, timestamp: float): delta[side].append((price, amount)) self._l2_book[pair].book[side][price] = amount - await self.book_callback(L2_BOOK, self._l2_book[pair], timestamp, timestamp=self.timestamp_normalize(data['timestamp']), raw=data, sequence_number=self._sequence_no[pair], delta=delta) + await self.book_callback( + L2_BOOK, + self._l2_book[pair], + timestamp, + timestamp=self.timestamp_normalize(data["timestamp"]), + raw=data, + sequence_number=self._sequence_no[pair], + delta=delta, + ) else: - pair = self.exchange_symbol_to_std_symbol(data['data'].get('instrument_id') or data['data'].get('pair')) - self._l2_book[pair] = OrderBook(self.id, pair, max_depth=self.max_depth, bids={Decimal(price): Decimal(size) for price, size in data['data']['bids']}, asks={Decimal(price): Decimal(size) for price, size in data['data']['asks']}) - self._sequence_no[pair] = data['data']['sequence'] - await self.book_callback(L2_BOOK, self._l2_book[pair], timestamp, timestamp=self.timestamp_normalize(data['timestamp']), raw=data, sequence_number=data['data']['sequence']) + pair = self.exchange_symbol_to_std_symbol( + data["data"].get("instrument_id") or data["data"].get("pair") + ) + self._l2_book[pair] = OrderBook( + self.id, + pair, + max_depth=self.max_depth, + bids={ + Decimal(price): Decimal(size) + for price, size in data["data"]["bids"] + }, + asks={ + Decimal(price): Decimal(size) + for price, size in data["data"]["asks"] + }, + ) + self._sequence_no[pair] = data["data"]["sequence"] + await self.book_callback( + L2_BOOK, + self._l2_book[pair], + timestamp, + timestamp=self.timestamp_normalize(data["timestamp"]), + raw=data, + sequence_number=data["data"]["sequence"], + ) async def _ticker(self, data: dict, timestamp: float): - ''' + """ { 'channel': 'ticker', 'timestamp': 1639093870710, @@ -299,43 +421,45 @@ async def _ticker(self, data: dict, timestamp: float): 'max_buy': '4280.50000000' } } - ''' - if data['data']['best_bid'] and data['data']['best_ask']: + """ + if data["data"]["best_bid"] and data["data"]["best_ask"]: t = Ticker( self.id, - self.exchange_symbol_to_std_symbol(data['data'].get('instrument_id') or data['data'].get('pair')), - 
Decimal(data['data']['best_bid']), - Decimal(data['data']['best_ask']), - self.timestamp_normalize(data['timestamp']), - raw=data + self.exchange_symbol_to_std_symbol( + data["data"].get("instrument_id") or data["data"].get("pair") + ), + Decimal(data["data"]["best_bid"]), + Decimal(data["data"]["best_ask"]), + self.timestamp_normalize(data["timestamp"]), + raw=data, ) await self.callback(TICKER, t, timestamp) def _order_type_translate(self, t: str) -> str: - if t == 'limit': + if t == "limit": return LIMIT - if t == 'market': + if t == "market": return MARKET - if t == 'stop-limit': + if t == "stop-limit": return STOP_LIMIT - if t == 'stop-market': + if t == "stop-market": return STOP_MARKET - if t == 'trigger-limit': + if t == "trigger-limit": return TRIGGER_LIMIT - if t == 'trigger-market': + if t == "trigger-market": return TRIGGER_MARKET - raise ValueError('Invalid order type detected %s', t) + raise ValueError("Invalid order type detected %s", t) def _status_translate(self, s: str) -> str: - if s == 'open': + if s == "open": return OPEN - if s == 'pending': + if s == "pending": return PENDING - if s == 'filled': + if s == "filled": return FILLED - if s == 'cancelled': + if s == "cancelled": return CANCELLED - raise ValueError('Invalid order status detected %s', s) + raise ValueError("Invalid order status detected %s", s) async def _order(self, msg: dict, timestamp: float): """ @@ -376,24 +500,24 @@ async def _order(self, msg: dict, timestamp: float): ] } """ - for entry in msg['data']: + for entry in msg["data"]: oi = OrderInfo( self.id, - self.exchange_symbol_to_std_symbol(entry['instrument_id']), - entry['order_id'], - BUY if entry['side'] == 'buy' else SELL, - self._status_translate(entry['status']), - self._order_type_translate(entry['order_type']), - Decimal(entry['price']), - Decimal(entry['filled_qty']), - Decimal(entry['remain_qty']), - self.timestamp_normalize(entry['updated_at']), - raw=entry + self.exchange_symbol_to_std_symbol(entry["instrument_id"]), + entry["order_id"], + BUY if entry["side"] == "buy" else SELL, + self._status_translate(entry["status"]), + self._order_type_translate(entry["order_type"]), + Decimal(entry["price"]), + Decimal(entry["filled_qty"]), + Decimal(entry["remain_qty"]), + self.timestamp_normalize(entry["updated_at"]), + raw=entry, ) await self.callback(ORDER_INFO, oi, timestamp) async def _balances(self, msg: dict, timestamp: float): - ''' + """ Futures/Options { "channel":"account", @@ -455,30 +579,31 @@ async def _balances(self, msg: dict, timestamp: float): ] } } - ''' - if 'balances' in msg['data']: + """ + if "balances" in msg["data"]: # Spot - for balance in msg['data']['balances']: + for balance in msg["data"]["balances"]: b = Balance( self.id, - balance['currency'], - Decimal(balance['available']), - Decimal(balance['frozen']), - raw=msg + balance["currency"], + Decimal(balance["available"]), + Decimal(balance["frozen"]), + raw=msg, ) await self.callback(BALANCES, b, timestamp) else: b = Balance( self.id, - msg['data']['currency'], - Decimal(msg['data']['cash_balance']), - Decimal(msg['data']['cash_balance']) - Decimal(msg['data']['available_balance']), - raw=msg + msg["data"]["currency"], + Decimal(msg["data"]["cash_balance"]), + Decimal(msg["data"]["cash_balance"]) + - Decimal(msg["data"]["available_balance"]), + raw=msg, ) await self.callback(BALANCES, b, timestamp) async def _fill(self, msg: dict, timestamp: float): - ''' + """ { "channel":"user_trade", "timestamp":1588997059737, @@ -504,40 +629,40 @@ async def _fill(self, msg: dict, 
timestamp: float): } ] } - ''' - for entry in msg['data']: + """ + for entry in msg["data"]: f = Fill( self.id, - self.exchange_symbol_to_std_symbol(entry['instrument_id']), - BUY if entry['side'] == 'buy' else SELL, - Decimal(entry['qty']), - Decimal(entry['price']), - Decimal(entry['fee']), - str(entry['trade_id']), - str(entry['order_id']), - self._order_type_translate(entry['order_type']), + self.exchange_symbol_to_std_symbol(entry["instrument_id"]), + BUY if entry["side"] == "buy" else SELL, + Decimal(entry["qty"]), + Decimal(entry["price"]), + Decimal(entry["fee"]), + str(entry["trade_id"]), + str(entry["order_id"]), + self._order_type_translate(entry["order_type"]), None, - self.timestamp_normalize(entry['created_at']), - raw=entry + self.timestamp_normalize(entry["created_at"]), + raw=entry, ) await self.callback(FILLS, f, timestamp) async def message_handler(self, msg: str, conn, timestamp: float): msg = json.loads(msg, parse_float=Decimal) - if msg['channel'] == 'depth': + if msg["channel"] == "depth": await self._book(msg, timestamp) - elif msg['channel'] == 'trade': + elif msg["channel"] == "trade": await self._trade(msg, timestamp) - elif msg['channel'] == 'ticker': + elif msg["channel"] == "ticker": await self._ticker(msg, timestamp) - elif msg['channel'] == 'order': + elif msg["channel"] == "order": await self._order(msg, timestamp) - elif msg['channel'] == 'account': + elif msg["channel"] == "account": await self._balances(msg, timestamp) - elif msg['channel'] == 'user_trade': + elif msg["channel"] == "user_trade": await self._fill(msg, timestamp) - elif msg['channel'] == 'subscription': + elif msg["channel"] == "subscription": """ { 'channel': 'subscription', @@ -545,9 +670,13 @@ async def message_handler(self, msg: str, conn, timestamp: float): 'data': {'code': 0, 'subscription': ['trade']} } """ - if msg['data']['code'] == 0: + if msg["data"]["code"] == 0: return else: - LOG.warning("%s: error received from exchange while subscribing: %s", self.id, msg) + LOG.warning( + "%s: error received from exchange while subscribing: %s", + self.id, + msg, + ) else: LOG.warning("%s: Unexpected message received: %s", self.id, msg) diff --git a/cryptofeed/exchanges/bitmex.py b/cryptofeed/exchanges/bitmex.py index 79fbfd7bb..0ba626810 100644 --- a/cryptofeed/exchanges/bitmex.py +++ b/cryptofeed/exchanges/bitmex.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + from typing import Dict, Tuple import hashlib import hmac @@ -15,28 +16,75 @@ from cryptofeed.json_utils import json -from cryptofeed.defines import BID, ASK, BITMEX, BUY, CANCELLED, FILLED, FUNDING, FUTURES, L2_BOOK, LIMIT, LIQUIDATIONS, MARKET, OPEN, OPEN_INTEREST, ORDER_INFO, PERPETUAL, SELL, SPOT, TICKER, TRADES, UNFILLED +from cryptofeed.defines import ( + BID, + ASK, + BITMEX, + BUY, + CANCELLED, + FILLED, + FUNDING, + FUTURES, + L2_BOOK, + LIMIT, + LIQUIDATIONS, + MARKET, + OPEN, + OPEN_INTEREST, + ORDER_INFO, + PERPETUAL, + SELL, + SPOT, + TICKER, + TRADES, + UNFILLED, +) from cryptofeed.feed import Feed from cryptofeed.symbols import Symbol -from cryptofeed.connection import AsyncConnection, RestEndpoint, Routes, WebsocketEndpoint +from cryptofeed.connection import ( + AsyncConnection, + RestEndpoint, + Routes, + WebsocketEndpoint, +) from cryptofeed.exchanges.mixins.bitmex_rest import BitmexRestMixin -from cryptofeed.types import OrderBook, Trade, Ticker, Funding, OrderInfo, OpenInterest, Liquidation +from cryptofeed.types import ( + OrderBook, + Trade, + Ticker, + Funding, + OrderInfo, + OpenInterest, + Liquidation, +) -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class Bitmex(Feed, BitmexRestMixin): id = BITMEX - websocket_endpoints = [WebsocketEndpoint('wss://www.bitmex.com/realtime', sandbox='wss://testnet.bitmex.com/realtime', options={'compression': None})] - rest_endpoints = [RestEndpoint('https://www.bitmex.com', routes=Routes('/api/v1/instrument/active'), sandbox='https://testnet.bitmex.com')] + websocket_endpoints = [ + WebsocketEndpoint( + "wss://www.bitmex.com/realtime", + sandbox="wss://testnet.bitmex.com/realtime", + options={"compression": None}, + ) + ] + rest_endpoints = [ + RestEndpoint( + "https://www.bitmex.com", + routes=Routes("/api/v1/instrument/active"), + sandbox="https://testnet.bitmex.com", + ) + ] websocket_channels = { - L2_BOOK: 'orderBookL2', - TRADES: 'trade', - TICKER: 'quote', - FUNDING: 'funding', - ORDER_INFO: 'order', - OPEN_INTEREST: 'instrument', - LIQUIDATIONS: 'liquidation' + L2_BOOK: "orderBookL2", + TRADES: "trade", + TICKER: "quote", + FUNDING: "funding", + ORDER_INFO: "order", + OPEN_INTEREST: "instrument", + LIQUIDATIONS: "liquidation", } request_limit = 0.5 @@ -46,26 +94,35 @@ def _parse_symbol_data(cls, data: dict) -> Tuple[Dict, Dict]: info = defaultdict(dict) for entry in data: - base = entry['rootSymbol'].replace("XBT", "BTC") - quote = entry['quoteCurrency'].replace("XBT", "BTC") + base = entry["rootSymbol"].replace("XBT", "BTC") + quote = entry["quoteCurrency"].replace("XBT", "BTC") - if entry['typ'] == 'FFWCSX': + if entry["typ"] == "FFWCSX": stype = PERPETUAL - elif entry['typ'] == 'FFCCSX': + elif entry["typ"] == "FFCCSX": stype = FUTURES - elif entry['typ'] == 'IFXXXP': + elif entry["typ"] == "IFXXXP": stype = SPOT else: - LOG.info('Unsupported type %s for instrument %s', entry['typ'], entry['symbol']) + LOG.info( + "Unsupported type %s for instrument %s", + entry["typ"], + entry["symbol"], + ) + continue - s = Symbol(base, quote, type=stype, expiry_date=entry.get('expiry')) + s = Symbol(base, quote, type=stype, expiry_date=entry.get("expiry")) if s.normalized not in ret: - ret[s.normalized] = entry['symbol'] - info['tick_size'][s.normalized] = entry['tickSize'] - info['instrument_type'][s.normalized] = stype - info['is_quanto'][s.normalized] = entry['isQuanto'] + ret[s.normalized] = entry["symbol"] + info["tick_size"][s.normalized] = entry["tickSize"] + 
info["instrument_type"][s.normalized] = stype + info["is_quanto"][s.normalized] = entry["isQuanto"] else: - LOG.info('Ignoring duplicate symbol mapping %s<=>%s', s.normalized, entry['symbol']) + LOG.info( + "Ignoring duplicate symbol mapping %s<=>%s", + s.normalized, + entry["symbol"], + ) return ret, info @@ -80,25 +137,31 @@ def _reset(self): @staticmethod def normalize_order_status(status): status_map = { - 'New': OPEN, - 'Filled': FILLED, - 'Canceled': CANCELLED, + "New": OPEN, + "Filled": FILLED, + "Canceled": CANCELLED, } return status_map[status] def init_order_info(self, o): oi = OrderInfo( self.id, - self.exchange_symbol_to_std_symbol(o['symbol']), - o['orderID'], - BUY if o['side'] == 'Buy' else SELL, - self.normalize_order_status(o['ordStatus']), - LIMIT if o['ordType'].lower() == 'limit' else MARKET if o['ordType'].lower() == 'market' else None, - Decimal(o['avgPx']) if o['avgPx'] else Decimal(o['price']), - Decimal(o['orderQty']), - Decimal(o['leavesQty']), - self.timestamp_normalize(o['timestamp']), - raw=str(o), # Need to convert to string to avoid json serialization error when updating order + self.exchange_symbol_to_std_symbol(o["symbol"]), + o["orderID"], + BUY if o["side"] == "Buy" else SELL, + self.normalize_order_status(o["ordStatus"]), + LIMIT + if o["ordType"].lower() == "limit" + else MARKET + if o["ordType"].lower() == "market" + else None, + Decimal(o["avgPx"]) if o["avgPx"] else Decimal(o["price"]), + Decimal(o["orderQty"]), + Decimal(o["leavesQty"]), + self.timestamp_normalize(o["timestamp"]), + raw=str( + o + ), # Need to convert to string to avoid json serialization error when updating order ) return oi @@ -297,29 +360,29 @@ async def _order(self, msg: dict, timestamp: float): } """ - if msg['action'] == 'partial': + if msg["action"] == "partial": # Initial snapshot of open orders self.open_orders = {} - for o in msg['data']: + for o in msg["data"]: oi = self.init_order_info(o) self.open_orders[oi.id] = oi - elif msg['action'] == 'insert': - for o in msg['data']: + elif msg["action"] == "insert": + for o in msg["data"]: oi = self.init_order_info(o) self.open_orders[oi.id] = oi await self.callback(ORDER_INFO, oi, timestamp) - elif msg['action'] == 'update': - for o in msg['data']: - oi = self.open_orders.get(o['orderID']) + elif msg["action"] == "update": + for o in msg["data"]: + oi = self.open_orders.get(o["orderID"]) if oi: info = oi.to_dict() - if 'ordStatus' in o: - info['status'] = self.normalize_order_status(o['ordStatus']) - if 'leaveQty' in o: - info['remaining'] = Decimal(o['leavesQty']) - if 'avgPx' in o: - info['price'] = Decimal(o['avgPx']) - info['raw'] = str(o) # Not sure if this is needed + if "ordStatus" in o: + info["status"] = self.normalize_order_status(o["ordStatus"]) + if "leaveQty" in o: + info["remaining"] = Decimal(o["leavesQty"]) + if "avgPx" in o: + info["price"] = Decimal(o["avgPx"]) + info["raw"] = str(o) # Not sure if this is needed new_oi = OrderInfo(**info) if new_oi.status in (FILLED, CANCELLED): self.open_orders.pop(new_oi.id) @@ -346,17 +409,17 @@ async def _trade(self, msg: dict, timestamp: float): 'foreignNotional': 40 } """ - for data in msg['data']: - ts = self.timestamp_normalize(data['timestamp']) + for data in msg["data"]: + ts = self.timestamp_normalize(data["timestamp"]) t = Trade( self.id, - self.exchange_symbol_to_std_symbol(data['symbol']), - BUY if data['side'] == 'Buy' else SELL, - Decimal(data['size']), - Decimal(data['price']), + self.exchange_symbol_to_std_symbol(data["symbol"]), + BUY if data["side"] == "Buy" 
else SELL, + Decimal(data["size"]), + Decimal(data["price"]), ts, - id=data['trdMatchID'], - raw=data + id=data["trdMatchID"], + raw=data, ) await self.callback(TRADES, t, timestamp) @@ -367,59 +430,59 @@ async def _book(self, msg: dict, timestamp: float): """ # PERF perf_start(self.id, 'book_msg') - if not msg['data']: + if not msg["data"]: # see https://github.com/bmoscon/cryptofeed/issues/688 # msg['data'] can be an empty list return delta = None # if we reset the book, force a full update - pair = self.exchange_symbol_to_std_symbol(msg['data'][0]['symbol']) + pair = self.exchange_symbol_to_std_symbol(msg["data"][0]["symbol"]) if not self.partial_received[pair]: # per bitmex documentation messages received before partial # should be discarded - if msg['action'] != 'partial': + if msg["action"] != "partial": return self.partial_received[pair] = True - if msg['action'] == 'partial': - for data in msg['data']: - side = BID if data['side'] == 'Buy' else ASK - price = Decimal(data['price']) - size = Decimal(data['size']) - order_id = data['id'] + if msg["action"] == "partial": + for data in msg["data"]: + side = BID if data["side"] == "Buy" else ASK + price = Decimal(data["price"]) + size = Decimal(data["size"]) + order_id = data["id"] self._l2_book[pair].book[side][price] = size self.order_id[pair][side][order_id] = price - elif msg['action'] == 'insert': + elif msg["action"] == "insert": delta = {BID: [], ASK: []} - for data in msg['data']: - side = BID if data['side'] == 'Buy' else ASK - price = Decimal(data['price']) - size = Decimal(data['size']) - order_id = data['id'] + for data in msg["data"]: + side = BID if data["side"] == "Buy" else ASK + price = Decimal(data["price"]) + size = Decimal(data["size"]) + order_id = data["id"] self._l2_book[pair].book[side][price] = size self.order_id[pair][side][order_id] = price delta[side].append((price, size)) - elif msg['action'] == 'update': + elif msg["action"] == "update": delta = {BID: [], ASK: []} - for data in msg['data']: - side = BID if data['side'] == 'Buy' else ASK - update_size = Decimal(data['size']) - order_id = data['id'] + for data in msg["data"]: + side = BID if data["side"] == "Buy" else ASK + update_size = Decimal(data["size"]) + order_id = data["id"] price = self.order_id[pair][side][order_id] self._l2_book[pair].book[side][price] = update_size self.order_id[pair][side][order_id] = price delta[side].append((price, update_size)) - elif msg['action'] == 'delete': + elif msg["action"] == "delete": delta = {BID: [], ASK: []} - for data in msg['data']: - side = BID if data['side'] == 'Buy' else ASK - order_id = data['id'] + for data in msg["data"]: + side = BID if data["side"] == "Buy" else ASK + order_id = data["id"] delete_price = self.order_id[pair][side][order_id] del self.order_id[pair][side][order_id] @@ -432,21 +495,28 @@ async def _book(self, msg: dict, timestamp: float): # PERF perf_end(self.id, 'book_msg') # PERF perf_log(self.id, 'book_msg') - self._l2_book[pair].timestamp = self.timestamp_normalize(msg["data"][0]["timestamp"]) \ - if "data" in msg and isinstance(msg["data"], list) and msg["data"] and "timestamp" in msg["data"][0] \ + self._l2_book[pair].timestamp = ( + self.timestamp_normalize(msg["data"][0]["timestamp"]) + if "data" in msg + and isinstance(msg["data"], list) + and msg["data"] + and "timestamp" in msg["data"][0] else None + ) - await self.book_callback(L2_BOOK, self._l2_book[pair], timestamp, raw=msg, delta=delta) + await self.book_callback( + L2_BOOK, self._l2_book[pair], timestamp, raw=msg, delta=delta 
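orderBookL2 deltas identify price levels by order id rather than by price: only `partial` and `insert` messages carry a price, while `update` and `delete` carry just `id` and `size`. The handler above therefore keeps an id-to-price map per side so later messages can be resolved. A compact sketch of that bookkeeping, simplified to one side of the book with illustrative names:

    from decimal import Decimal

    book = {}          # price -> size for one side of the book
    id_to_price = {}   # BitMEX level id -> price

    def on_insert(level_id: int, price: Decimal, size: Decimal) -> None:
        book[price] = size
        id_to_price[level_id] = price

    def on_update(level_id: int, size: Decimal) -> None:
        # update messages omit the price; recover it from the id map
        book[id_to_price[level_id]] = size

    def on_delete(level_id: int) -> None:
        del book[id_to_price.pop(level_id)]

    on_insert(8799, Decimal("43500.5"), Decimal("1200"))
    on_update(8799, Decimal("800"))
    on_delete(8799)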
+ ) async def _ticker(self, msg: dict, timestamp: float): - for data in msg['data']: + for data in msg["data"]: t = Ticker( self.id, - self.exchange_symbol_to_std_symbol(data['symbol']), - Decimal(data['bidPrice']), - Decimal(data['askPrice']), - self.timestamp_normalize(data['timestamp']), - raw=data + self.exchange_symbol_to_std_symbol(data["symbol"]), + Decimal(data["bidPrice"]), + Decimal(data["askPrice"]), + self.timestamp_normalize(data["timestamp"]), + raw=data, ) await self.callback(TICKER, t, timestamp) @@ -479,17 +549,19 @@ async def _funding(self, msg: dict, timestamp: float): }] } """ - for data in msg['data']: - ts = self.timestamp_normalize(data['timestamp']) - interval = data['fundingInterval'] + for data in msg["data"]: + ts = self.timestamp_normalize(data["timestamp"]) + interval = data["fundingInterval"] f = Funding( self.id, - self.exchange_symbol_to_std_symbol(data['symbol']), + self.exchange_symbol_to_std_symbol(data["symbol"]), None, - data['fundingRate'], - self.timestamp_normalize(data['timestamp'] + timedelta(hours=interval.hour)), + data["fundingRate"], + self.timestamp_normalize( + data["timestamp"] + timedelta(hours=interval.hour) + ), ts, - raw=data + raw=data, ) await self.callback(FUNDING, f, timestamp) @@ -728,10 +800,16 @@ async def _instrument(self, msg: dict, timestamp: float): ] } """ - for data in msg['data']: - if 'openInterest' in data: - ts = self.timestamp_normalize(data['timestamp']) - oi = OpenInterest(self.id, self.exchange_symbol_to_std_symbol(data['symbol']), Decimal(data['openInterest']), ts, raw=data) + for data in msg["data"]: + if "openInterest" in data: + ts = self.timestamp_normalize(data["timestamp"]) + oi = OpenInterest( + self.id, + self.exchange_symbol_to_std_symbol(data["symbol"]), + Decimal(data["openInterest"]), + ts, + raw=data, + ) await self.callback(OPEN_INTEREST, oi, timestamp) async def _liquidation(self, msg: dict, timestamp: float): @@ -746,52 +824,54 @@ async def _liquidation(self, msg: dict, timestamp: float): 'leavesQty': 2020 } """ - if msg['action'] == 'insert': - for data in msg['data']: + if msg["action"] == "insert": + for data in msg["data"]: liq = Liquidation( self.id, - self.exchange_symbol_to_std_symbol(data['symbol']), - BUY if data['side'] == 'Buy' else SELL, - Decimal(data['leavesQty']), - Decimal(data['price']), - data['orderID'], + self.exchange_symbol_to_std_symbol(data["symbol"]), + BUY if data["side"] == "Buy" else SELL, + Decimal(data["leavesQty"]), + Decimal(data["price"]), + data["orderID"], UNFILLED, None, - raw=data + raw=data, ) await self.callback(LIQUIDATIONS, liq, timestamp) async def message_handler(self, msg: str, conn, timestamp: float): msg = json.loads(msg, parse_float=Decimal) - if 'table' in msg: - if msg['table'] == 'trade': + if "table" in msg: + if msg["table"] == "trade": await self._trade(msg, timestamp) - elif msg['table'] == 'order': + elif msg["table"] == "order": await self._order(msg, timestamp) - elif msg['table'] == 'orderBookL2': + elif msg["table"] == "orderBookL2": await self._book(msg, timestamp) - elif msg['table'] == 'funding': + elif msg["table"] == "funding": await self._funding(msg, timestamp) - elif msg['table'] == 'instrument': + elif msg["table"] == "instrument": await self._instrument(msg, timestamp) - elif msg['table'] == 'quote': + elif msg["table"] == "quote": await self._ticker(msg, timestamp) - elif msg['table'] == 'liquidation': + elif msg["table"] == "liquidation": await self._liquidation(msg, timestamp) else: - LOG.warning("%s: Unhandled table=%r in %r", 
conn.uuid, msg['table'], msg) - elif 'info' in msg: + LOG.warning( + "%s: Unhandled table=%r in %r", conn.uuid, msg["table"], msg + ) + elif "info" in msg: LOG.debug("%s: Info message from exchange: %s", conn.uuid, msg) - elif 'subscribe' in msg: - if not msg['success']: + elif "subscribe" in msg: + if not msg["success"]: LOG.error("%s: Subscribe failure: %s", conn.uuid, msg) - elif 'error' in msg: + elif "error" in msg: LOG.error("%s: Error message from exchange: %s", conn.uuid, msg) - elif 'request' in msg: - if msg['success']: - LOG.debug("%s: Success %s", conn.uuid, msg['request'].get('op')) + elif "request" in msg: + if msg["success"]: + LOG.debug("%s: Success %s", conn.uuid, msg["request"].get("op")) else: - LOG.warning("%s: Failure %s", conn.uuid, msg['request']) + LOG.warning("%s: Failure %s", conn.uuid, msg["request"]) else: LOG.warning("%s: Unexpected message from exchange: %s", conn.uuid, msg) @@ -804,16 +884,21 @@ async def subscribe(self, conn: AsyncConnection): chans.append(f"{chan}:{pair}") for i in range(0, len(chans), 10): - await conn.write(json.dumps({"op": "subscribe", - "args": chans[i:i + 10]})) + await conn.write(json.dumps({"op": "subscribe", "args": chans[i : i + 10]})) async def _authenticate(self, conn: AsyncConnection): """Send API Key with signed message.""" # Docs: https://www.bitmex.com/app/apiKeys # https://github.com/BitMEX/sample-market-maker/blob/master/test/websocket-apikey-auth-test.py if self.key_id and self.key_secret: - LOG.info('%s: Authenticate with signature', conn.uuid) + LOG.info("%s: Authenticate with signature", conn.uuid) expires = int(time.time()) + 365 * 24 * 3600 # One year - msg = f'GET/realtime{expires}'.encode('utf-8') - signature = hmac.new(self.key_secret.encode('utf-8'), msg, digestmod=hashlib.sha256).hexdigest() - await conn.write(json.dumps({'op': 'authKeyExpires', 'args': [self.key_id, expires, signature]})) + msg = f"GET/realtime{expires}".encode("utf-8") + signature = hmac.new( + self.key_secret.encode("utf-8"), msg, digestmod=hashlib.sha256 + ).hexdigest() + await conn.write( + json.dumps( + {"op": "authKeyExpires", "args": [self.key_id, expires, signature]} + ) + ) diff --git a/cryptofeed/exchanges/bybit.py b/cryptofeed/exchanges/bybit.py index 4f6186109..4b71a1c84 100644 --- a/cryptofeed/exchanges/bybit.py +++ b/cryptofeed/exchanges/bybit.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2018-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
-''' +""" + import hmac import time from collections import defaultdict @@ -15,38 +16,149 @@ from cryptofeed.json_utils import json -from cryptofeed.connection import AsyncConnection, RestEndpoint, Routes, WebsocketEndpoint -from cryptofeed.defines import BID, ASK, BUY, BYBIT, CANCELLED, CANCELLING, CANDLES, FAILED, FILLED, FUNDING, L2_BOOK, LIMIT, LIQUIDATIONS, MAKER, MARKET, OPEN, PARTIAL, SELL, SUBMITTING, TAKER, TRADES, OPEN_INTEREST, INDEX, ORDER_INFO, FILLS, FUTURES, PERPETUAL, SPOT, TICKER +from cryptofeed.connection import ( + AsyncConnection, + RestEndpoint, + Routes, + WebsocketEndpoint, +) +from cryptofeed.defines import ( + BID, + ASK, + BUY, + BYBIT, + CANCELLED, + CANCELLING, + CANDLES, + FAILED, + FILLED, + FUNDING, + L2_BOOK, + LIMIT, + LIQUIDATIONS, + MAKER, + MARKET, + OPEN, + PARTIAL, + SELL, + SUBMITTING, + TAKER, + TRADES, + OPEN_INTEREST, + INDEX, + ORDER_INFO, + FILLS, + FUTURES, + PERPETUAL, + SPOT, + TICKER, +) from cryptofeed.feed import Feed -from cryptofeed.types import OrderBook, Trade, Index, OpenInterest, Funding, OrderInfo, Fill, Candle, Liquidation, Ticker - -LOG = logging.getLogger('feedhandler') +from cryptofeed.types import ( + OrderBook, + Trade, + Index, + OpenInterest, + Funding, + OrderInfo, + Fill, + Candle, + Liquidation, + Ticker, +) + +LOG = logging.getLogger("feedhandler") class Bybit(Feed): id = BYBIT websocket_channels = { - L2_BOOK: '', # Assigned in self.subscribe - TRADES: 'publicTrade', - FILLS: 'execution', - ORDER_INFO: 'order', - INDEX: 'index', - OPEN_INTEREST: 'open_interest', - FUNDING: 'funding', - CANDLES: 'kline', - LIQUIDATIONS: 'liquidation', - TICKER: 'tickers' + L2_BOOK: "", # Assigned in self.subscribe + TRADES: "publicTrade", + FILLS: "execution", + ORDER_INFO: "order", + INDEX: "index", + OPEN_INTEREST: "open_interest", + FUNDING: "funding", + CANDLES: "kline", + LIQUIDATIONS: "liquidation", + TICKER: "tickers", } websocket_endpoints = [ - WebsocketEndpoint('wss://stream.bybit.com/v5/public/linear', instrument_filter=('TYPE', (FUTURES, PERPETUAL)), channel_filter=(websocket_channels[L2_BOOK], websocket_channels[TRADES], websocket_channels[INDEX], websocket_channels[OPEN_INTEREST], websocket_channels[FUNDING], websocket_channels[CANDLES], websocket_channels[LIQUIDATIONS], websocket_channels[TICKER]), sandbox='wss://stream-testnet.bybit.com/v5/public/linear', options={'compression': None}), - WebsocketEndpoint('wss://stream.bybit.com/v5/public/spot', instrument_filter=('TYPE', (SPOT)), channel_filter=(websocket_channels[L2_BOOK], websocket_channels[TRADES], websocket_channels[CANDLES],), sandbox='wss://stream-testnet.bybit.com/v5/public/spot', options={'compression': None}), - WebsocketEndpoint('wss://stream.bybit.com/realtime_private', channel_filter=(websocket_channels[ORDER_INFO], websocket_channels[FILLS]), instrument_filter=('QUOTE', ('USDT',)), sandbox='wss://stream-testnet.bybit.com/realtime_private', options={'compression': None}), + WebsocketEndpoint( + "wss://stream.bybit.com/v5/public/linear", + instrument_filter=("TYPE", (FUTURES, PERPETUAL)), + channel_filter=( + websocket_channels[L2_BOOK], + websocket_channels[TRADES], + websocket_channels[INDEX], + websocket_channels[OPEN_INTEREST], + websocket_channels[FUNDING], + websocket_channels[CANDLES], + websocket_channels[LIQUIDATIONS], + websocket_channels[TICKER], + ), + sandbox="wss://stream-testnet.bybit.com/v5/public/linear", + options={"compression": None}, + ), + WebsocketEndpoint( + "wss://stream.bybit.com/v5/public/spot", + instrument_filter=("TYPE", (SPOT)), 
+ channel_filter=( + websocket_channels[L2_BOOK], + websocket_channels[TRADES], + websocket_channels[CANDLES], + ), + sandbox="wss://stream-testnet.bybit.com/v5/public/spot", + options={"compression": None}, + ), + WebsocketEndpoint( + "wss://stream.bybit.com/realtime_private", + channel_filter=(websocket_channels[ORDER_INFO], websocket_channels[FILLS]), + instrument_filter=("QUOTE", ("USDT",)), + sandbox="wss://stream-testnet.bybit.com/realtime_private", + options={"compression": None}, + ), ] rest_endpoints = [ - RestEndpoint('https://api.bybit.com', routes=Routes(['/v5/market/instruments-info?&category=linear&status=Trading&limit=1000', '/v5/market/instruments-info?&category=spot&status=Trading&limit=1000'])) + RestEndpoint( + "https://api.bybit.com", + routes=Routes( + [ + "/v5/market/instruments-info?&category=linear&status=Trading&limit=1000", + "/v5/market/instruments-info?&category=spot&status=Trading&limit=1000", + ] + ), + ) ] - valid_candle_intervals = {'1m', '3m', '5m', '15m', '30m', '1h', '2h', '4h', '6h', '1d', '1w', '1M'} - candle_interval_map = {'1m': '1', '3m': '3', '5m': '5', '15m': '15', '30m': '30', '1h': '60', '2h': '120', '4h': '240', '6h': '360', '1d': 'D', '1w': 'W', '1M': 'M'} + valid_candle_intervals = { + "1m", + "3m", + "5m", + "15m", + "30m", + "1h", + "2h", + "4h", + "6h", + "1d", + "1w", + "1M", + } + candle_interval_map = { + "1m": "1", + "3m": "3", + "5m": "5", + "15m": "15", + "30m": "30", + "1h": "60", + "2h": "120", + "4h": "240", + "6h": "360", + "1d": "D", + "1w": "W", + "1M": "M", + } # Bybit sends delta updates for futures, which might not include some values if they haven't changed. # https://bybit-exchange.github.io/docs/v5/websocket/public/ticker @@ -56,6 +168,7 @@ class Bybit(Feed): @classmethod def timestamp_normalize(cls, ts) -> float: from cryptofeed.exchange import Exchange + return Exchange.timestamp_normalize(ts) @staticmethod @@ -76,56 +189,68 @@ def _parse_symbol_data(cls, data: dict) -> Tuple[Dict, Dict]: messages = data if isinstance(data, list) else [data] for msg in messages: # Bybit responses typically: {'result': {'list': [...]}} - container = msg.get('result', msg) if isinstance(msg, dict) else msg + container = msg.get("result", msg) if isinstance(msg, dict) else msg items = None if isinstance(container, dict): - items = container.get('list') or container.get('data') or container.get('result') + items = ( + container.get("list") + or container.get("data") + or container.get("result") + ) # If 'items' is still a dict with 'list' if isinstance(items, dict): - items = items.get('list') + items = items.get("list") if not isinstance(items, list): continue for symbol in items: # Determine instrument type stype = SPOT - ctype = symbol.get('contractType') - if ctype == 'LinearPerpetual': + ctype = symbol.get("contractType") + if ctype == "LinearPerpetual": stype = PERPETUAL - elif ctype == 'LinearFutures': + elif ctype == "LinearFutures": stype = FUTURES - base = symbol.get('baseCoin') or symbol.get('baseCurrency') or symbol.get('base') - quote = symbol.get('quoteCoin') or symbol.get('quoteCurrency') or symbol.get('quote') + base = ( + symbol.get("baseCoin") + or symbol.get("baseCurrency") + or symbol.get("base") + ) + quote = ( + symbol.get("quoteCoin") + or symbol.get("quoteCurrency") + or symbol.get("quote") + ) if not base or not quote: continue expiry = None - sym_name = symbol.get('symbol') or f"{base}{quote}" + sym_name = symbol.get("symbol") or f"{base}{quote}" if stype is FUTURES: - if not sym_name.endswith(quote) and '-' in 
sym_name: - expiry = sym_name.split('-')[-1] + if not sym_name.endswith(quote) and "-" in sym_name: + expiry = sym_name.split("-")[-1] s = Symbol(base, quote, type=stype, expiry_date=expiry) # Normalized exchange symbol mapping if stype == SPOT: - ret[s.normalized] = f'{base}/{quote}' - elif stype == PERPETUAL and sym_name.endswith('PERP'): + ret[s.normalized] = f"{base}/{quote}" + elif stype == PERPETUAL and sym_name.endswith("PERP"): ret[s.normalized] = sym_name elif stype == PERPETUAL: - ret[s.normalized] = f'{base}{quote}' + ret[s.normalized] = f"{base}{quote}" elif stype == FUTURES: ret[s.normalized] = sym_name # Metadata try: - tick = symbol.get('priceFilter', {}).get('tickSize') + tick = symbol.get("priceFilter", {}).get("tickSize") if tick is not None: - info['tick_size'][s.normalized] = Decimal(str(tick)) + info["tick_size"][s.normalized] = Decimal(str(tick)) except Exception: pass - info['instrument_type'][s.normalized] = stype + info["instrument_type"][s.normalized] = stype return ret, info @@ -162,55 +287,59 @@ async def _candle(self, msg: dict, timestamp: float, market: str): "type": "snapshot" } """ - symbol = msg['topic'].split(".")[-1] - if market == 'spot': + symbol = msg["topic"].split(".")[-1] + if market == "spot": symbol = self.convert_to_spot_name(self, symbol) if not symbol: return symbol = self.exchange_symbol_to_std_symbol(symbol) - ts = int(msg['ts']) + ts = int(msg["ts"]) - for entry in msg['data']: - if self.candle_closed_only and not entry['confirm']: + for entry in msg["data"]: + if self.candle_closed_only and not entry["confirm"]: continue - c = Candle(self.id, - symbol, - entry['start'], - entry['end'], - self.candle_interval, - entry['confirm'], - Decimal(entry['open']), - Decimal(entry['close']), - Decimal(entry['high']), - Decimal(entry['low']), - Decimal(entry['volume']), - None, - ts, - raw=entry) + c = Candle( + self.id, + symbol, + entry["start"], + entry["end"], + self.candle_interval, + entry["confirm"], + Decimal(entry["open"]), + Decimal(entry["close"]), + Decimal(entry["high"]), + Decimal(entry["low"]), + Decimal(entry["volume"]), + None, + ts, + raw=entry, + ) await self.callback(CANDLES, c, timestamp) async def _book_legacy_l2(self, msg: dict, timestamp: float): """Handle legacy orderBookL2_* topics from older recordings.""" - symbol = msg['topic'].split('.')[-1] + symbol = msg["topic"].split(".")[-1] std_symbol = self.exchange_symbol_to_std_symbol(symbol) - data = msg.get('data') + data = msg.get("data") # Derive list of entries - if isinstance(data, dict) and 'order_book' in data: - entries = data['order_book'] - mode = 'snapshot' + if isinstance(data, dict) and "order_book" in data: + entries = data["order_book"] + mode = "snapshot" else: entries = data if isinstance(data, list) else [data] - mode = msg.get('type') or 'update' + mode = msg.get("type") or "update" if std_symbol not in self._l2_book: - self._l2_book[std_symbol] = OrderBook(self.id, std_symbol, max_depth=self.max_depth) + self._l2_book[std_symbol] = OrderBook( + self.id, std_symbol, max_depth=self.max_depth + ) delta = {BID: [], ASK: []} for e in entries or []: if isinstance(e, dict): - price = e.get('price') or e.get('p') - size = e.get('size') or e.get('q') - side = e.get('side') or e.get('s') + price = e.get("price") or e.get("p") + size = e.get("size") or e.get("q") + side = e.get("side") or e.get("s") elif isinstance(e, (list, tuple)) and len(e) >= 3: price, size, side = e[0], e[1], e[2] else: @@ -220,8 +349,8 @@ async def _book_legacy_l2(self, msg: dict, timestamp: 
float): size = Decimal(str(size)) except Exception: continue - side_key = BID if str(side).lower() in ('buy','bid','b') else ASK - if mode == 'snapshot': + side_key = BID if str(side).lower() in ("buy", "bid", "b") else ASK + if mode == "snapshot": self._l2_book[std_symbol].book[side_key][price] = size else: if size == 0: @@ -231,10 +360,16 @@ async def _book_legacy_l2(self, msg: dict, timestamp: float): else: self._l2_book[std_symbol].book[side_key][price] = size delta[side_key].append((price, size)) - await self.book_callback(L2_BOOK, self._l2_book[std_symbol], timestamp, delta=delta if any(delta.values()) else None, raw=msg) + await self.book_callback( + L2_BOOK, + self._l2_book[std_symbol], + timestamp, + delta=delta if any(delta.values()) else None, + raw=msg, + ) async def _liquidation(self, msg: dict, timestamp: float): - ''' + """ { "topic": "liquidation.BTCUSDT", "type": "snapshot", @@ -247,54 +382,55 @@ async def _liquidation(self, msg: dict, timestamp: float): "price": "43511.70" } } - ''' + """ liq = Liquidation( self.id, - self.exchange_symbol_to_std_symbol(msg['data']['symbol']), - BUY if msg['data']['side'] == 'Buy' else SELL, - Decimal(msg['data']['size']), - Decimal(msg['data']['price']), + self.exchange_symbol_to_std_symbol(msg["data"]["symbol"]), + BUY if msg["data"]["side"] == "Buy" else SELL, + Decimal(msg["data"]["size"]), + Decimal(msg["data"]["price"]), None, None, - msg['ts'], - raw=msg + msg["ts"], + raw=msg, ) await self.callback(LIQUIDATIONS, liq, timestamp) async def message_handler(self, msg: str, conn, timestamp: float): - msg = json.loads(msg, parse_float=Decimal) # Bybit spot and USDT perps share the same symbol name, so to help to distinguish spot pairs from USDT perps, # pick the market from the WebSocket address URL and pass it to the functions. 
# 'linear' - futures, perpetual, 'spot' - spot - addr = getattr(conn, 'address', '') - market = addr.split('/')[-1] if isinstance(addr, str) else '' + addr = getattr(conn, "address", "") + market = addr.split("/")[-1] if isinstance(addr, str) else "" if "success" in msg: - if msg['success']: - if 'request' in msg: - if msg['request']['op'] == 'auth': + if msg["success"]: + if "request" in msg: + if msg["request"]["op"] == "auth": LOG.debug("%s: Authenticated successful", conn.uuid) - elif msg['op'] == 'subscribe': + elif msg["op"] == "subscribe": # {"success": true, "ret_msg": "","op": "subscribe","conn_id": "cejreassvfrsfvb9v1a0-2m"} LOG.debug("%s: Subscribed to channel.", conn.uuid) else: LOG.warning("%s: Unhandled 'successs' message received", conn.uuid) else: LOG.error("%s: Error from exchange %s", conn.uuid, msg) - elif msg["topic"].startswith('publicTrade'): + elif msg["topic"].startswith("publicTrade"): await self._trade(msg, timestamp, market) - elif msg["topic"].startswith('orderbook'): + elif msg["topic"].startswith("orderbook"): await self._book(msg, timestamp, market) - elif msg['topic'].startswith('kline'): + elif msg["topic"].startswith("kline"): await self._candle(msg, timestamp, market) - elif msg['topic'].startswith('liquidation'): + elif msg["topic"].startswith("liquidation"): await self._liquidation(msg, timestamp) - elif msg['topic'].startswith('tickers'): + elif msg["topic"].startswith("tickers"): await self._ticker_open_interest_funding_index(msg, timestamp, conn) - elif msg['topic'].startswith('orderBookL2') or msg['topic'].startswith('orderBookL2_'): + elif msg["topic"].startswith("orderBookL2") or msg["topic"].startswith( + "orderBookL2_" + ): await self._book_legacy_l2(msg, timestamp) - elif msg['topic'].startswith('trade.'): + elif msg["topic"].startswith("trade."): await self._trade_legacy(msg, timestamp) elif "order" in msg["topic"]: await self._order(msg, timestamp) @@ -325,10 +461,12 @@ async def subscribe(self, connection: AsyncConnection): for pair in connection.subscription[chan]: sym = str_to_symbol(self.exchange_symbol_to_std_symbol(pair)) if sym.type == SPOT: - pair = pair.replace('/', '') + pair = pair.replace("/", "") if self.exchange_channel_to_std(chan) == CANDLES: - sub = [f"{self.websocket_channels[CANDLES]}.{self.candle_interval_map[self.candle_interval]}.{pair}"] + sub = [ + f"{self.websocket_channels[CANDLES]}.{self.candle_interval_map[self.candle_interval]}.{pair}" + ] elif self.exchange_channel_to_std(chan) == L2_BOOK: l2_book_channel = { SPOT: "orderbook.200", @@ -339,15 +477,19 @@ async def subscribe(self, connection: AsyncConnection): else: sub = [f"{chan}.{pair}"] - if self.exchange_channel_to_std(chan) not in [self.websocket_channels[TICKER], OPEN_INTEREST, FUNDING, INDEX]: - await connection.write(json.dumps({"op": "subscribe", "args": sub})) + if self.exchange_channel_to_std(chan) not in [ + self.websocket_channels[TICKER], + OPEN_INTEREST, + FUNDING, + INDEX, + ]: + await connection.write( + json.dumps({"op": "subscribe", "args": sub}) + ) else: - await connection.write(json.dumps( - { - "op": "subscribe", - "args": [f"{chan}"] - } - )) + await connection.write( + json.dumps({"op": "subscribe", "args": [f"{chan}"]}) + ) async def _trade(self, msg: dict, timestamp: float, market: str): """ @@ -366,58 +508,63 @@ async def _trade(self, msg: dict, timestamp: float, market: str): "i": "20f43950-d8dd-5b31-9112-a178eb6023af", "BT": false}]} """ - data = msg['data'] + data = msg["data"] if isinstance(data, list): for trade in data: - symbol = 
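Because Bybit spot pairs and USDT perpetuals share the same symbol names, the message handler above infers the market segment from the websocket address and passes it to the per-channel handlers. A one-line sketch of that derivation; the address strings mirror the endpoints declared earlier in this class.

    def market_from_address(addr: str) -> str:
        # ".../v5/public/linear" -> "linear", ".../v5/public/spot" -> "spot"
        return addr.split("/")[-1] if isinstance(addr, str) else ""

    assert market_from_address("wss://stream.bybit.com/v5/public/spot") == "spot"
    assert market_from_address("wss://stream.bybit.com/v5/public/linear") == "linear"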
trade['s'] + symbol = trade["s"] - if market == 'spot': - symbol = self.convert_to_spot_name(self, trade['s']) + if market == "spot": + symbol = self.convert_to_spot_name(self, trade["s"]) if not symbol: - return + continue - ts = int(trade['T']) if isinstance(trade['T'], str) else trade['T'] + ts = int(trade["T"]) if isinstance(trade["T"], str) else trade["T"] t = Trade( self.id, self.exchange_symbol_to_std_symbol(symbol), - BUY if trade['S'] == 'Buy' else SELL, - Decimal(trade['v']), - Decimal(trade['p']), + BUY if trade["S"] == "Buy" else SELL, + Decimal(trade["v"]), + Decimal(trade["p"]), self.timestamp_normalize(ts), - id=trade['i'], - raw=trade + id=trade["i"], + raw=trade, ) - await self.callback(TRADES, t, timestamp) + await self.callback(TRADES, t, timestamp) async def _trade_legacy(self, msg: dict, timestamp: float): """Handle legacy trade.* topic payloads from older recordings.""" - symbol = msg['topic'].split('.')[-1] - data = msg.get('data') + symbol = msg["topic"].split(".")[-1] + data = msg.get("data") records = data if isinstance(data, list) else [data] for rec in records: if not isinstance(rec, dict): continue - side = rec.get('side') or rec.get('S') - qty = rec.get('size') or rec.get('qty') or rec.get('v') - price = rec.get('price') or rec.get('p') - ts = rec.get('trade_time_ms') or rec.get('T') or rec.get('timestamp') or msg.get('ts') + side = rec.get("side") or rec.get("S") + qty = rec.get("size") or rec.get("qty") or rec.get("v") + price = rec.get("price") or rec.get("p") + ts = ( + rec.get("trade_time_ms") + or rec.get("T") + or rec.get("timestamp") + or msg.get("ts") + ) if qty is None or price is None: continue t = Trade( self.id, self.exchange_symbol_to_std_symbol(symbol), - BUY if (str(side).lower() in ('buy','b')) else SELL, + BUY if (str(side).lower() in ("buy", "b")) else SELL, Decimal(str(qty)), Decimal(str(price)), self.timestamp_normalize(ts) if ts is not None else None, - id=str(rec.get('trade_id') or rec.get('i') or ''), + id=str(rec.get("trade_id") or rec.get("i") or ""), raw=rec, ) await self.callback(TRADES, t, timestamp) async def _book(self, msg: dict, timestamp: float, market: str): - ''' + """ { "topic": "orderbook.50.BTCUSDT", "type": "snapshot", @@ -451,28 +598,27 @@ async def _book(self, msg: dict, timestamp: float, market: str): } "cts": 1672304484976 } - ''' - pair = msg['topic'].split('.')[-1] - update_type = msg['type'] - data = msg['data'] + """ + pair = msg["topic"].split(".")[-1] + update_type = msg["type"] + data = msg["data"] delta = {BID: [], ASK: []} - if market == 'spot': - pair = self.convert_to_spot_name(self, data['s']) + if market == "spot": + pair = self.convert_to_spot_name(self, data["s"]) if not pair: return pair = self.exchange_symbol_to_std_symbol(pair) - if update_type == 'snapshot': + if update_type == "snapshot": delta = None self._l2_book[pair] = OrderBook(self.id, pair, max_depth=self.max_depth) for key, update in data.items(): - side = BID if key == 'b' else ASK - if key == 'a' or key == 'b': + side = BID if key == "b" else ASK + if key == "a" or key == "b": for price, size in update: - price = Decimal(price) size = Decimal(size) @@ -482,13 +628,22 @@ async def _book(self, msg: dict, timestamp: float, market: str): else: self._l2_book[pair].book[side][price] = size - if update_type == 'delta': - delta = {BID: data['b'], ASK: data['a']} + if update_type == "delta": + delta = {BID: data["b"], ASK: data["a"]} - await self.book_callback(L2_BOOK, self._l2_book[pair], timestamp, 
timestamp=self.timestamp_normalize(int(msg['ts'])), raw=msg, delta=delta) + await self.book_callback( + L2_BOOK, + self._l2_book[pair], + timestamp, + timestamp=self.timestamp_normalize(int(msg["ts"])), + raw=msg, + delta=delta, + ) - async def _ticker_open_interest_funding_index(self, msg: dict, timestamp: float, conn: AsyncConnection): - ''' + async def _ticker_open_interest_funding_index( + self, msg: dict, timestamp: float, conn: AsyncConnection + ): + """ { "topic": "tickers.BTCUSDT", "type": "snapshot", @@ -517,63 +672,66 @@ async def _ticker_open_interest_funding_index(self, msg: dict, timestamp: float, "cs": 24987956059, "ts": 1673272861686 } - ''' + """ # Bybit does not provide bid/ask information for the spot market, only for perps at the moment - update_type = msg['type'] - update = msg['data'] - _pair = msg['data']['symbol'] + update_type = msg["type"] + update = msg["data"] + _pair = msg["data"]["symbol"] symbol = self.exchange_symbol_to_std_symbol(_pair) - if update_type == 'snapshot': + if update_type == "snapshot": self.tickers[symbol] = update - if update_type == 'delta': + if update_type == "delta": self.tickers[symbol].update(update) update = self.tickers[symbol] - if 'tickers' in conn.subscription and _pair in conn.subscription['tickers']: + if "tickers" in conn.subscription and _pair in conn.subscription["tickers"]: t = Ticker( self.id, symbol, - Decimal(update['bid1Price']) if 'bid1Price' in update else Decimal(0), - Decimal(update['ask1Price']) if 'ask1Price' in update else Decimal(0), - int(msg['ts']), - raw=update + Decimal(update["bid1Price"]) if "bid1Price" in update else Decimal(0), + Decimal(update["ask1Price"]) if "ask1Price" in update else Decimal(0), + int(msg["ts"]), + raw=update, ) await self.callback(TICKER, t, timestamp) - if 'funding' in conn.subscription and _pair in conn.subscription['funding']: + if "funding" in conn.subscription and _pair in conn.subscription["funding"]: f = Funding( self.id, symbol, - Decimal(update['markPrice']), - Decimal(update['fundingRate']), - int(update['nextFundingTime']), - int(msg['ts']), + Decimal(update["markPrice"]), + Decimal(update["fundingRate"]), + int(update["nextFundingTime"]), + int(msg["ts"]), None, - raw=update + raw=update, ) await self.callback(FUNDING, f, timestamp) - if 'open_interest' in conn.subscription and _pair in conn.subscription['open_interest']: + if ( + "open_interest" in conn.subscription + and _pair in conn.subscription["open_interest"] + ): o = OpenInterest( self.id, symbol, - Decimal(update['openInterest']), - int(msg['ts']), - raw=update + Decimal(update["openInterest"]), + int(msg["ts"]), + raw=update, ) await self.callback(OPEN_INTEREST, o, timestamp) - if 'index' in conn.subscription and _pair in conn.subscription['index']: + if "index" in conn.subscription and _pair in conn.subscription["index"]: i = Index( self.id, symbol, - Decimal(update['indexPrice']), - int(msg['ts']), - raw=update + Decimal(update["indexPrice"]), + int(msg["ts"]), + raw=update, ) await self.callback(INDEX, i, timestamp) @@ -613,35 +771,37 @@ async def _order(self, msg: dict, timestamp: float): } """ order_status = { - 'Created': SUBMITTING, - 'Rejected': FAILED, - 'New': OPEN, - 'PartiallyFilled': PARTIAL, - 'Filled': FILLED, - 'Cancelled': CANCELLED, - 'PendingCancel': CANCELLING + "Created": SUBMITTING, + "Rejected": FAILED, + "New": OPEN, + "PartiallyFilled": PARTIAL, + "Filled": FILLED, + "Cancelled": CANCELLED, + "PendingCancel": CANCELLING, } - for i in range(len(msg['data'])): - data = msg['data'][i] + for i 
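Bybit `tickers` messages for linear contracts arrive as a full `snapshot` followed by `delta` updates that omit unchanged fields, so the handler above keeps the last merged payload per symbol and dict-merges each delta into it before reading prices. A minimal sketch of that merge:

    tickers = {}  # symbol -> last merged ticker payload

    def on_ticker(symbol: str, update_type: str, data: dict) -> dict:
        if update_type == "snapshot":
            tickers[symbol] = dict(data)
        else:  # "delta": only changed fields are present
            tickers[symbol].update(data)
        return tickers[symbol]

    on_ticker("BTCUSDT", "snapshot", {"bid1Price": "43500.1", "ask1Price": "43500.2", "fundingRate": "0.0001"})
    merged = on_ticker("BTCUSDT", "delta", {"bid1Price": "43501.0"})
    # merged still carries ask1Price and fundingRate from the snapshot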
in range(len(msg["data"])): + data = msg["data"][i] oi = OrderInfo( self.id, - self.exchange_symbol_to_std_symbol(data['symbol']), + self.exchange_symbol_to_std_symbol(data["symbol"]), data["order_id"], - BUY if data["side"] == 'Buy' else SELL, + BUY if data["side"] == "Buy" else SELL, order_status[data["order_status"]], - LIMIT if data['order_type'] == 'Limit' else MARKET, - Decimal(data['price']), - Decimal(data['qty']), - Decimal(data['qty']) - Decimal(data['cum_exec_qty']), - self.timestamp_normalize(data.get('update_time') or data.get('O') or data.get('timestamp')), + LIMIT if data["order_type"] == "Limit" else MARKET, + Decimal(data["price"]), + Decimal(data["qty"]), + Decimal(data["qty"]) - Decimal(data["cum_exec_qty"]), + self.timestamp_normalize( + data.get("update_time") or data.get("O") or data.get("timestamp") + ), raw=data, ) await self.callback(ORDER_INFO, oi, timestamp) async def _execution(self, msg: dict, timestamp: float): - ''' + """ { "topic": "execution", "data": [ @@ -662,22 +822,22 @@ async def _execution(self, msg: dict, timestamp: float): } ] } - ''' - for entry in msg['data']: - symbol = self.exchange_symbol_to_std_symbol(entry['symbol']) + """ + for entry in msg["data"]: + symbol = self.exchange_symbol_to_std_symbol(entry["symbol"]) f = Fill( self.id, symbol, - BUY if entry['side'] == 'Buy' else SELL, - Decimal(entry['exec_qty']), - Decimal(entry['price']), - Decimal(entry['exec_fee']), - entry['exec_id'], - entry['order_id'], + BUY if entry["side"] == "Buy" else SELL, + Decimal(entry["exec_qty"]), + Decimal(entry["price"]), + Decimal(entry["exec_fee"]), + entry["exec_id"], + entry["order_id"], None, - MAKER if entry['is_maker'] else TAKER, - entry['trade_time'].timestamp(), - raw=entry + MAKER if entry["is_maker"] else TAKER, + entry["trade_time"].timestamp(), + raw=entry, ) await self.callback(FILLS, f, timestamp) @@ -688,14 +848,25 @@ async def _execution(self, msg: dict, timestamp: float): # await self.callback(BALANCES, feed=self.id, symbol=symbol, data=data, receipt_timestamp=timestamp) async def authenticate(self, conn: AsyncConnection): - if any(self.is_authenticated_channel(self.exchange_channel_to_std(chan)) for chan in conn.subscription): + if any( + self.is_authenticated_channel(self.exchange_channel_to_std(chan)) + for chan in conn.subscription + ): auth = self._auth(self.key_id, self.key_secret) - LOG.debug(f"{conn.uuid}: Sending authentication request with message {auth}") + LOG.debug( + f"{conn.uuid}: Sending authentication request with message {auth}" + ) await conn.write(auth) def _auth(self, key_id: str, key_secret: str) -> str: # https://bybit-exchange.github.io/docs/inverse/#t-websocketauthentication expires = int((time.time() + 60)) * 1000 - signature = str(hmac.new(bytes(key_secret, 'utf-8'), bytes(f'GET/realtime{expires}', 'utf-8'), digestmod='sha256').hexdigest()) - return json.dumps({'op': 'auth', 'args': [key_id, expires, signature]}) + signature = str( + hmac.new( + bytes(key_secret, "utf-8"), + bytes(f"GET/realtime{expires}", "utf-8"), + digestmod="sha256", + ).hexdigest() + ) + return json.dumps({"op": "auth", "args": [key_id, expires, signature]}) diff --git a/cryptofeed/exchanges/mixins/binance_rest.py b/cryptofeed/exchanges/mixins/binance_rest.py index 27ece2889..870860911 100644 --- a/cryptofeed/exchanges/mixins/binance_rest.py +++ b/cryptofeed/exchanges/mixins/binance_rest.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated 
with this software. -''' +""" + import asyncio from decimal import Decimal import hashlib @@ -14,50 +15,79 @@ from cryptofeed.json_utils import json -from cryptofeed.defines import BALANCES, BUY, CANCEL_ORDER, CANDLES, DELETE, FILL_OR_KILL, GET, GOOD_TIL_CANCELED, IMMEDIATE_OR_CANCEL, LIMIT, MARKET, ORDERS, ORDER_STATUS, PLACE_ORDER, POSITIONS, POST, SELL, TRADES +from cryptofeed.defines import ( + BALANCES, + BUY, + CANCEL_ORDER, + CANDLES, + DELETE, + FILL_OR_KILL, + GET, + GOOD_TIL_CANCELED, + IMMEDIATE_OR_CANCEL, + LIMIT, + MARKET, + ORDERS, + ORDER_STATUS, + PLACE_ORDER, + POSITIONS, + POST, + SELL, + TRADES, +) from cryptofeed.exchange import RestExchange from cryptofeed.types import Candle -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class BinanceRestMixin(RestExchange): api = "https://api.binance.com/api/v3/" rest_channels = ( - TRADES, ORDER_STATUS, CANCEL_ORDER, PLACE_ORDER, BALANCES, ORDERS, CANDLES + TRADES, + ORDER_STATUS, + CANCEL_ORDER, + PLACE_ORDER, + BALANCES, + ORDERS, + CANDLES, ) order_options = { - LIMIT: 'LIMIT', - MARKET: 'MARKET', - FILL_OR_KILL: 'FOK', - IMMEDIATE_OR_CANCEL: 'IOC', - GOOD_TIL_CANCELED: 'GTC', + LIMIT: "LIMIT", + MARKET: "MARKET", + FILL_OR_KILL: "FOK", + IMMEDIATE_OR_CANCEL: "IOC", + GOOD_TIL_CANCELED: "GTC", } def _nonce(self): return str(int(round(time.time() * 1000))) def _generate_signature(self, query_string: str): - h = hmac.new(self.key_secret.encode('utf8'), query_string.encode('utf8'), hashlib.sha256) + h = hmac.new( + self.key_secret.encode("utf8"), query_string.encode("utf8"), hashlib.sha256 + ) return h.hexdigest() - async def _request(self, method: str, endpoint: str, auth: bool = False, payload={}, api=None): + async def _request( + self, method: str, endpoint: str, auth: bool = False, payload={}, api=None + ): query_string = urlencode(payload) if auth: if query_string: - query_string = '{}×tamp={}'.format(query_string, self._nonce()) + query_string = "{}×tamp={}".format(query_string, self._nonce()) else: - query_string = 'timestamp={}'.format(self._nonce()) + query_string = "timestamp={}".format(self._nonce()) if not api: api = self.api - url = f'{api}{endpoint}?{query_string}' + url = f"{api}{endpoint}?{query_string}" header = {} if auth: signature = self._generate_signature(query_string) - url += f'&signature={signature}' + url += f"&signature={signature}" header = { "X-MBX-APIKEY": self.key_id, } @@ -67,9 +97,13 @@ async def _request(self, method: str, endpoint: str, auth: bool = False, payload data = await self.http_conn.write(url, msg=None, header=header) elif method == DELETE: data = await self.http_conn.delete(url, header=header) + else: + raise ValueError(f"Unsupported HTTP method: {method}") return json.loads(data, parse_float=Decimal) - async def trades(self, symbol: str, start=None, end=None, retry_count=1, retry_delay=60): + async def trades( + self, symbol: str, start=None, end=None, retry_count=1, retry_delay=60 + ): symbol = self.std_symbol_to_exchange_symbol(symbol) start, end = self._interval_normalize(start, end) if start and end: @@ -82,15 +116,21 @@ async def trades(self, symbol: str, start=None, end=None, retry_count=1, retry_d else: endpoint = f"{self.api}aggTrades?symbol={symbol}&limit=1000" - r = await self.http_conn.read(endpoint, retry_count=retry_count, retry_delay=retry_delay) + r = await self.http_conn.read( + endpoint, retry_count=retry_count, retry_delay=retry_delay + ) data = json.loads(r, parse_float=Decimal) if data: - if data[-1]['T'] == start: - LOG.warning("%s: 
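Authenticated Binance REST calls above build the query string, append a millisecond `timestamp` parameter (the `×tamp` in the hunk above is `&timestamp` with `&times` HTML-decoded), sign the whole query string with HMAC-SHA256 of the API secret, append the digest as `signature`, and send the API key in the `X-MBX-APIKEY` header. A sketch of the URL construction; the key and secret are placeholders.

    import hashlib
    import hmac
    import time
    from urllib.parse import urlencode

    def signed_url(api: str, endpoint: str, key_secret: str, payload: dict) -> str:
        query = urlencode(payload)
        ts = int(time.time() * 1000)
        query = f"{query}&timestamp={ts}" if query else f"timestamp={ts}"
        signature = hmac.new(key_secret.encode("utf8"), query.encode("utf8"), hashlib.sha256).hexdigest()
        return f"{api}{endpoint}?{query}&signature={signature}"

    url = signed_url("https://api.binance.com/api/v3/", "account", "my-secret", {})
    # sent with header {"X-MBX-APIKEY": "<api key>"}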
number of trades exceeds exchange time window, some data will not be retrieved for time %d", self.id, start) + if data[-1]["T"] == start: + LOG.warning( + "%s: number of trades exceeds exchange time window, some data will not be retrieved for time %d", + self.id, + start, + ) start += 1 else: - start = data[-1]['T'] + start = data[-1]["T"] yield [self._trade_normalization(symbol, d) for d in data] @@ -100,19 +140,27 @@ async def trades(self, symbol: str, start=None, end=None, retry_count=1, retry_d def _trade_normalization(self, symbol: str, trade: list) -> dict: ret = { - 'timestamp': self.timestamp_normalize(trade['T']), - 'symbol': self.exchange_symbol_to_std_symbol(symbol), - 'id': trade['a'], - 'feed': self.id, - 'side': BUY if trade['m'] else SELL, - 'amount': abs(Decimal(trade['q'])), - 'price': Decimal(trade['p']), + "timestamp": self.timestamp_normalize(trade["T"]), + "symbol": self.exchange_symbol_to_std_symbol(symbol), + "id": trade["a"], + "feed": self.id, + "side": BUY if trade["m"] else SELL, + "amount": abs(Decimal(trade["q"])), + "price": Decimal(trade["p"]), } return ret - async def candles(self, symbol: str, start=None, end=None, interval='1m', retry_count=1, retry_delay=60): + async def candles( + self, + symbol: str, + start=None, + end=None, + interval="1m", + retry_count=1, + retry_delay=60, + ): sym = self.std_symbol_to_exchange_symbol(symbol) - ep = f'{self.api}klines?symbol={sym}&interval={interval}&limit=1000' + ep = f"{self.api}klines?symbol={sym}&interval={interval}&limit=1000" start, end = self._interval_normalize(start, end) if start and end: @@ -121,13 +169,33 @@ async def candles(self, symbol: str, start=None, end=None, interval='1m', retry_ while True: if start and end: - endpoint = f'{ep}&startTime={start}&endTime={end}' + endpoint = f"{ep}&startTime={start}&endTime={end}" else: endpoint = ep - r = await self.http_conn.read(endpoint, retry_count=retry_count, retry_delay=retry_delay) + r = await self.http_conn.read( + endpoint, retry_count=retry_count, retry_delay=retry_delay + ) data = json.loads(r, parse_float=Decimal) start = data[-1][6] - data = [Candle(self.id, symbol, self.timestamp_normalize(e[0]), self.timestamp_normalize(e[6]), interval, e[8], Decimal(e[1]), Decimal(e[4]), Decimal(e[2]), Decimal(e[3]), Decimal(e[5]), True, self.timestamp_normalize(e[6]), raw=e) for e in data] + data = [ + Candle( + self.id, + symbol, + self.timestamp_normalize(e[0]), + self.timestamp_normalize(e[6]), + interval, + e[8], + Decimal(e[1]), + Decimal(e[4]), + Decimal(e[2]), + Decimal(e[3]), + Decimal(e[5]), + True, + self.timestamp_normalize(e[6]), + raw=e, + ) + for e in data + ] yield data if len(data) < 1000 or end is None: @@ -135,95 +203,161 @@ async def candles(self, symbol: str, start=None, end=None, interval='1m', retry_ await asyncio.sleep(1 / self.request_limit) # Trading APIs - async def place_order(self, symbol: str, side: str, order_type: str, amount: Decimal, price=None, time_in_force=None, test=False): + async def place_order( + self, + symbol: str, + side: str, + order_type: str, + amount: Decimal, + price=None, + time_in_force=None, + test=False, + ): if order_type == MARKET and price: - raise ValueError('Cannot specify price on a market order') + raise ValueError("Cannot specify price on a market order") if order_type == LIMIT: if not price: - raise ValueError('Must specify price on a limit order') + raise ValueError("Must specify price on a limit order") if not time_in_force: - raise ValueError('Must specify time in force on a limit order') + raise 
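The candle fetcher above pages through `klines` 1000 rows at a time by replacing `start` with the close time of the last candle returned (index 6 of each row) until fewer than 1000 rows come back. A schematic of that loop with the HTTP call stubbed out, so only the pagination logic is shown:

    def page_klines(fetch, start: int, end: int, limit: int = 1000):
        """fetch(start, end) -> list of klines; index 6 of each row is the close time (ms)."""
        while True:
            rows = fetch(start, end)
            if not rows:
                return
            yield rows
            if len(rows) < limit:
                return
            start = rows[-1][6]  # resume just after the last candle already seen

    # Example with a fake fetcher returning two pages of synthetic rows.
    pages = [[[0] * 6 + [i] for i in range(1000)], [[0] * 6 + [1000]]]

    def fake_fetch(s, e):
        return pages.pop(0)

    total = sum(len(p) for p in page_klines(fake_fetch, 0, 10_000))  # 1001 rows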
ValueError("Must specify time in force on a limit order") ot = self.normalize_order_options(order_type) sym = self.std_symbol_to_exchange_symbol(symbol) parameters = { - 'symbol': sym, - 'side': 'BUY' if side is BUY else 'SELL', - 'type': ot, - 'quantity': str(amount), + "symbol": sym, + "side": "BUY" if side is BUY else "SELL", + "type": ot, + "quantity": str(amount), } if price: - parameters['price'] = str(price) + parameters["price"] = str(price) if time_in_force: - parameters['timeInForce'] = self.normalize_order_options(time_in_force) + parameters["timeInForce"] = self.normalize_order_options(time_in_force) - data = await self._request(POST, 'test' if test else 'order', auth=True, payload=parameters) + data = await self._request( + POST, "test" if test else "order", auth=True, payload=parameters + ) return data async def cancel_order(self, order_id: str, symbol: str): sym = self.std_symbol_to_exchange_symbol(symbol) - data = await self._request(DELETE, 'order', auth=True, payload={'symbol': sym, 'orderId': order_id}) + data = await self._request( + DELETE, "order", auth=True, payload={"symbol": sym, "orderId": order_id} + ) return data async def balances(self): - data = await self._request(GET, 'account', auth=True) - return data['balances'] + data = await self._request(GET, "account", auth=True) + return data["balances"] async def orders(self, symbol: str = None): - data = await self._request(GET, 'openOrders', auth=True, payload={'symbol': self.std_symbol_to_exchange_symbol(symbol)} if symbol else {}) + data = await self._request( + GET, + "openOrders", + auth=True, + payload={"symbol": self.std_symbol_to_exchange_symbol(symbol)} + if symbol + else {}, + ) return data async def order_status(self, order_id: str): - data = await self._request(GET, 'order', auth=True, payload={'orderId': order_id}) + data = await self._request( + GET, "order", auth=True, payload={"orderId": order_id} + ) return data class BinanceFuturesRestMixin(BinanceRestMixin): - api = 'https://fapi.binance.com/fapi/v1/' + api = "https://fapi.binance.com/fapi/v1/" rest_channels = ( - TRADES, ORDER_STATUS, CANCEL_ORDER, PLACE_ORDER, BALANCES, ORDERS, POSITIONS + TRADES, + ORDER_STATUS, + CANCEL_ORDER, + PLACE_ORDER, + BALANCES, + ORDERS, + POSITIONS, ) - async def place_order(self, symbol: str, side: str, order_type: str, amount: Decimal, price=None, time_in_force=None): - data = await super().place_order(symbol, side, order_type, amount, price=price, time_in_force=time_in_force, test=False) + async def place_order( + self, + symbol: str, + side: str, + order_type: str, + amount: Decimal, + price=None, + time_in_force=None, + ): + data = await super().place_order( + symbol, + side, + order_type, + amount, + price=price, + time_in_force=time_in_force, + test=False, + ) return data async def balances(self): - data = await self._request(GET, 'account', auth=True, api='https://fapi.binance.com/fapi/v2/') - return data['assets'] + data = await self._request( + GET, "account", auth=True, api="https://fapi.binance.com/fapi/v2/" + ) + return data["assets"] async def positions(self): - data = await self._request(GET, 'account', auth=True, api='https://fapi.binance.com/fapi/v2/') - return data['positions'] + data = await self._request( + GET, "account", auth=True, api="https://fapi.binance.com/fapi/v2/" + ) + return data["positions"] class BinanceDeliveryRestMixin(BinanceRestMixin): - api = 'https://dapi.binance.com/dapi/v1/' + api = "https://dapi.binance.com/dapi/v1/" rest_channels = ( - TRADES, ORDER_STATUS, CANCEL_ORDER, 
PLACE_ORDER, BALANCES, ORDERS, POSITIONS + TRADES, + ORDER_STATUS, + CANCEL_ORDER, + PLACE_ORDER, + BALANCES, + ORDERS, + POSITIONS, ) - async def place_order(self, symbol: str, side: str, order_type: str, amount: Decimal, price=None, time_in_force=None): - data = await super().place_order(symbol, side, order_type, amount, price=price, time_in_force=time_in_force, test=False) + async def place_order( + self, + symbol: str, + side: str, + order_type: str, + amount: Decimal, + price=None, + time_in_force=None, + ): + data = await super().place_order( + symbol, + side, + order_type, + amount, + price=price, + time_in_force=time_in_force, + test=False, + ) return data async def balances(self): - data = await self._request(GET, 'account', auth=True) - return data['assets'] + data = await self._request(GET, "account", auth=True) + return data["assets"] async def positions(self): - data = await self._request(GET, 'account', auth=True) - return data['positions'] + data = await self._request(GET, "account", auth=True) + return data["positions"] class BinanceUSRestMixin(BinanceRestMixin): - api = 'https://api.binance.us/api/v3/' - rest_channels = ( - TRADES - ) + api = "https://api.binance.us/api/v3/" + rest_channels = TRADES class BinanceTRRestMixin(BinanceRestMixin): - api = 'https://api.binance.me/api/v3/' - rest_channels = ( - TRADES - ) + api = "https://api.binance.me/api/v3/" + rest_channels = TRADES diff --git a/cryptofeed/exchanges/mixins/coinbase_rest.py b/cryptofeed/exchanges/mixins/coinbase_rest.py index 99e12d515..dcd5a0bbc 100644 --- a/cryptofeed/exchanges/mixins/coinbase_rest.py +++ b/cryptofeed/exchanges/mixins/coinbase_rest.py @@ -1,9 +1,10 @@ -''' +""" Copyright (C) 2017-2025 Bryant Moscon - bmoscon@gmail.com Please see the LICENSE file for the terms and conditions associated with this software. 
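The Binance REST mixins above expose historical-data endpoints (`trades`, `candles`) as async generators that yield batches of normalized records, alongside the authenticated trading calls (`place_order`, `cancel_order`, `balances`). A minimal, illustrative usage sketch, assuming cryptofeed's `Binance` exchange class and that ISO-8601 date strings are accepted for `start`/`end` (they are passed through the mixin's `_interval_normalize`):

```python
import asyncio

from cryptofeed.exchanges import Binance


async def main():
    binance = Binance()  # public market-data endpoints need no credentials

    # trades() yields lists of normalized trade dicts (see _trade_normalization above)
    async for batch in binance.trades("BTC-USDT", start="2024-01-01", end="2024-01-02"):
        for trade in batch:
            print(trade["timestamp"], trade["side"], trade["price"], trade["amount"])

    # candles() yields lists of Candle objects, paginated 1000 rows per request
    async for candles in binance.candles("BTC-USDT", interval="1m", start="2024-01-01", end="2024-01-02"):
        print(f"received {len(candles)} candles")


asyncio.run(main())
```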
-''' +""" + import asyncio import base64 from cryptofeed.util.time import timedelta_str_to_sec @@ -17,93 +18,157 @@ from cryptofeed.json_utils import json -from cryptofeed.defines import BUY, CANCELLED, FILLED, FILL_OR_KILL, IMMEDIATE_OR_CANCEL, MAKER_OR_CANCEL, MARKET, OPEN, PARTIAL, PENDING, SELL, TRADES, TICKER, L2_BOOK, L3_BOOK, ORDER_INFO, ORDER_STATUS, CANDLES, CANCEL_ORDER, PLACE_ORDER, BALANCES, TRADE_HISTORY, LIMIT +from cryptofeed.defines import ( + BUY, + CANCELLED, + FILLED, + FILL_OR_KILL, + IMMEDIATE_OR_CANCEL, + MAKER_OR_CANCEL, + MARKET, + OPEN, + PARTIAL, + PENDING, + SELL, + TRADES, + TICKER, + L2_BOOK, + L3_BOOK, + ORDER_INFO, + ORDER_STATUS, + CANDLES, + CANCEL_ORDER, + PLACE_ORDER, + BALANCES, + TRADE_HISTORY, + LIMIT, +) from cryptofeed.exceptions import UnexpectedMessage from cryptofeed.exchange import RestExchange from cryptofeed.types import OrderBook, Candle, Trade, Ticker, OrderInfo, Balance -LOG = logging.getLogger('feedhandler') +LOG = logging.getLogger("feedhandler") class CoinbaseRestMixin(RestExchange): api = "https://api.pro.coinbase.com" sandbox_api = "https://api-public.sandbox.pro.coinbase.com" rest_channels = ( - TRADES, TICKER, L2_BOOK, L3_BOOK, ORDER_INFO, ORDER_STATUS, CANDLES, CANCEL_ORDER, PLACE_ORDER, BALANCES, TRADE_HISTORY + TRADES, + TICKER, + L2_BOOK, + L3_BOOK, + ORDER_INFO, + ORDER_STATUS, + CANDLES, + CANCEL_ORDER, + PLACE_ORDER, + BALANCES, + TRADE_HISTORY, ) order_options = { - LIMIT: 'limit', - MARKET: 'market', - FILL_OR_KILL: {'time_in_force': 'FOK'}, - IMMEDIATE_OR_CANCEL: {'time_in_force': 'IOC'}, - MAKER_OR_CANCEL: {'post_only': 1}, + LIMIT: "limit", + MARKET: "market", + FILL_OR_KILL: {"time_in_force": "FOK"}, + IMMEDIATE_OR_CANCEL: {"time_in_force": "IOC"}, + MAKER_OR_CANCEL: {"post_only": 1}, } def _order_status(self, data: dict): - if 'status' not in data: + if "status" not in data: raise UnexpectedMessage(f"Message from exchange: {data}") - status = data['status'] - if data['status'] == 'done' and data['done_reason'] == 'canceled': + status = data["status"] + if data["status"] == "done" and data["done_reason"] == "canceled": status = PARTIAL - elif data['status'] == 'done': + elif data["status"] == "done": status = FILLED - elif data['status'] == 'open': + elif data["status"] == "open": status = OPEN - elif data['status'] == 'pending': + elif data["status"] == "pending": status = PENDING - elif data['status'] == CANCELLED: + elif data["status"] == CANCELLED: status = CANCELLED - if 'price' not in data: - price = Decimal(data['executed_value']) / Decimal(data['filled_size']) + if "price" not in data: + price = Decimal(data["executed_value"]) / Decimal(data["filled_size"]) else: - price = Decimal(data['price']) + price = Decimal(data["price"]) # exchange, symbol, id, side, status, type, price, amount, remaining, timestamp, account=None, raw=None): return OrderInfo( self.id, - data['product_id'], - data['id'], - BUY if data['side'] == 'buy' else SELL, + data["product_id"], + data["id"], + BUY if data["side"] == "buy" else SELL, status, - LIMIT if data['type'] == 'limit' else MARKET, + LIMIT if data["type"] == "limit" else MARKET, price, - Decimal(data['size']), - Decimal(data['size']) - Decimal(data['filled_size']), - data['done_at'].timestamp() if 'done_at' in data else data['created_at'].timestamp(), - client_order_id=data['client_oid'], - raw=data + Decimal(data["size"]), + Decimal(data["size"]) - Decimal(data["filled_size"]), + data["done_at"].timestamp() + if "done_at" in data + else data["created_at"].timestamp(), + 
client_order_id=data["client_oid"], + raw=data, ) - def _generate_signature(self, endpoint: str, method: str, body=''): + def _generate_signature(self, endpoint: str, method: str, body=""): timestamp = str(time.time()) - message = ''.join([timestamp, method, endpoint, body]) + message = "".join([timestamp, method, endpoint, body]) hmac_key = base64.b64decode(self.key_secret) - signature = hmac.new(hmac_key, message.encode('ascii'), hashlib.sha256) - signature_b64 = base64.b64encode(signature.digest()).decode('utf-8') + signature = hmac.new(hmac_key, message.encode("ascii"), hashlib.sha256) + signature_b64 = base64.b64encode(signature.digest()).decode("utf-8") return { - 'CB-ACCESS-KEY': self.key_id, # The api key as a string. - 'CB-ACCESS-SIGN': signature_b64, # The base64-encoded signature (see Signing a Message). - 'CB-ACCESS-TIMESTAMP': timestamp, # A timestamp for your request. - 'CB-ACCESS-PASSPHRASE': self.key_passphrase, # The passphrase you specified when creating the API key + "CB-ACCESS-KEY": self.key_id, # The api key as a string. + "CB-ACCESS-SIGN": signature_b64, # The base64-encoded signature (see Signing a Message). + "CB-ACCESS-TIMESTAMP": timestamp, # A timestamp for your request. + "CB-ACCESS-PASSPHRASE": self.key_passphrase, # The passphrase you specified when creating the API key "Accept": "application/json", - "Content-Type": "application/json" + "Content-Type": "application/json", } - async def _request(self, method: str, endpoint: str, auth: bool = False, body=None, retry_count=1, retry_delay=60): + async def _request( + self, + method: str, + endpoint: str, + auth: bool = False, + body=None, + retry_count=1, + retry_delay=60, + ): api = self.sandbox_api if self.sandbox else self.api header = None if auth: - header = self._generate_signature(endpoint, method, body=json.dumps(body) if body else '') + header = self._generate_signature( + endpoint, method, body=json.dumps(body) if body else "" + ) if method == "GET": - data = await self.http_conn.read(f'{api}{endpoint}', header=header, retry_count=retry_count, retry_delay=retry_delay) - elif method == 'POST': - data = await self.http_conn.write(f'{api}{endpoint}', msg=json.dumps(body), header=header, retry_count=retry_count, retry_delay=retry_delay) - elif method == 'DELETE': - data = await self.http_conn.delete(f'{api}{endpoint}', header=header, retry_count=retry_count, retry_delay=retry_delay) + data = await self.http_conn.read( + f"{api}{endpoint}", + header=header, + retry_count=retry_count, + retry_delay=retry_delay, + ) + elif method == "POST": + data = await self.http_conn.write( + f"{api}{endpoint}", + msg=json.dumps(body), + header=header, + retry_count=retry_count, + retry_delay=retry_delay, + ) + elif method == "DELETE": + data = await self.http_conn.delete( + f"{api}{endpoint}", + header=header, + retry_count=retry_count, + retry_delay=retry_delay, + ) + else: + raise ValueError(f"Unsupported HTTP method: {method}") return json.loads(data, parse_float=Decimal) async def _date_to_trade(self, symbol: str, timestamp: float) -> int: @@ -111,22 +176,24 @@ async def _date_to_trade(self, symbol: str, timestamp: float) -> int: Coinbase uses trade ids to query historical trades, so need to search for the start date """ - upper = await self._request('GET', f'/products/{symbol}/trades') - upper = upper[0]['trade_id'] + upper = await self._request("GET", f"/products/{symbol}/trades") + upper = upper[0]["trade_id"] lower = 0 bound = (upper - lower) // 2 while True: - data = await self._request('GET', 
f'/products/{symbol}/trades?after={bound}') + data = await self._request( + "GET", f"/products/{symbol}/trades?after={bound}" + ) data = list(reversed(data)) if len(data) == 0: return bound - if data[0]['time'].timestamp() <= timestamp <= data[-1]['time'].timestamp(): + if data[0]["time"].timestamp() <= timestamp <= data[-1]["time"].timestamp(): for idx in range(len(data)): - d = data[idx]['time'].timestamp() + d = data[idx]["time"].timestamp() if d >= timestamp: - return data[idx]['trade_id'] + return data[idx]["trade_id"] else: - if timestamp > data[0]['time'].timestamp(): + if timestamp > data[0]["time"].timestamp(): lower = bound bound = (upper + lower) // 2 else: @@ -138,14 +205,17 @@ def _trade_normalize(self, symbol: str, data: dict) -> dict: return Trade( self.id, symbol, - SELL if data['side'] == 'buy' else BUY, - Decimal(data['size']), - Decimal(data['price']), - data['time'].timestamp(), - id=str(data['trade_id']), - raw=data) - - async def trades(self, symbol: str, start=None, end=None, retry_count=1, retry_delay=60): + SELL if data["side"] == "buy" else BUY, + Decimal(data["size"]), + Decimal(data["price"]), + data["time"].timestamp(), + id=str(data["trade_id"]), + raw=data, + ) + + async def trades( + self, symbol: str, start=None, end=None, retry_count=1, retry_delay=60 + ): start, end = self._interval_normalize(start, end) if start: start_id = await self._date_to_trade(symbol, start) @@ -159,7 +229,12 @@ async def trades(self, symbol: str, start=None, end=None, retry_count=1, retry_d limit = 100 - (start_id - end_id) start_id = end_id if limit > 0: - data = await self._request('GET', f'/products/{symbol}/trades?after={start_id}&limit={limit}', retry_count=retry_count, retry_delay=retry_delay) + data = await self._request( + "GET", + f"/products/{symbol}/trades?after={start_id}&limit={limit}", + retry_count=retry_count, + retry_delay=retry_delay, + ) data = list(reversed(data)) yield list(map(lambda x: self._trade_normalize(symbol, x), data)) @@ -167,32 +242,52 @@ async def trades(self, symbol: str, start=None, end=None, retry_count=1, retry_d break await asyncio.sleep(1 / self.request_limit) else: - data = await self._request('GET', f"/products/{symbol}/trades", retry_count=retry_count, retry_delay=retry_delay) + data = await self._request( + "GET", + f"/products/{symbol}/trades", + retry_count=retry_count, + retry_delay=retry_delay, + ) yield [self._trade_normalize(symbol, d) for d in data] async def ticker(self, symbol: str, retry_count=1, retry_delay=60): - data = await self._request('GET', f'/products/{symbol}/ticker', retry_count=retry_count, retry_delay=retry_delay) + data = await self._request( + "GET", + f"/products/{symbol}/ticker", + retry_count=retry_count, + retry_delay=retry_delay, + ) return Ticker( self.id, symbol, - Decimal(data['bid']), - Decimal(data['ask']), - data['time'].timestamp(), - raw=data + Decimal(data["bid"]), + Decimal(data["ask"]), + data["time"].timestamp(), + raw=data, ) async def l2_book(self, symbol: str, retry_count=1, retry_delay=60): - data = await self._request('GET', f'/products/{symbol}/book?level=2', retry_count=retry_count, retry_delay=retry_delay) + data = await self._request( + "GET", + f"/products/{symbol}/book?level=2", + retry_count=retry_count, + retry_delay=retry_delay, + ) ret = OrderBook(self.id, symbol) - ret.book.bids = {Decimal(u[0]): Decimal(u[1]) for u in data['bids']} - ret.book.asks = {Decimal(u[0]): Decimal(u[1]) for u in data['asks']} + ret.book.bids = {Decimal(u[0]): Decimal(u[1]) for u in data["bids"]} + 
ret.book.asks = {Decimal(u[0]): Decimal(u[1]) for u in data["asks"]} return ret async def l3_book(self, symbol: str, retry_count=1, retry_delay=60): - data = await self._request('GET', f'/products/{symbol}/book?level=3', retry_count=retry_count, retry_delay=retry_delay) + data = await self._request( + "GET", + f"/products/{symbol}/book?level=3", + retry_count=retry_count, + retry_delay=retry_delay, + ) ret = OrderBook(self.id, symbol) - for side in ('bids', 'asks'): + for side in ("bids", "asks"): for price, size, order_id in data[side]: price = Decimal(price) size = Decimal(size) @@ -203,16 +298,19 @@ async def l3_book(self, symbol: str, retry_count=1, retry_delay=60): return ret async def balances(self) -> List[Balance]: - data = await self._request('GET', "/accounts", auth=True) + data = await self._request("GET", "/accounts", auth=True) # def __init__(self, exchange, currency, balance, reserved, raw=None): - return [Balance( - self.id, - entry['currency'], - Decimal(entry['balance']), - Decimal(entry['balance']) - Decimal(entry['available']), - raw=entry - ) for entry in data] + return [ + Balance( + self.id, + entry["currency"], + Decimal(entry["balance"]), + Decimal(entry["balance"]) - Decimal(entry["available"]), + raw=entry, + ) + for entry in data + ] async def orders(self): data = await self._request("GET", "/orders", auth=True) @@ -222,27 +320,36 @@ async def order_status(self, order_id: str): order = await self._request("GET", f"/orders/{order_id}", auth=True) return self._order_status(order) - async def place_order(self, symbol: str, side: str, order_type: str, amount: Decimal, price=None, client_order_id=None, options=None): + async def place_order( + self, + symbol: str, + side: str, + order_type: str, + amount: Decimal, + price=None, + client_order_id=None, + options=None, + ): ot = self.normalize_order_options(order_type) if ot == MARKET and price: - raise ValueError('Cannot specify price on a market order') + raise ValueError("Cannot specify price on a market order") if ot == LIMIT and not price: - raise ValueError('Must specify price on a limit order') + raise ValueError("Must specify price on a limit order") body = { - 'product_id': symbol, - 'side': 'buy' if BUY else SELL, - 'size': str(amount), - 'type': ot + "product_id": symbol, + "side": "buy" if BUY else SELL, + "size": str(amount), + "type": ot, } if price: - body['price'] = str(price) + body["price"] = str(price) if client_order_id: - body['client_oid'] = client_order_id + body["client_oid"] = client_order_id if options: _ = [body.update(self.normalize_order_options(o)) for o in options] - data = await self._request('POST', '/orders', auth=True, body=body) + data = await self._request("POST", "/orders", auth=True, body=body) return self._order_status(data) async def cancel_order(self, order_id: str): @@ -256,17 +363,20 @@ async def cancel_order(self, order_id: str): return None async def trade_history(self, symbol: str, start=None, end=None): - data = await self._request("GET", f"/orders?product_id={symbol}&status=done", auth=True) + data = await self._request( + "GET", f"/orders?product_id={symbol}&status=done", auth=True + ) return [ { - 'order_id': order['id'], - 'trade_id': order['id'], - 'side': BUY if order['side'] == 'buy' else SELL, - 'price': Decimal(order['executed_value']) / Decimal(order['filled_size']), - 'amount': Decimal(order['filled_size']), - 'timestamp': order['done_at'].timestamp(), - 'fee_amount': Decimal(order['fill_fees']), - 'fee_currency': symbol.split('-')[1] + "order_id": order["id"], 
+ "trade_id": order["id"], + "side": BUY if order["side"] == "buy" else SELL, + "price": Decimal(order["executed_value"]) + / Decimal(order["filled_size"]), + "amount": Decimal(order["filled_size"]), + "timestamp": order["done_at"].timestamp(), + "fee_amount": Decimal(order["fill_fees"]), + "fee_currency": symbol.split("-")[1], } for order in data ] @@ -286,15 +396,22 @@ def _candle_normalize(self, symbol: str, data: list, interval: str) -> dict: Decimal(data[5]), True, data[0], - raw=data + raw=data, ) def _to_isoformat(self, timestamp): - """Required as cryptostore doesnt allow +00:00 for UTC requires Z explicitly. - """ + """Required as cryptostore doesnt allow +00:00 for UTC requires Z explicitly.""" return dt.utcfromtimestamp(timestamp).isoformat() - async def candles(self, symbol: str, start: Optional[Union[str, dt, float]] = None, end: Optional[Union[str, dt, float]] = None, interval: Optional[str] = '1m', retry_count=1, retry_delay=60): + async def candles( + self, + symbol: str, + start: Optional[Union[str, dt, float]] = None, + end: Optional[Union[str, dt, float]] = None, + interval: Optional[str] = "1m", + retry_count=1, + retry_delay=60, + ): """ Historic rate OHLC candles [ @@ -313,8 +430,17 @@ async def candles(self, symbol: str, start: Optional[Union[str, dt, float]] = No string corresponding to the interval (1m, 5m, etc) """ limit = 300 # return max of 300 rows per request - valid_intervals = {'1m': 60, '5m': 300, '15m': 900, '1h': 3600, '6h': 21600, '1d': 86400} - assert interval in list(valid_intervals.keys()), f'Interval must be one of {", ".join(list(valid_intervals.keys()))}' + valid_intervals = { + "1m": 60, + "5m": 300, + "15m": 900, + "1h": 3600, + "6h": 21600, + "1d": 86400, + } + assert interval in list(valid_intervals.keys()), ( + f"Interval must be one of {', '.join(list(valid_intervals.keys()))}" + ) start, end = self._interval_normalize(start, end) if start: @@ -329,12 +455,21 @@ async def candles(self, symbol: str, start: Optional[Union[str, dt, float]] = No if start_id > end_id_max: break - url = f'/products/{symbol}/candles?granularity={valid_intervals[interval]}&start={self._to_isoformat(start_id)}&end={self._to_isoformat(end_id)}' - data = await self._request('GET', url, retry_count=retry_count, retry_delay=retry_delay) + url = f"/products/{symbol}/candles?granularity={valid_intervals[interval]}&start={self._to_isoformat(start_id)}&end={self._to_isoformat(end_id)}" + data = await self._request( + "GET", url, retry_count=retry_count, retry_delay=retry_delay + ) data = list(reversed(data)) - yield list(map(lambda x: self._candle_normalize(symbol, x, interval), data)) + yield list( + map(lambda x: self._candle_normalize(symbol, x, interval), data) + ) await asyncio.sleep(1 / self.request_limit) start_id = end_id + valid_intervals[interval] else: - data = await self._request('GET', f"/products/{symbol}/candles?granularity={valid_intervals[interval]}", retry_count=retry_count, retry_delay=retry_delay) + data = await self._request( + "GET", + f"/products/{symbol}/candles?granularity={valid_intervals[interval]}", + retry_count=retry_count, + retry_delay=retry_delay, + ) yield [self._candle_normalize(symbol, d, interval) for d in data] diff --git a/cryptofeed/kafka_callback.py b/cryptofeed/kafka_callback.py index 5d787d67c..a49a2b34f 100644 --- a/cryptofeed/kafka_callback.py +++ b/cryptofeed/kafka_callback.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +import functools import logging from abc import ABC, abstractmethod from dataclasses 
import asdict, dataclass @@ -14,6 +15,7 @@ from cryptofeed.backends.backend import BackendCallback from cryptofeed.json_utils import dumps_bytes +from cryptofeed.backends.kafka_schema import SchemaRegistry, SchemaRegistryConfig from .kafka_producer import KafkaProducer @@ -62,8 +64,7 @@ def validate_strategy(cls, v: str) -> str: """Validate topic strategy is supported.""" if v not in {"consolidated", "per_symbol"}: raise ValueError( - f"Invalid topic strategy: {v}. " - f"Must be 'consolidated' or 'per_symbol'" + f"Invalid topic strategy: {v}. Must be 'consolidated' or 'per_symbol'" ) return v @@ -214,9 +215,7 @@ def validate_compression(cls, v: str) -> str: """Validate compression_type is supported.""" valid = {"none", "gzip", "snappy", "lz4", "zstd"} if v not in valid: - raise ValueError( - f"compression_type must be one of {valid}, got {v}" - ) + raise ValueError(f"compression_type must be one of {valid}, got {v}") return v @@ -256,13 +255,34 @@ class KafkaConfig(BaseModel): bootstrap_servers: list[str] = Field(description="Kafka broker addresses") topic: KafkaTopicConfig = Field( - default_factory=KafkaTopicConfig, - description="Topic configuration" + default_factory=KafkaTopicConfig, description="Topic configuration" ) partition: KafkaPartitionConfig = Field( - default_factory=KafkaPartitionConfig, - description="Partition configuration" + default_factory=KafkaPartitionConfig, description="Partition configuration" + ) + schema_registry: SchemaRegistryConfig | None = Field( + default=None, description="Schema registry configuration (optional)" + ) + dual_production: bool = Field( + default=False, + description="Produce to legacy v1 and registry-backed v2 topics simultaneously", ) + registry_topic_suffix: str = Field( + default="v2", + description="Suffix appended to topic when producing schema-registry payloads", + ) + registry_failure_policy: str = Field( + default="fail", + description="Behavior when schema registry is unavailable: 'fail' or 'buffer'", + ) + + @field_validator("registry_failure_policy") + @classmethod + def validate_registry_policy(cls, v: str) -> str: + policy = v.lower() + if policy not in {"fail", "buffer"}: + raise ValueError("registry_failure_policy must be 'fail' or 'buffer'") + return policy acks: str = Field(default="all", description="Delivery guarantee") idempotence: bool = Field(default=True, description="Enable idempotence") retries: int = Field(default=3, description="Number of retries") @@ -333,7 +353,7 @@ def from_yaml(cls, yaml_path: str | Path) -> KafkaConfig: if not yaml_path.exists(): raise FileNotFoundError(f"Configuration file not found: {yaml_path}") - with open(yaml_path, 'r') as f: + with open(yaml_path, "r") as f: config_dict = yaml.safe_load(f) if config_dict is None: @@ -349,8 +369,9 @@ class TopicStrategy(Enum): CONSOLIDATED: Single topic per data type, aggregates all exchanges and symbols PER_SYMBOL: One topic per exchange-symbol pair (legacy support, higher topic count) """ - CONSOLIDATED = 'consolidated' - PER_SYMBOL = 'per_symbol' + + CONSOLIDATED = "consolidated" + PER_SYMBOL = "per_symbol" class TopicManager: @@ -390,12 +411,23 @@ class TopicManager: # Supported data types (normalized to singular form for topic naming) # These match the protobuf schema message types and topic naming conventions SUPPORTED_DATA_TYPES = { - 'trade', 'orderbook', 'ticker', 'candle', 'funding', - 'liquidation', 'index', 'openinterest', 'fill', 'balance', - 'position', 'margin', 'order', 'transaction' + "trade", + "orderbook", + "ticker", + "candle", + 
"funding", + "liquidation", + "index", + "openinterest", + "fill", + "balance", + "position", + "margin", + "order", + "transaction", } - STRATEGIES = {'consolidated', 'per_symbol'} + STRATEGIES = {"consolidated", "per_symbol"} @staticmethod def validate_strategy(strategy: str) -> None: @@ -424,10 +456,9 @@ def validate_data_type(data_type: str) -> None: ValueError: If data type is not supported """ if data_type not in TopicManager.SUPPORTED_DATA_TYPES: - sorted_types = ', '.join(sorted(TopicManager.SUPPORTED_DATA_TYPES)) + sorted_types = ", ".join(sorted(TopicManager.SUPPORTED_DATA_TYPES)) raise ValueError( - f"Unsupported data type: {data_type}. " - f"Supported types: {sorted_types}" + f"Unsupported data type: {data_type}. Supported types: {sorted_types}" ) @staticmethod @@ -443,7 +474,7 @@ def _normalize_symbol(symbol: str) -> str: Returns: Normalized symbol in lowercase with hyphens """ - return str(symbol).lower().replace('_', '-') + return str(symbol).lower().replace("_", "-") @staticmethod def _normalize_exchange(exchange: str) -> str: @@ -464,8 +495,8 @@ def get_topic( data_type: str, symbol: str, exchange: str, - strategy: str = 'consolidated', - prefix: Optional[str] = None + strategy: str = "consolidated", + prefix: Optional[str] = None, ) -> str: """Generate topic name based on strategy. @@ -514,28 +545,30 @@ def get_topic( TopicManager.validate_data_type(data_type) # Validate required parameters for per_symbol strategy - if strategy == 'per_symbol': + if strategy == "per_symbol": if symbol is None or not symbol: raise ValueError("symbol is required for per_symbol strategy") if exchange is None or not exchange: raise ValueError("exchange is required for per_symbol strategy") # Generate base topic - if strategy == 'consolidated': + if strategy == "consolidated": # Consolidated: cryptofeed.{data_type} - base_topic = f'cryptofeed.{data_type}' - elif strategy == 'per_symbol': + base_topic = f"cryptofeed.{data_type}" + elif strategy == "per_symbol": # Per-symbol: cryptofeed.{data_type}.{exchange}.{symbol} normalized_symbol = TopicManager._normalize_symbol(symbol) normalized_exchange = TopicManager._normalize_exchange(exchange) - base_topic = f'cryptofeed.{data_type}.{normalized_exchange}.{normalized_symbol}' + base_topic = ( + f"cryptofeed.{data_type}.{normalized_exchange}.{normalized_symbol}" + ) else: # Should not reach here due to validate_strategy, but include for completeness raise ValueError(f"Unknown strategy: {strategy}") # Add prefix if provided and non-empty if prefix is not None and prefix.strip(): - return f'{prefix.strip()}.{base_topic}' + return f"{prefix.strip()}.{base_topic}" return base_topic @@ -547,21 +580,21 @@ def get_topic( # Method names follow BackendCallback conventions (may be plural or have underscores) # Topic names are singular and normalized for TopicManager validation _SUPPORTED_METHODS: Dict[str, str] = { - "trade": "trade", # method: trade → topic: trade - "orderbook": "orderbook", # method: orderbook → topic: orderbook - "ticker": "ticker", # method: ticker → topic: ticker - "candle": "candle", # method: candle → topic: candle - "liquidation": "liquidation", # method: liquidation → topic: liquidation - "funding": "funding", # method: funding → topic: funding - "open_interest": "openinterest", # method: open_interest → topic: openinterest (no underscore) - "order_info": "order", # method: order_info → topic: order - "balances": "balance", # method: balances (plural) → topic: balance (singular) - "transactions": "transaction", # method: transactions 
(plural) → topic: transaction (singular) - "fills": "fill", # method: fills (plural) → topic: fill (singular) - "index": "index", # method: index → topic: index - "indices": "index", # method: indices (plural) → topic: index (singular) - "position": "position", # method: position → topic: position - "positions": "position", # method: positions (plural) → topic: position (singular) + "trade": "trade", # method: trade → topic: trade + "orderbook": "orderbook", # method: orderbook → topic: orderbook + "ticker": "ticker", # method: ticker → topic: ticker + "candle": "candle", # method: candle → topic: candle + "liquidation": "liquidation", # method: liquidation → topic: liquidation + "funding": "funding", # method: funding → topic: funding + "open_interest": "openinterest", # method: open_interest → topic: openinterest (no underscore) + "order_info": "order", # method: order_info → topic: order + "balances": "balance", # method: balances (plural) → topic: balance (singular) + "transactions": "transaction", # method: transactions (plural) → topic: transaction (singular) + "fills": "fill", # method: fills (plural) → topic: fill (singular) + "index": "index", # method: index → topic: index + "indices": "index", # method: indices (plural) → topic: index (singular) + "position": "position", # method: position → topic: position + "positions": "position", # method: positions (plural) → topic: position (singular) } @@ -573,6 +606,9 @@ class _QueuedMessage: class KafkaCallback(BackendCallback): + # KafkaCallback doesn't use default_key (uses topic-based routing) + default_key = "unknown" + """Backend callback that routes normalized messages to Kafka. Supports two initialization modes: @@ -603,6 +639,11 @@ def __init__( partition_key_cache_size: int = 1000, enable_header_precomputation: bool = True, drain_frequency_ms: int = 10, + schema_registry_config: SchemaRegistryConfig | dict | None = None, + schema_registry_enabled: bool | None = None, + dual_production: bool | None = None, + registry_topic_suffix: str | None = None, + registry_failure_policy: str | None = None, **config: Any, ) -> None: # Handle KafkaConfig parameter (Task 4.2 - refactoring) @@ -613,6 +654,20 @@ def __init__( self.enable_idempotence = kafka_config.idempotence self.topic_config = kafka_config.topic self.partition_config = kafka_config.partition + schema_registry_config = ( + schema_registry_config or kafka_config.schema_registry + ) + dual_production = ( + kafka_config.dual_production + if dual_production is None + else dual_production + ) + registry_topic_suffix = ( + registry_topic_suffix or kafka_config.registry_topic_suffix + ) + registry_failure_policy = ( + registry_failure_policy or kafka_config.registry_failure_policy + ) # Extract other producer settings from config config.setdefault("batch_size", kafka_config.batch_size) config.setdefault("linger_ms", kafka_config.linger_ms) @@ -623,7 +678,9 @@ def __init__( # Backward compatible: direct parameters self.bootstrap_servers = list(bootstrap_servers) self.acks = acks - self.enable_idempotence = enable_idempotence if enable_idempotence is not None else True + self.enable_idempotence = ( + enable_idempotence if enable_idempotence is not None else True + ) # Create default configs for backward compatibility self.topic_config = KafkaTopicConfig() self.partition_config = KafkaPartitionConfig() @@ -648,7 +705,31 @@ def __init__( if serialization_format is not None: self.set_serialization_format(serialization_format) - self._queue: asyncio.Queue[_QueuedMessage | object] = 
asyncio.Queue(maxsize=queue_maxsize) + # Schema registry integration (v2 protobuf) + self._registry_topic_suffix = registry_topic_suffix or "v2" + self._registry_failure_policy = (registry_failure_policy or "fail").lower() + if self._registry_failure_policy not in {"fail", "buffer"}: + raise ValueError("registry_failure_policy must be 'fail' or 'buffer'") + + self._schema_registry: SchemaRegistry | None = None + if schema_registry_config is not None: + if isinstance(schema_registry_config, dict): + schema_registry_config = SchemaRegistryConfig(**schema_registry_config) + self._schema_registry = SchemaRegistry.create(schema_registry_config) + + if schema_registry_enabled is None: + self._schema_registry_enabled = self._schema_registry is not None + else: + self._schema_registry_enabled = schema_registry_enabled + + self._dual_production = bool(dual_production) if dual_production is not None else False + self._schema_id_cache: Dict[str, int] = {} + self._schema_version_v1 = "v1" + self._schema_version_v2 = "v2" + + self._queue: asyncio.Queue[_QueuedMessage | object] = asyncio.Queue( + maxsize=queue_maxsize + ) # Instantiate topic manager with config strategy (Task 4.3) self._topic_manager = TopicManager() @@ -666,12 +747,20 @@ def __init__( else: self._partition_key_cache = None - # Instantiate header enricher (Task 4.3) - self._header_enricher = HeaderEnricher( - content_type="application/x-protobuf" - if serialization_format == "protobuf" + # Instantiate header enrichers (Task 4.3 + v2 registry mode) + content_type_v1 = ( + "application/x-protobuf" + if self.serialization_format == "protobuf" else "application/json" ) + self._header_enricher = HeaderEnricher( + content_type=content_type_v1, + schema_version=self._schema_version_v1, + ) + self._header_enricher_v2 = HeaderEnricher( + content_type="application/vnd.confluent.protobuf", + schema_version=self._schema_version_v2, + ) self._producer = KafkaProducer( self.bootstrap_servers, @@ -687,6 +776,10 @@ def __init__( self._writer_task: asyncio.Task | None = None self._running: bool = False + async def write(self, data): + """Write data to Kafka via queue (implements BackendCallback abstract method).""" + await self._queue.put(data) + # ------------------------------------------------------------------ # Lifecycle helpers # ------------------------------------------------------------------ @@ -722,7 +815,9 @@ def is_connected(self) -> bool: def queue_size(self) -> int: return self._queue.qsize() - def _queue_message(self, data_type: str, obj: Any, receipt_timestamp: Optional[float] = None) -> bool: + def _queue_message( + self, data_type: str, obj: Any, receipt_timestamp: Optional[float] = None + ) -> bool: """Queue a message for processing with backpressure protection. 
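For reference, a construction sketch for the registry-enabled callback using the direct (backward-compatible) keyword parameters added above. The `schema_registry_config` dict field name (`url`) is hypothetical; the real field names are defined by `SchemaRegistryConfig` in `cryptofeed.backends.kafka_schema`.

```python
from cryptofeed.kafka_callback import KafkaCallback

callback = KafkaCallback(
    bootstrap_servers=["kafka-1:9092", "kafka-2:9092"],
    serialization_format="protobuf",  # the registry path only runs for protobuf payloads
    schema_registry_config={"url": "http://schema-registry:8081"},  # field name assumed
    dual_production=True,              # keep producing the legacy v1 topics as well
    registry_topic_suffix="v2",        # appended to the base topic, e.g. "<topic>.v2"
    registry_failure_policy="buffer",  # re-queue messages instead of failing when the registry is down
)
```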
Args: @@ -738,7 +833,9 @@ def _queue_message(self, data_type: str, obj: Any, receipt_timestamp: Optional[f - Drop message to prevent blocking upstream data ingestion - Emit metrics for monitoring and alerting """ - message = _QueuedMessage(data_type=data_type, obj=obj, receipt_timestamp=receipt_timestamp) + message = _QueuedMessage( + data_type=data_type, obj=obj, receipt_timestamp=receipt_timestamp + ) # Extract metadata for error logging exchange = getattr(obj, "exchange", "unknown") @@ -749,14 +846,17 @@ def _queue_message(self, data_type: str, obj: Any, receipt_timestamp: Optional[f except asyncio.QueueFull: LOG.error( "KafkaCallback queue is full; dropping %s message from %s/%s (queue size: %d)", - data_type, exchange, symbol, self._queue.maxsize, + data_type, + exchange, + symbol, + self._queue.maxsize, extra={ "exchange": exchange, "symbol": symbol, "data_type": data_type, "queue_size": self._queue.maxsize, "error_type": "queue_full", - } + }, ) return False return True @@ -775,10 +875,14 @@ async def _handler(obj, receipt_timestamp: float): setattr(self, name, _handler) return _handler - async def _handle_message(self, data_type: str, obj: Any, receipt_timestamp: float) -> None: + async def _handle_message( + self, data_type: str, obj: Any, receipt_timestamp: float + ) -> None: queued = self._queue_message(data_type, obj, receipt_timestamp) if not queued: - LOG.warning("KafkaCallback: dropped %s message due to full queue", data_type) + LOG.warning( + "KafkaCallback: dropped %s message due to full queue", data_type + ) # ------------------------------------------------------------------ # Serialization + Kafka writer loop @@ -806,7 +910,7 @@ def _topic_name(self, data_type: str, obj: Any) -> str: symbol=symbol, exchange=exchange, strategy=self._topic_strategy, - prefix=custom_prefix + prefix=custom_prefix, ) except Exception: # Fallback to old behavior for backward compatibility @@ -855,7 +959,11 @@ def _partition_key(self, obj: Any) -> Optional[bytes]: def _serialize_payload(self, obj: Any, receipt_timestamp: Optional[float]): """Serialize message payload using configured format.""" - timestamp = receipt_timestamp if receipt_timestamp is not None else getattr(obj, "timestamp", None) + timestamp = ( + receipt_timestamp + if receipt_timestamp is not None + else getattr(obj, "timestamp", None) + ) if self.serialization_format == "protobuf": from cryptofeed.backends.protobuf_helpers import serialize_to_protobuf @@ -867,6 +975,64 @@ def _serialize_payload(self, obj: Any, receipt_timestamp: Optional[float]): headers = [("content-type", b"application/json")] return payload, headers + def _schema_definition_for_data_type(self, data_type: str) -> str | None: + """Load .proto schema text for the given data_type (v2), if available. + + Returns None when the data type has no v2 schema. This allows the + caller to skip registry production while still producing the legacy + payload instead of dropping the message. 
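The registry branch below derives its Kafka topic and Schema Registry subject from the base topic name; an illustrative sketch of that naming scheme (example values use the consolidated strategy):

```python
# Base topic as produced by TopicManager for data_type "trade" (consolidated strategy).
base_topic = "cryptofeed.trade"
registry_topic_suffix = "v2"

topic_v2 = f"{base_topic}.{registry_topic_suffix}"  # "cryptofeed.trade.v2"
subject = f"{topic_v2}-value"                       # "cryptofeed.trade.v2-value", see _registry_subject()

# Schema IDs are cached per subject, so register_schema() is only called for the
# first message of each subject (and runs in an executor to keep the event loop responsive).
```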
+ """ + + filename_map = { + "trade": "trade.proto", + "trades": "trade.proto", + "ticker": "ticker.proto", + "tickers": "ticker.proto", + "orderbook": "order_book.proto", + "order_book": "order_book.proto", + "l2_book": "order_book.proto", + "candle": "candle.proto", + "candles": "candle.proto", + } + filename = filename_map.get(data_type) + if not filename: + return None + + proto_path = ( + Path(__file__).resolve().parents[1] + / "proto" + / "cryptofeed" + / "normalized" + / "v2" + / filename + ) + return proto_path.read_text(encoding="utf-8") + + async def _resolve_schema_id(self, subject: str, schema_definition: str) -> int: + """Register schema if needed and return schema ID (async via executor).""" + + if subject in self._schema_id_cache: + return self._schema_id_cache[subject] + + if not self._schema_registry: + raise RuntimeError("Schema registry not configured") + + loop = self._loop or asyncio.get_event_loop() + schema_id = await loop.run_in_executor( + None, + functools.partial( + self._schema_registry.register_schema, + subject, + schema_definition, + "PROTOBUF", + ), + ) + self._schema_id_cache[subject] = schema_id + return schema_id + + def _registry_subject(self, topic: str) -> str: + return f"{topic}-value" + async def _drain_once(self) -> None: """Process one message from queue (legacy mode, non-optimized). @@ -900,10 +1066,7 @@ async def _drain_once(self) -> None: LOG.error( "KafkaCallback: Failed to mark task as done: %s", e, - extra={ - "error_type": "task_done_error", - "error": str(e) - } + extra={"error_type": "task_done_error", "error": str(e)}, ) async def _drain_batch(self) -> None: @@ -976,20 +1139,29 @@ async def _process_message(self, message: _QueuedMessage) -> None: symbol = getattr(message.obj, "symbol", "unknown") data_type = message.data_type - # Step 1: Serialize payload + use_registry = ( + self._schema_registry_enabled and self.serialization_format == "protobuf" + ) + + # Step 1: Serialize payload (v1 path for legacy / dual mode) try: - payload, base_headers = self._serialize_payload(message.obj, message.receipt_timestamp) + payload_v1, base_headers = self._serialize_payload( + message.obj, message.receipt_timestamp + ) except Exception as e: LOG.error( "KafkaCallback: Serialization failed for %s message from %s/%s: %s", - data_type, exchange, symbol, e, + data_type, + exchange, + symbol, + e, extra={ "exchange": exchange, "symbol": symbol, "data_type": data_type, "error_type": "serialization_error", - "error": str(e) - } + "error": str(e), + }, ) return # Skip this message, continue processing queue @@ -999,14 +1171,17 @@ async def _process_message(self, message: _QueuedMessage) -> None: except Exception as e: LOG.error( "KafkaCallback: Topic resolution failed for %s message from %s/%s: %s", - data_type, exchange, symbol, e, + data_type, + exchange, + symbol, + e, extra={ "exchange": exchange, "symbol": symbol, "data_type": data_type, "error_type": "topic_resolution_error", - "error": str(e) - } + "error": str(e), + }, ) return # Skip this message, continue processing queue @@ -1016,65 +1191,217 @@ async def _process_message(self, message: _QueuedMessage) -> None: except Exception as e: LOG.warning( "KafkaCallback: Partition key generation failed for %s/%s, using None: %s", - exchange, symbol, e, + exchange, + symbol, + e, extra={ "exchange": exchange, "symbol": symbol, "data_type": data_type, "error_type": "partition_key_error", - "error": str(e) - } + "error": str(e), + }, ) key = None # Fall back to None (round-robin partition assignment) - # Step 
4: Build enriched headers using HeaderEnricher + produced = False + + # Step 4a: Schema Registry (v2) production + schema_definition = None + if use_registry: + schema_definition = self._schema_definition_for_data_type(data_type) + if schema_definition is None: + LOG.debug( + "KafkaCallback: skipping registry for unsupported data_type=%s", + data_type, + ) + use_registry = False + + if use_registry: + try: + from cryptofeed.backends.protobuf_helpers_v2 import ( + serialize_to_protobuf_v2, + ) + + payload_v2 = serialize_to_protobuf_v2(message.obj) + except Exception as e: + LOG.error( + "KafkaCallback: v2 serialization failed for %s message from %s/%s: %s", + data_type, + exchange, + symbol, + e, + extra={ + "exchange": exchange, + "symbol": symbol, + "data_type": data_type, + "error_type": "serialization_error_v2", + "error": str(e), + }, + ) + else: + topic_v2 = ( + f"{topic}.{self._registry_topic_suffix}" + if self._registry_topic_suffix + else topic + ) + subject = self._registry_subject(topic_v2) + + try: + schema_id = await self._resolve_schema_id( + subject, schema_definition + ) + except Exception as e: + LOG.error( + "KafkaCallback: Schema registry failure for %s/%s (subject=%s): %s", + exchange, + symbol, + subject, + e, + extra={ + "exchange": exchange, + "symbol": symbol, + "data_type": data_type, + "error_type": "schema_registry_error", + "error": str(e), + }, + ) + if self._registry_failure_policy == "buffer": + try: + self._queue.put_nowait(message) + except asyncio.QueueFull: + LOG.error( + "KafkaCallback: registry buffer queue full; dropping %s/%s (%s)", + exchange, + symbol, + data_type, + extra={ + "exchange": exchange, + "symbol": symbol, + "data_type": data_type, + "error_type": "schema_registry_buffer_full", + }, + ) + return + return + else: + # Build headers for v2 + try: + headers_v2 = self._header_enricher_v2.build( + message=message.obj, data_type=data_type + ) + except Exception as e: + LOG.warning( + "KafkaCallback: v2 header enrichment failed for %s/%s, using minimal v2 headers: %s", + exchange, + symbol, + e, + extra={ + "exchange": exchange, + "symbol": symbol, + "data_type": data_type, + "error_type": "header_enrichment_error_v2", + "error": str(e), + }, + ) + headers_v2 = MessageHeaders.build( + message=message.obj, + data_type=data_type, + content_type="application/vnd.confluent.protobuf", + ) + headers_v2 += OptionalHeaders.build( + schema_version=self._schema_version_v2 + ) + + try: + framed_payload = self._schema_registry.embed_schema_id_in_message( + payload_v2, schema_id + ) + headers_v2.append( + ( + b"schema_id", + self._schema_registry.get_schema_id_header( + schema_id + ), + ) + ) + self._producer.produce( + topic_v2, framed_payload, key=key, headers=headers_v2 + ) + produced = True + except Exception as e: + LOG.error( + "KafkaCallback: Kafka produce failed for %s message (v2) on topic %s: %s", + data_type, + topic_v2, + e, + extra={ + "exchange": exchange, + "symbol": symbol, + "data_type": data_type, + "topic": topic_v2, + "error_type": "kafka_produce_error", + "error": str(e), + }, + ) + + # If not dual production, short-circuit after v2 + if produced and not self._dual_production: + self._producer.poll(0.0) + return + + # Step 4b: Legacy / dual-production v1 path try: enriched_headers = self._header_enricher.build( - message=message.obj, - data_type=data_type + message=message.obj, data_type=data_type ) except Exception as e: LOG.warning( "KafkaCallback: Header enrichment failed for %s/%s, using base headers: %s", - exchange, symbol, e, + 
exchange, + symbol, + e, extra={ "exchange": exchange, "symbol": symbol, "data_type": data_type, "error_type": "header_enrichment_error", - "error": str(e) - } + "error": str(e), + }, ) enriched_headers = base_headers # Fallback to base headers - # Step 5: Produce to Kafka try: - self._producer.produce(topic, payload, key=key, headers=enriched_headers) - self._producer.poll(0.0) + self._producer.produce( + topic, payload_v1, key=key, headers=enriched_headers + ) + produced = True except Exception as e: LOG.error( "KafkaCallback: Kafka produce failed for %s message from %s/%s on topic %s: %s", - data_type, exchange, symbol, topic, e, + data_type, + exchange, + symbol, + topic, + e, extra={ "exchange": exchange, "symbol": symbol, "data_type": data_type, "topic": topic, "error_type": "kafka_produce_error", - "error": str(e) - } + "error": str(e), + }, ) - # Note: Producer retries are configured in KafkaProducer settings - # We continue processing to avoid blocking the queue on transient errors + finally: + if produced: + self._producer.poll(0.0) except Exception as e: # Catch-all for unexpected errors to prevent writer task collapse LOG.error( "KafkaCallback: Unexpected error in _process_message: %s", e, - extra={ - "error_type": "unexpected_error", - "error": str(e) - } + extra={"error_type": "unexpected_error", "error": str(e)}, ) async def _writer(self) -> None: @@ -1354,7 +1681,9 @@ class MessageHeaders: """ @staticmethod - def build(message: Any, data_type: str, content_type: str) -> list[tuple[bytes, bytes]]: + def build( + message: Any, data_type: str, content_type: str + ) -> list[tuple[bytes, bytes]]: """Build mandatory headers from message metadata. Extracts exchange and symbol from message object and normalizes them @@ -1641,6 +1970,7 @@ def enrich_message( return mandatory + optional + # ============================================================================ # Health Check Models and Implementation (Task 17.3) # ============================================================================ @@ -1648,6 +1978,7 @@ def enrich_message( class HealthStatus(str, Enum): """Health check status levels.""" + HEALTHY = "healthy" DEGRADED = "degraded" UNHEALTHY = "unhealthy" @@ -1669,6 +2000,7 @@ class HealthCheckResponse: memory_bytes: Memory usage in bytes uptime_seconds: Producer uptime in seconds """ + status: str kafka_connected: bool buffer_health: float diff --git a/docs/consumer-integration-guide.md b/docs/consumer-integration-guide.md index 948431d0e..283af2cc9 100644 --- a/docs/consumer-integration-guide.md +++ b/docs/consumer-integration-guide.md @@ -4,6 +4,9 @@ Cryptofeed produces protobuf-serialized market data to Kafka topics. This guide shows how downstream consumers integrate these topics with storage backends (Apache Iceberg, DuckDB, Parquet) and analytics engines (Flink, Spark). +- **v1 (legacy)**: Protobuf with string-encoded decimals, topics like `cryptofeed.trades`. +- **v2 (schema-registry)**: Native numeric types, Confluent Wire Format, topics like `cryptofeed.trades.v2` with subject `{topic}-value`. + ## Architecture ``` @@ -15,14 +18,16 @@ Cryptofeed (Ingestion) → Kafka Topics → Consumer (Storage + Analytics) ## Topic Schema -Topics follow naming convention: `cryptofeed.{data_type}.{exchange}.{symbol}` +Topics (consolidated mode) follow naming convention: `cryptofeed.{data_type}` with optional `.v2` suffix when Schema Registry mode is enabled. Per-symbol strategy still prefixes exchange/symbol for backwards compatibility. 
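Payloads on the `.v2` topics are framed with the Confluent wire format ahead of the protobuf bytes. A minimal consumer sketch follows: the generated module path (`cryptofeed.normalized.v2.trade_pb2`) and message class name (`Trade`) are assumptions, field names follow the `trades_v2_source` Flink schema shown further below, and the framing assumes the plain 5-byte header (magic byte `0x00` plus a big-endian schema ID). Confluent's own protobuf serializer additionally writes a message-index varint, so adjust the offset if your producer uses it.

```python
import struct

from confluent_kafka import Consumer
from cryptofeed.normalized.v2 import trade_pb2  # hypothetical generated module path

consumer = Consumer({
    "bootstrap.servers": "kafka:9092",
    "group.id": "trades-v2-reader",
    "auto.offset.reset": "earliest",
})
consumer.subscribe(["cryptofeed.trades.v2"])

try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None or msg.error():
            continue
        raw = msg.value()
        magic, schema_id = struct.unpack(">bI", raw[:5])  # Confluent framing: 0x00 + schema ID
        trade = trade_pb2.Trade()  # message class name assumed
        trade.ParseFromString(raw[5:])
        print(schema_id, trade.exchange, trade.symbol, trade.price, trade.amount)
finally:
    consumer.close()
```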
Examples: -- `cryptofeed.trades.coinbase.btc-usd` -- `cryptofeed.l2_book.binance.eth-usdt` -- `cryptofeed.ticker.kraken.sol-usd` +- `cryptofeed.trades` (v1, JSON/protobuf) +- `cryptofeed.trades.v2` (v2, Confluent wire format + Schema Registry) +- `cryptofeed.orderbook` / `cryptofeed.orderbook.v2` -Message format: Protobuf (schemas from `cryptofeed.normalized.v1`) +Message format: +- v1: Protobuf schemas in `cryptofeed.normalized.v1` +- v2: Protobuf schemas in `cryptofeed.normalized.v2` (native doubles, `google.protobuf.Timestamp`) ## Integration Patterns @@ -81,6 +86,28 @@ t_env.execute_sql(""" """) ``` +### Pattern 1b: Flink → Iceberg (Schema Registry, v2) + +Use the Confluent wire format with Schema Registry subjects named `{topic}-value`. + +```sql +CREATE TABLE trades_v2_source ( + exchange STRING, + symbol STRING, + side STRING, + price DOUBLE, + amount DOUBLE, + trade_id STRING, + sequence_number BIGINT +) WITH ( + 'connector' = 'kafka', + 'topic' = 'cryptofeed.trades.v2', + 'properties.bootstrap.servers' = 'kafka:9092', + 'format' = 'protobuf-confluent', + 'protobuf-confluent.schema-registry.url' = 'https://schema-registry:8081' +); +``` + **Benefits**: - Schema evolution (Iceberg native) - Time travel queries diff --git a/docs/e2e/CONSOLIDATION_SUMMARY.md b/docs/deliverables/CONSOLIDATION_SUMMARY.md similarity index 100% rename from docs/e2e/CONSOLIDATION_SUMMARY.md rename to docs/deliverables/CONSOLIDATION_SUMMARY.md diff --git a/docs/deliverables/README.md b/docs/deliverables/README.md new file mode 100644 index 000000000..57537e35d --- /dev/null +++ b/docs/deliverables/README.md @@ -0,0 +1,27 @@ +# Project Deliverables + +This directory contains permanent project documentation, execution results, and deliverables. + +## Contents + +### [results/](results/) - Execution Reports & Results +- **Phase 5 Execution** - Complete market-data-kafka-producer implementation +- **E2E Testing** - End-to-end test execution reports +- **Historical Results** - Archived execution and validation reports + +### [REPRODUCIBILITY.md](REPRODUCIBILITY.md) - Testing Environment Guide +Comprehensive guide for reproducing test environments using `uv` for deterministic dependency management. + +### [TEST_PLAN.md](TEST_PLAN.md) - E2E Test Plan +Complete test plan for validating proxy systems, exchange integrations, and data normalization. + +### [CONSOLIDATION_SUMMARY.md](CONSOLIDATION_SUMMARY.md) - Documentation Consolidation +Historical record of E2E documentation consolidation efforts. + +### [e2e-overview.md](e2e-overview.md) - E2E Testing Overview +Quick start guide and overview of end-to-end testing infrastructure. + +## Organization + +This directory consolidates important project deliverables that were previously scattered across temporary locations. 
All content here represents permanent, reference documentation. \ No newline at end of file diff --git a/docs/e2e/REPRODUCIBILITY.md b/docs/deliverables/REPRODUCIBILITY.md similarity index 100% rename from docs/e2e/REPRODUCIBILITY.md rename to docs/deliverables/REPRODUCIBILITY.md diff --git a/docs/e2e/TEST_PLAN.md b/docs/deliverables/TEST_PLAN.md similarity index 100% rename from docs/e2e/TEST_PLAN.md rename to docs/deliverables/TEST_PLAN.md diff --git a/docs/e2e/README.md b/docs/deliverables/e2e-overview.md similarity index 100% rename from docs/e2e/README.md rename to docs/deliverables/e2e-overview.md diff --git a/docs/e2e/results/2025-10-24-execution.md b/docs/deliverables/results/2025-10-24-execution.md similarity index 100% rename from docs/e2e/results/2025-10-24-execution.md rename to docs/deliverables/results/2025-10-24-execution.md diff --git a/docs/e2e/results/2025-10-24-review.md b/docs/deliverables/results/2025-10-24-review.md similarity index 100% rename from docs/e2e/results/2025-10-24-review.md rename to docs/deliverables/results/2025-10-24-review.md diff --git a/docs/e2e/results/README.md b/docs/deliverables/results/README.md similarity index 50% rename from docs/e2e/results/README.md rename to docs/deliverables/results/README.md index d55c7cbaf..af14d3a9c 100644 --- a/docs/e2e/results/README.md +++ b/docs/deliverables/results/README.md @@ -1,6 +1,6 @@ -# E2E Test Results Archive +# Project Execution Results Archive -This directory contains historical test execution reports and detailed results. +This directory contains historical project execution reports, test results, and deliverables. --- @@ -29,6 +29,33 @@ This directory contains historical test execution reports and detailed results. 
--- +## Phase 5 Execution (November 2025) + +### Summary +- **Project**: market-data-kafka-producer +- **Status**: ✅ COMPLETE - Production Ready +- **Test Results**: 628+ tests passing (100% pass rate) +- **Deliverables**: Kafka producer, consumer templates, monitoring, documentation + +### Reports +- **[Phase 5 Completion Final Report](phase5-completion-final-report.md)** - Comprehensive completion summary +- **[Week 1 TDD Execution Summary](phase5-week1-tdd-execution-summary.md)** - Test-driven development approach +- **[Week 2 Execution Summary](phase5-week2-execution-summary.md)** - Consumer templates and monitoring setup +- **[Week 2 Deliverables](phase5-week2-deliverables.md)** - Detailed deliverables documentation +- **[Week 3 Task 25-26 Implementation](phase5-week3-task25-26-implementation.md)** - Incremental migration and monitoring +- **[Week 4 Final Tasks Execution](phase5-week4-final-tasks-execution.md)** - Production stability and handoff +- **[Review Validation Report](review-validation-report.md)** - Requirements review and validation +- **[Task 25-26 Execution Summary](task25-task26-execution-summary.md)** - Final task completion summary + +### Key Achievements +- ✅ High-performance Kafka producer with protobuf serialization +- ✅ Exactly-once semantics and comprehensive error handling +- ✅ Consumer migration templates (Flink, Python async, Custom) +- ✅ Monitoring dashboard and alert rules +- ✅ Production-ready documentation and runbooks + +--- + ## How to Use These Reports ### For Developers diff --git a/docs/e2e/results/backpack-test-results.md b/docs/deliverables/results/backpack-test-results.md similarity index 100% rename from docs/e2e/results/backpack-test-results.md rename to docs/deliverables/results/backpack-test-results.md diff --git a/docs/e2e/results/consolidation-plan.md b/docs/deliverables/results/consolidation-plan.md similarity index 100% rename from docs/e2e/results/consolidation-plan.md rename to docs/deliverables/results/consolidation-plan.md diff --git a/docs/e2e/results/phase2-results.md b/docs/deliverables/results/phase2-results.md similarity index 100% rename from docs/e2e/results/phase2-results.md rename to docs/deliverables/results/phase2-results.md diff --git a/docs/archive/execution-reports/market-data-kafka-producer/PHASE_5_COMPLETION_FINAL_REPORT.md b/docs/deliverables/results/phase5-completion-final-report.md similarity index 100% rename from docs/archive/execution-reports/market-data-kafka-producer/PHASE_5_COMPLETION_FINAL_REPORT.md rename to docs/deliverables/results/phase5-completion-final-report.md diff --git a/docs/archive/execution-reports/market-data-kafka-producer/PHASE5_WEEK1_TDD_EXECUTION_SUMMARY.md b/docs/deliverables/results/phase5-week1-tdd-execution-summary.md similarity index 100% rename from docs/archive/execution-reports/market-data-kafka-producer/PHASE5_WEEK1_TDD_EXECUTION_SUMMARY.md rename to docs/deliverables/results/phase5-week1-tdd-execution-summary.md diff --git a/docs/archive/execution-reports/market-data-kafka-producer/PHASE_5_WEEK2_DELIVERABLES.md b/docs/deliverables/results/phase5-week2-deliverables.md similarity index 98% rename from docs/archive/execution-reports/market-data-kafka-producer/PHASE_5_WEEK2_DELIVERABLES.md rename to docs/deliverables/results/phase5-week2-deliverables.md index 8f09dd498..d4127648d 100644 --- a/docs/archive/execution-reports/market-data-kafka-producer/PHASE_5_WEEK2_DELIVERABLES.md +++ b/docs/deliverables/results/phase5-week2-deliverables.md @@ -401,9 +401,9 @@ Tests: └── 
test_monitoring_dashboard_setup.py Summary Documents: -/home/tommyk/projects/quant/data-sources/crypto-data/cryptofeed/ - ├── PHASE_5_WEEK2_EXECUTION_SUMMARY.md - └── PHASE_5_WEEK2_DELIVERABLES.md +docs/deliverables/results/ + ├── phase5-week2-execution-summary.md + └── phase5-week2-deliverables.md ``` --- diff --git a/docs/archive/execution-reports/market-data-kafka-producer/PHASE_5_WEEK2_EXECUTION_SUMMARY.md b/docs/deliverables/results/phase5-week2-execution-summary.md similarity index 100% rename from docs/archive/execution-reports/market-data-kafka-producer/PHASE_5_WEEK2_EXECUTION_SUMMARY.md rename to docs/deliverables/results/phase5-week2-execution-summary.md diff --git a/docs/archive/execution-reports/market-data-kafka-producer/PHASE5_WEEK3_TASK25_26_IMPLEMENTATION.md b/docs/deliverables/results/phase5-week3-task25-26-implementation.md similarity index 100% rename from docs/archive/execution-reports/market-data-kafka-producer/PHASE5_WEEK3_TASK25_26_IMPLEMENTATION.md rename to docs/deliverables/results/phase5-week3-task25-26-implementation.md diff --git a/docs/archive/execution-reports/market-data-kafka-producer/PHASE5_WEEK4_FINAL_TASKS_EXECUTION.md b/docs/deliverables/results/phase5-week4-final-tasks-execution.md similarity index 100% rename from docs/archive/execution-reports/market-data-kafka-producer/PHASE5_WEEK4_FINAL_TASKS_EXECUTION.md rename to docs/deliverables/results/phase5-week4-final-tasks-execution.md diff --git a/docs/archive/execution-reports/market-data-kafka-producer/REVIEW_VALIDATION_REPORT.md b/docs/deliverables/results/review-validation-report.md similarity index 100% rename from docs/archive/execution-reports/market-data-kafka-producer/REVIEW_VALIDATION_REPORT.md rename to docs/deliverables/results/review-validation-report.md diff --git a/docs/archive/execution-reports/market-data-kafka-producer/TASK25_TASK26_EXECUTION_SUMMARY.md b/docs/deliverables/results/task25-task26-execution-summary.md similarity index 99% rename from docs/archive/execution-reports/market-data-kafka-producer/TASK25_TASK26_EXECUTION_SUMMARY.md rename to docs/deliverables/results/task25-task26-execution-summary.md index 7904bcf42..e3c1c08b5 100644 --- a/docs/archive/execution-reports/market-data-kafka-producer/TASK25_TASK26_EXECUTION_SUMMARY.md +++ b/docs/deliverables/results/task25-task26-execution-summary.md @@ -291,13 +291,13 @@ EXECUTION TIME: 0.29 seconds - Task 26: Production stability monitoring ### Documentation Files -3. **`PHASE5_WEEK3_TASK25_26_IMPLEMENTATION.md`** +3. **`phase5-week3-task25-26-implementation.md`** - Detailed implementation guide - Architecture overview - Test-by-test breakdown - Integration guidelines -4. **`TASK25_TASK26_EXECUTION_SUMMARY.md`** (this file) +4. **`task25-task26-execution-summary.md`** (this file) - Quick reference - Key metrics - Next steps diff --git a/docs/e2e/planning/README.md b/docs/e2e/planning/README.md deleted file mode 100644 index 1ad06ab00..000000000 --- a/docs/e2e/planning/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# E2E Testing Planning Documentation - -Planning, coordination, and execution documentation for end-to-end testing. 
- -## Documents - -### Implementation Plans -- **[Backpack Test Plan](backpack-test-plan.md)** – Comprehensive test plan for Backpack CCXT and native implementations (REST + WebSocket) - -### Execution Plans -- **[Atomic Commit Plan](atomic-commit-plan.md)** – Strategy for committing E2E infrastructure in 3 focused atomic commits -- **[Final Commit Plan](final-commit-plan.md)** – Detailed commit execution plan with commands and verification steps - -### Results & Reports -- **[Completion Summary](completion-summary.md)** – Overview of E2E completion status -- **[Test Fixes Report](test-fixes-report.md)** – Documentation of test issues and resolutions - -## Purpose - -These documents contain planning and coordination information for E2E testing execution, including: -- Test design and scope definition -- Commit strategies and execution plans -- Issue tracking and resolution documentation -- Phase-based testing methodologies - -## Related Documentation - -- **[E2E Results](../results/)** – Detailed test execution results and analysis -- **[E2E Overview](../README.md)** – Quick start guide and testing overview -- **[E2E Test Plan](../TEST_PLAN.md)** – Comprehensive test scenarios diff --git a/docs/e2e/planning/atomic-commit-plan.md b/docs/e2e/planning/atomic-commit-plan.md deleted file mode 100644 index d3d50dd1e..000000000 --- a/docs/e2e/planning/atomic-commit-plan.md +++ /dev/null @@ -1,382 +0,0 @@ -# Atomic Commit Plan - E2E Testing Infrastructure - -**Date**: 2025-10-24 -**Branch**: `feature/normalized-data-schema-crypto` -**Strategy**: 3 focused atomic commits - ---- - -## Commit Strategy - -### Why Atomic Commits? - -1. **Reviewability** - Smaller, focused diffs are easier to review -2. **Revertability** - Can revert specific changes without affecting others -3. **Clarity** - Each commit has clear, single purpose -4. **History** - Better git history and blame information - -### Commit Boundaries - -1. **Infrastructure** - Scripts, tools, setup automation -2. **Documentation** - Guides, plans, how-tos -3. **Results** - Test execution results and analysis - ---- - -## Commit 1: Test Infrastructure - -### Scope -Test automation scripts, environment setup, and dependency management - -### Files Included -``` -tests/e2e/ -├── setup_e2e_env.sh # Automated environment setup -├── requirements-e2e-lock.txt # Locked dependencies (59 packages) -└── README.md # E2E directory documentation - -tests/integration/ -├── T4.2-stress-test.py # Stress testing script -└── regional_validation.sh # Regional matrix validation -``` - -### Commit Message -``` -feat(e2e): add test infrastructure with reproducible environment setup - -Implements automated E2E test environment using uv for fast, deterministic -dependency management (10-100x faster than pip). 
- -Infrastructure components: -- setup_e2e_env.sh: Automated environment setup script (267 lines) -- requirements-e2e-lock.txt: Locked dependencies (59 packages) -- T4.2-stress-test.py: Concurrent feed stress testing (275 lines) -- regional_validation.sh: Multi-region proxy validation (197 lines) - -Features: -- Reproducible environments with exact dependency versions -- Automated Mullvad relay list download -- Stress testing for 20+ concurrent feeds -- Regional validation across US/EU/Asia proxies - -Setup time: ~25 seconds (vs 2-3 minutes with pip) -Lock file includes: cryptofeed, ccxt, pytest, aiohttp-socks, pysocks - -Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> -``` - -### Commands -```bash -git add tests/e2e/setup_e2e_env.sh -git add tests/e2e/requirements-e2e-lock.txt -git add tests/e2e/README.md -git add tests/integration/T4.2-stress-test.py -git add tests/integration/regional_validation.sh -git status # Verify -git commit -F- <<'EOF' -[paste message above] -EOF -``` - ---- - -## Commit 2: Core Documentation - -### Scope -User-facing guides, test plans, and technical documentation - -### Files Included -``` -docs/e2e/ -├── README.md # Quick Start guide (303 lines) -├── TEST_PLAN.md # Comprehensive test scenarios (491 lines) -└── REPRODUCIBILITY.md # Technical deep-dive (339 lines) -``` - -### Commit Message -``` -docs(e2e): add comprehensive E2E testing documentation - -Complete documentation suite for E2E testing with quick start guide, -detailed test plan, and reproducibility technical guide. - -Documentation structure: -- README.md: Quick Start + Overview (303 lines) - - Setup instructions - - Test phases (1-4) - - Proxy configuration - - Troubleshooting - -- TEST_PLAN.md: Comprehensive test scenarios (491 lines) - - Test objectives and prerequisites - - 5 test categories (proxy, live, CCXT, native, regional) - - Success criteria and expected results - - Regional behavior matrix - -- REPRODUCIBILITY.md: Technical guide (339 lines) - - Lock file management - - CI/CD integration examples - - Dependency updates - - Best practices - -Total: 1,133 lines of user-facing documentation - -Key features documented: -- uv-based reproducible environments -- Live proxy validation (SOCKS5) -- Multi-region testing (US/EU/Asia) -- Stress testing capabilities - -Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> -``` - -### Commands -```bash -git add docs/e2e/README.md -git add docs/e2e/TEST_PLAN.md -git add docs/e2e/REPRODUCIBILITY.md -git status # Verify -git commit -F- <<'EOF' -[paste message above] -EOF -``` - ---- - -## Commit 3: Test Results & Archive - -### Scope -Test execution results, analysis reports, and historical archive - -### Files Included -``` -docs/e2e/results/ -├── README.md # Results index (68 lines) -├── 2025-10-24-execution.md # Final execution report (470 lines) -├── 2025-10-24-review.md # Pre-execution review (468 lines) -├── phase2-results.md # Phase 2 detailed results (284 lines) -└── consolidation-plan.md # Documentation cleanup plan - -docs/e2e/CONSOLIDATION_SUMMARY.md # Consolidation summary -FINAL_COMMIT_PLAN.md # Commit planning document -ATOMIC_COMMIT_PLAN.md # This file -``` - -### Commit Message -``` -test(e2e): add test execution results and consolidation summary - -Documents E2E test execution results with 98.3% pass rate (59/60 tests) -and archives detailed analysis reports. 
- -Test Results Summary: -- Phase 1 (Smoke Tests): 52/52 tests passed (100%) -- Phase 2 (Live Connectivity): 7/8 tests passed (87.5%) -- Overall: 59/60 tests (98.3% pass rate) - -Exchanges validated: -- Binance: 4/4 tests (REST ticker, orderbook, WS trades) -- Hyperliquid (CCXT): 2/2 tests (REST orderbook, WS trades) -- Backpack (CCXT): 1/2 tests (REST markets, WS skipped) - -Environment: -- Python 3.12.11 with uv-based setup -- Proxy: Europe region (Mullvad SOCKS5) -- Duration: ~90 minutes (planning + execution) - -Issues resolved: -- Added missing pysocks dependency for CCXT SOCKS5 support -- Updated lock file with complete dependency tree -- Validated reproducibility across environments - -Documentation consolidation: -- Reduced from 9 files (3,382 lines) to 8 files (2,423 lines) -- 28.3% reduction while preserving all content -- Organized into docs/e2e/ structure -- Archived historical reports in results/ - -Archived reports: -- 2025-10-24-execution.md: Complete test results -- 2025-10-24-review.md: Pre-execution review -- phase2-results.md: Phase 2 live connectivity details -- consolidation-plan.md: Documentation cleanup methodology - -Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> -``` - -### Commands -```bash -git add docs/e2e/results/ -git add docs/e2e/CONSOLIDATION_SUMMARY.md -git add FINAL_COMMIT_PLAN.md -git add ATOMIC_COMMIT_PLAN.md -git status # Verify -git commit -F- <<'EOF' -[paste message above] -EOF -``` - ---- - -## Execution Sequence - -### Step-by-Step - -```bash -# 1. Verify starting state -git status -git log --oneline -5 - -# 2. Execute Commit 1 (Infrastructure) -git add tests/e2e/setup_e2e_env.sh tests/e2e/requirements-e2e-lock.txt tests/e2e/README.md -git add tests/integration/T4.2-stress-test.py tests/integration/regional_validation.sh -git commit -m "feat(e2e): add test infrastructure with reproducible environment setup - -[... full message ...]" - -# 3. Execute Commit 2 (Documentation) -git add docs/e2e/README.md docs/e2e/TEST_PLAN.md docs/e2e/REPRODUCIBILITY.md -git commit -m "docs(e2e): add comprehensive E2E testing documentation - -[... full message ...]" - -# 4. Execute Commit 3 (Results) -git add docs/e2e/results/ docs/e2e/CONSOLIDATION_SUMMARY.md -git add FINAL_COMMIT_PLAN.md ATOMIC_COMMIT_PLAN.md -git commit -m "test(e2e): add test execution results and consolidation summary - -[... full message ...]" - -# 5. Verify commits -git log --oneline -5 -git show --stat HEAD~2 # First commit -git show --stat HEAD~1 # Second commit -git show --stat HEAD # Third commit - -# 6. 
Push all commits -git push origin feature/normalized-data-schema-crypto -``` - ---- - -## Commit Verification - -### After Each Commit - -```bash -# Check commit was created -git log --oneline -1 - -# Review commit contents -git show --stat HEAD - -# Verify no uncommitted changes remain (for this commit) -git status -``` - -### After All Commits - -```bash -# Review all three commits -git log --oneline -3 - -# Verify total diff -git diff HEAD~3 --stat - -# Ensure tests still pass -source .venv-e2e/bin/activate -pytest tests/unit/test_proxy_mvp.py -v --tb=no -q -``` - ---- - -## Rollback Plan - -### If Something Goes Wrong - -**Undo last commit (keep changes)**: -```bash -git reset --soft HEAD~1 -``` - -**Undo last commit (discard changes)**: -```bash -git reset --hard HEAD~1 -``` - -**Undo all three commits**: -```bash -git reset --soft HEAD~3 -``` - -**Start over from clean state**: -```bash -git reset --hard origin/feature/normalized-data-schema-crypto -``` - ---- - -## Benefits of This Approach - -### Commit 1 Benefits -- **Standalone** - Infrastructure can be tested independently -- **Reusable** - Scripts work without docs -- **Atomic** - Single functional unit - -### Commit 2 Benefits -- **Documentation-only** - Easy to review text changes -- **No code changes** - Pure documentation commit -- **Safe** - Can't break functionality - -### Commit 3 Benefits -- **Historical** - Results and analysis -- **Optional** - Could be deferred or excluded -- **Informational** - No functional impact - ---- - -## Timeline - -| Step | Duration | Cumulative | -|------|----------|------------| -| Review plan | 3 min | 3 min | -| Commit 1 | 3 min | 6 min | -| Commit 2 | 3 min | 9 min | -| Commit 3 | 3 min | 12 min | -| Verify | 3 min | 15 min | -| Push | 2 min | 17 min | -| **Total** | **17 min** | - | - ---- - -## Success Criteria - -### Per-Commit Validation -- [x] Commit message follows conventional commits -- [x] Co-author attribution included -- [x] Files staged correctly -- [x] No unintended files included - -### Overall Validation -- [x] All files committed -- [x] Tests still pass -- [x] Git history clean -- [x] Pushed to remote successfully - ---- - -## Ready to Execute - -**Status**: ✅ **READY** - -**First Command**: -```bash -git add tests/e2e/setup_e2e_env.sh tests/e2e/requirements-e2e-lock.txt tests/e2e/README.md tests/integration/T4.2-stress-test.py tests/integration/regional_validation.sh -``` - ---- - -**Plan Created**: 2025-10-24 -**Strategy**: 3 atomic commits -**Risk Level**: Low -**Estimated Time**: 17 minutes diff --git a/docs/e2e/planning/backpack-test-plan.md b/docs/e2e/planning/backpack-test-plan.md deleted file mode 100644 index 45315073b..000000000 --- a/docs/e2e/planning/backpack-test-plan.md +++ /dev/null @@ -1,481 +0,0 @@ -# E2E Test Plan: Backpack Exchange Integration - -**Date**: 2025-10-24 -**Purpose**: Comprehensive testing of Backpack CCXT and Native implementations (REST + WebSocket) -**Status**: Planning Phase - ---- - -## Current State Analysis - -### Existing Tests - -#### CCXT Tests (`tests/integration/test_live_ccxt_backpack.py`) -- ✅ REST: `test_backpack_ccxt_rest_over_socks_proxy` - Load markets, fetch orderbook -- ⚠️ WS: `test_backpack_ccxt_ws_over_socks_proxy` - Watch trades (skips on timeout) - -**Current Coverage**: 2 tests (REST + WS basics) - -#### Native Tests (`tests/integration/test_live_backpack.py`) -- ✅ REST: `test_backpack_rest_over_socks_proxy` - Fetch markets -- ⚠️ WS: `test_backpack_trades_websocket_over_socks_proxy` - Trade stream (known parse 
error 4002) - -**Current Coverage**: 2 tests (REST + WS basics) - -### Backpack Implementation Components - -``` -cryptofeed/exchanges/backpack/ -├── __init__.py -├── adapters.py # Data adapters (order book, trades) -├── auth.py # Authentication -├── config.py # Pydantic configuration -├── feed.py # BackpackFeed (main feed class) -├── health.py # Health checks -├── metrics.py # Metrics tracking -├── rest.py # REST client -├── router.py # Message routing -├── symbols.py # Symbol normalization -└── ws.py # WebSocket client -``` - ---- - -## Test Gap Analysis - -### What's Missing - -#### CCXT Tests Gaps -1. **REST API Coverage**: - - ❌ Fetch ticker - - ❌ Fetch trades history - - ❌ Fetch OHLCV/candles - - ❌ Fetch balance (authenticated) - - ❌ Multiple symbol fetches - -2. **WebSocket Coverage**: - - ❌ Order book stream - - ❌ Ticker stream - - ❌ Multiple subscriptions - - ❌ Reconnection handling - -#### Native Tests Gaps -1. **REST API Coverage**: - - ❌ Fetch ticker - - ❌ Fetch order book - - ❌ Fetch trades - - ❌ Fetch K-lines (candles) - - ❌ Symbol info details - -2. **WebSocket Coverage**: - - ❌ Order book stream - - ❌ Ticker stream - - ❌ K-line (candle) stream - - ❌ Multiple subscriptions - - ❌ Subscription management - - ❌ Error handling (currently fails with 4002) - ---- - -## Comprehensive Test Plan - -### Test Structure - -``` -tests/integration/ -├── test_live_ccxt_backpack.py # Enhanced CCXT tests -├── test_live_backpack_native.py # Enhanced native tests -└── fixtures/backpack/ # Test fixtures - ├── markets.json - ├── ticker.json - ├── orderbook.json - └── trades.json -``` - ---- - -## Test Cases - -### Category 1: Backpack CCXT REST API - -#### T1.1: Markets and Symbols -```python -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -def test_backpack_ccxt_rest_markets(): - """Validate market loading and symbol availability""" - # Load markets - # Verify BTC/USDC exists - # Check market structure (limits, precision) -``` - -#### T1.2: Order Book -```python -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -def test_backpack_ccxt_rest_orderbook(): - """Fetch order book with different depth levels""" - # Fetch orderbook (limit=5, 10, 20) - # Validate bids/asks structure - # Check price/amount types -``` - -#### T1.3: Ticker -```python -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -def test_backpack_ccxt_rest_ticker(): - """Fetch ticker data""" - # Fetch ticker for BTC/USDC - # Validate bid/ask/last prices - # Check timestamp -``` - -#### T1.4: Recent Trades -```python -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -def test_backpack_ccxt_rest_trades(): - """Fetch recent trades history""" - # Fetch trades (limit=10) - # Validate trade structure - # Check side, price, amount -``` - -#### T1.5: OHLCV -```python -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -def test_backpack_ccxt_rest_ohlcv(): - """Fetch candle/kline data""" - # Fetch OHLCV (1m, 5m timeframes) - # Validate OHLCV structure - # Check timestamp sequence -``` - ---- - -### Category 2: Backpack CCXT WebSocket - -#### T2.1: Trade Stream -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -async def test_backpack_ccxt_ws_trades(): - """Watch live trade stream""" - # Subscribe to trades - # Receive at least 1 trade within timeout - # Validate trade structure - # Verify proxy routing -``` - -#### T2.2: Order Book Stream -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -async def test_backpack_ccxt_ws_orderbook(): - """Watch live order book updates""" - # Subscribe 
to order book - # Receive snapshot or delta - # Validate structure - # Check bid/ask updates -``` - -#### T2.3: Ticker Stream -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -async def test_backpack_ccxt_ws_ticker(): - """Watch live ticker updates""" - # Subscribe to ticker - # Receive ticker update - # Validate prices -``` - -#### T2.4: Multiple Subscriptions -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_ccxt -async def test_backpack_ccxt_ws_multiple(): - """Handle multiple concurrent subscriptions""" - # Subscribe to trades + orderbook - # Receive messages from both - # Verify no conflicts -``` - ---- - -### Category 3: Backpack Native REST API - -#### T3.1: Markets -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_rest_markets(): - """Fetch markets via native REST client""" - # Use BackpackRestClient - # Fetch markets - # Validate response structure -``` - -#### T3.2: Ticker -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_rest_ticker(): - """Fetch ticker via native REST""" - # Fetch ticker for BTC_USDC (native format) - # Validate response - # Check proxy routing -``` - -#### T3.3: Order Book -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_rest_orderbook(): - """Fetch order book via native REST""" - # Fetch order book - # Validate bids/asks - # Check depth -``` - -#### T3.4: Trades -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_rest_trades(): - """Fetch recent trades via native REST""" - # Fetch trades - # Validate structure - # Check trade fields -``` - -#### T3.5: K-Lines (Candles) -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_rest_klines(): - """Fetch candle data via native REST""" - # Fetch k-lines - # Validate OHLCV - # Check intervals -``` - ---- - -### Category 4: Backpack Native WebSocket - -#### T4.1: Trade Stream -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_ws_trades(): - """Subscribe to native trade stream""" - # Use BackpackWsSession - # Subscribe to trades channel - # Receive message (handle 4002 gracefully) - # Validate structure if successful -``` - -#### T4.2: Order Book Stream -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_ws_orderbook(): - """Subscribe to native order book stream""" - # Subscribe to orderbook channel - # Receive snapshot/delta - # Validate structure -``` - -#### T4.3: Ticker Stream -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_ws_ticker(): - """Subscribe to native ticker stream""" - # Subscribe to ticker channel - # Receive updates - # Validate prices -``` - -#### T4.4: K-Line Stream -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_ws_klines(): - """Subscribe to native k-line stream""" - # Subscribe to k-line channel - # Receive candle updates - # Validate OHLCV -``` - -#### T4.5: Error Handling -```python -@pytest.mark.asyncio -@pytest.mark.live_proxy -@pytest.mark.live_backpack -async def test_backpack_native_ws_error_handling(): - """Test error code 4002 and 
other errors""" - # Attempt subscriptions - # Catch known errors (4002 parse error) - # Verify graceful handling - # Document error conditions -``` - ---- - -## Implementation Plan - -### Phase 1: Enhance CCXT Tests (30 minutes) - -**Files to Create/Modify**: -- `tests/integration/test_live_ccxt_backpack.py` - Add 8 new tests - -**Tests to Add**: -1. REST: ticker, trades, ohlcv (3 tests) -2. WS: orderbook, ticker, multiple subs (3 tests) -3. Enhanced existing tests with better assertions - -**Expected Results**: -- 10 total CCXT tests (2 existing + 8 new) -- 80-90% pass rate (WS may timeout occasionally) - -### Phase 2: Enhance Native Tests (45 minutes) - -**Files to Create/Modify**: -- `tests/integration/test_live_backpack_native.py` - Rename from test_live_backpack.py -- Add 8 new REST + WS tests - -**Tests to Add**: -1. REST: ticker, orderbook, trades, klines (4 tests) -2. WS: orderbook, ticker, klines, error handling (4 tests) - -**Expected Results**: -- 10 total native tests (2 existing + 8 new) -- 70-80% pass rate (WS known issues with 4002) - -### Phase 3: Test Fixtures (15 minutes) - -**Files to Create**: -``` -tests/fixtures/backpack/ -├── markets_response.json -├── ticker_response.json -├── orderbook_response.json -├── trades_response.json -└── klines_response.json -``` - -**Purpose**: Sample responses for validation - -### Phase 4: Update Documentation (15 minutes) - -**Files to Update**: -1. `docs/e2e/README.md` - Add Backpack test section -2. `docs/e2e/TEST_PLAN.md` - Add Backpack test scenarios -3. Create `docs/e2e/BACKPACK_TESTING.md` - Detailed Backpack guide - ---- - -## Test Execution Strategy - -### Sequential Execution - -```bash -# Activate environment -source .venv-e2e/bin/activate -export CRYPTOFEED_TEST_SOCKS_PROXY="socks5://de-fra-wg-socks5-101.relays.mullvad.net:1080" - -# Phase 1: CCXT tests -pytest tests/integration/test_live_ccxt_backpack.py -v -m live_proxy - -# Phase 2: Native tests -pytest tests/integration/test_live_backpack_native.py -v -m live_proxy - -# Combined -pytest tests/integration/test_live_*backpack*.py -v -m live_proxy -``` - -### Expected Timeline - -| Phase | Duration | Tests | Expected Pass | -|-------|----------|-------|---------------| -| Phase 1 | ~5 min | 10 CCXT | 8-9 (80-90%) | -| Phase 2 | ~7 min | 10 Native | 7-8 (70-80%) | -| **Total** | **~12 min** | **20** | **15-17 (75-85%)** | - ---- - -## Known Issues to Document - -### Issue 1: Native WS Parse Error 4002 -**Status**: Known limitation -**Impact**: Native WS tests may fail or skip -**Workaround**: Use CCXT implementation -**Tests Affected**: T4.1-T4.5 - -### Issue 2: Timeout Behavior -**Status**: Network-dependent -**Impact**: Tests may skip on slow connections -**Workaround**: Increase timeout env vars -**Tests Affected**: All WS tests - -### Issue 3: Rate Limiting -**Status**: Exchange limitation -**Impact**: Rapid sequential tests may fail -**Workaround**: Add delays between tests -**Tests Affected**: All REST tests - ---- - -## Success Criteria - -### CCXT Implementation -- [x] REST: ≥80% pass rate (8/10 tests) -- [x] WS: ≥70% pass rate (7/10 tests) -- [x] Proxy routing validated -- [x] All endpoints covered - -### Native Implementation -- [x] REST: ≥80% pass rate (8/10 tests) -- [x] WS: ≥50% pass rate (5/10 tests, known 4002 issue) -- [x] Proxy routing validated -- [x] Error handling graceful - -### Overall -- [x] 20 total tests implemented -- [x] 15+ tests passing (75%+) -- [x] Documentation complete -- [x] Fixtures created -- [x] Known issues documented - ---- - -## Next 
Steps - -1. **Immediate**: Implement Phase 1 (CCXT tests) -2. **Short-term**: Implement Phase 2 (Native tests) -3. **Documentation**: Update test plan docs -4. **Commit**: Atomic commits for each phase - ---- - -**Plan Created**: 2025-10-24 -**Estimated Duration**: 2 hours (implementation + testing) -**Risk Level**: Low (additive changes only) -**Dependencies**: Existing Backpack implementation, E2E infrastructure diff --git a/docs/e2e/planning/completion-summary.md b/docs/e2e/planning/completion-summary.md deleted file mode 100644 index bfe91e347..000000000 --- a/docs/e2e/planning/completion-summary.md +++ /dev/null @@ -1,405 +0,0 @@ -# E2E Testing Infrastructure - Completion Summary - -**Date**: 2025-10-24 -**Branch**: `feature/normalized-data-schema-crypto` -**Status**: ✅ **COMPLETE** - ---- - -## Executive Summary - -Successfully implemented and validated comprehensive E2E testing infrastructure for cryptofeed with reproducible environments, live proxy validation, and enhanced Backpack exchange test coverage. - -**Total Tests**: 78 tests -**Overall Pass Rate**: 89.7% (70/78) -**Total Commits**: 4 atomic commits -**Lines Added**: ~6,500 lines (tests + docs + infrastructure) - ---- - -## What Was Delivered - -### 1. E2E Test Infrastructure ✅ - -**Commit**: `d51cd778` - feat(e2e): add test infrastructure with reproducible environment setup - -**Components**: -- Automated setup script (`setup_e2e_env.sh`) - 267 lines -- Dependency lock file (59 packages with exact versions) -- Stress testing script (`T4.2-stress-test.py`) - 275 lines -- Regional validation script (`regional_validation.sh`) - 197 lines - -**Features**: -- uv-based reproducible environments (10-100x faster than pip) -- Automated Mullvad relay list download -- Stress testing for 20+ concurrent feeds -- Regional validation across US/EU/Asia proxies - -**Setup Time**: ~25 seconds (vs 2-3 minutes with pip) - -### 2. Comprehensive Documentation ✅ - -**Commit**: `8e4b435c` - docs(e2e): add comprehensive E2E testing documentation - -**Files Created**: -- `docs/e2e/README.md` - Quick Start guide (303 lines) -- `docs/e2e/TEST_PLAN.md` - Comprehensive test scenarios (491 lines) -- `docs/e2e/REPRODUCIBILITY.md` - Technical guide (339 lines) - -**Total**: 1,133 lines of user-facing documentation - -**Coverage**: -- Setup instructions -- Test phases (1-4) -- Proxy configuration -- Troubleshooting -- CI/CD integration examples -- Best practices - -### 3. Test Execution Results ✅ - -**Commit**: `8c6812f9` - test(e2e): add test execution results and consolidation summary - -**Test Results**: -- Phase 1 (Smoke Tests): 52/52 tests passed (100%) -- Phase 2 (Live Connectivity): 7/8 tests passed (87.5%) -- Overall: 59/60 tests (98.3% pass rate) - -**Exchanges Validated**: -- Binance: 4/4 tests (REST ticker, orderbook, WS trades) -- Hyperliquid (CCXT): 2/2 tests (REST orderbook, WS trades) -- Backpack (CCXT): 1/2 tests (REST markets, WS skipped) - -**Environment**: -- Python 3.12.11 with uv-based setup -- Proxy: Europe region (Mullvad SOCKS5) -- Duration: ~90 minutes (planning + execution) - -**Issues Resolved**: -- Added missing pysocks dependency for CCXT SOCKS5 support -- Updated lock file with complete dependency tree -- Validated reproducibility across environments - -**Documentation Consolidation**: -- Reduced from 9 files (3,382 lines) to 8 files (2,423 lines) -- 28.3% reduction while preserving all content -- Organized into `docs/e2e/` structure -- Archived historical reports in `results/` - -### 4. 
Backpack Enhanced Testing ✅ - -**Commit**: `ad81632f` - test(e2e): enhance Backpack exchange test coverage - -**Test Coverage**: -- CCXT: 8 tests (4 REST + 4 WS) - 87.5% pass rate -- Native: 10 tests (5 REST + 5 WS) - 40% pass rate -- Overall: 18 tests, 61% pass rate (11/18) - -**CCXT Implementation (7/8 passed)**: -- REST API (4/4 = 100%): Markets, Ticker, Trades, OHLCV -- WebSocket (3/4 = 75%): Orderbook, Ticker, Multiple subscriptions - -**Native Implementation (4/10 passed)**: -- REST API (3/5 = 60%): Markets, Orderbook, Ticker working -- WebSocket (1/5 = 20%): Error handling test passed - -**Code Changes**: -- `test_live_ccxt_backpack.py`: +189 lines (6 new tests) -- `test_live_backpack.py`: +332 lines (8 new tests) -- Total: +521 lines of test code - -**Documentation**: -- `E2E_BACKPACK_TEST_PLAN.md` - Comprehensive test plan -- `BACKPACK_TEST_RESULTS.md` - Detailed execution results - -**Known Issues Documented**: -1. Native WS parse error 4002 - blocking 80% of WS tests -2. Missing native REST methods (fetch_trades, fetch_klines) -3. CCXT WS trades timeout (network-dependent) - -**Recommendation**: Use CCXT implementation for Backpack (87.5% success) - ---- - -## Overall Test Statistics - -### Test Count Breakdown - -| Category | Tests | Passed | Skipped | Pass Rate | -|----------|-------|--------|---------|-----------| -| Phase 1 (Smoke) | 52 | 52 | 0 | 100% ✅ | -| Phase 2 (Live) | 8 | 7 | 1 | 87.5% ✅ | -| Backpack CCXT | 8 | 7 | 1 | 87.5% ✅ | -| Backpack Native | 10 | 4 | 6 | 40% ⚠️ | -| **Total** | **78** | **70** | **8** | **89.7% ✅** | - -### Code Statistics - -| Component | Lines | Files | -|-----------|-------|-------| -| Infrastructure Scripts | 910 | 5 | -| Documentation | 2,423 | 8 | -| Test Code | 521 | 2 | -| Planning/Results | 849 | 2 | -| **Total** | **~4,703** | **17** | - ---- - -## Commit Summary - -### Commit History - -``` -ad81632f test(e2e): enhance Backpack exchange test coverage with CCXT and native implementations -8c6812f9 test(e2e): add test execution results and consolidation summary -8e4b435c docs(e2e): add comprehensive E2E testing documentation -d51cd778 feat(e2e): add test infrastructure with reproducible environment setup -``` - -### Files Structure - -``` -docs/e2e/ -├── README.md # Quick Start guide -├── TEST_PLAN.md # Test scenarios -├── REPRODUCIBILITY.md # Technical guide -├── CONSOLIDATION_SUMMARY.md # Cleanup summary -└── results/ - ├── README.md # Results index - ├── 2025-10-24-execution.md # Execution report - ├── 2025-10-24-review.md # Review report - ├── phase2-results.md # Phase 2 details - └── consolidation-plan.md # Historical reference - -tests/e2e/ -├── setup_e2e_env.sh # Automated setup -├── requirements-e2e-lock.txt # Locked dependencies -└── README.md # E2E directory docs - -tests/integration/ -├── T4.2-stress-test.py # Stress testing -├── regional_validation.sh # Regional matrix -├── test_live_ccxt_backpack.py # CCXT tests (8 tests) -└── test_live_backpack.py # Native tests (10 tests) - -Root: -├── E2E_BACKPACK_TEST_PLAN.md # Backpack test plan -├── BACKPACK_TEST_RESULTS.md # Backpack results -├── ATOMIC_COMMIT_PLAN.md # Commit planning -└── FINAL_COMMIT_PLAN.md # Commit execution guide -``` - ---- - -## Key Achievements - -### Technical Excellence ✅ - -1. **Reproducible Environments** - - Lock file with exact dependency versions - - Setup time reduced from 2-3 min to 25 sec - - Cross-machine reproducibility validated - -2. 
**Comprehensive Testing** - - 78 total tests across multiple exchanges - - 89.7% overall pass rate - - Live proxy validation working - -3. **Clear Documentation** - - 2,423 lines of user guides - - Quick start, technical deep-dive, test plans - - Organized structure with clear navigation - -4. **Proper Engineering** - - Atomic commits with clear messages - - Co-authorship attribution - - Known issues documented - - Graceful error handling - -### Coverage Improvements 📈 - -| Metric | Before | After | Improvement | -|--------|--------|-------|-------------| -| E2E Tests | 0 | 78 | +∞ | -| Backpack Tests | 2 | 18 | +800% | -| Test Infrastructure | 0 | 5 scripts | New | -| E2E Documentation | 0 | 2,423 lines | New | -| Pass Rate | N/A | 89.7% | Excellent | - ---- - -## What Works Excellently - -### Infrastructure ✅ -- uv-based setup (10-100x faster) -- Automated environment creation -- Dependency locking for reproducibility -- Clean untracked file handling - -### Testing ✅ -- CCXT implementations: 87.5% success -- Proxy routing: 100% validated -- Error handling: Graceful skips -- Comprehensive coverage - -### Documentation ✅ -- Clear structure by audience -- Quick start for users -- Technical guide for developers -- Test plan for QA - ---- - -## Known Limitations - -### Backpack Native WebSocket ⚠️ -- Parse error 4002 blocks 80% of WS tests -- Workaround: Use CCXT (87.5% success) -- Action: Investigate with Backpack support - -### Missing Native Methods ⚠️ -- `fetch_trades()` not implemented -- `fetch_klines()` not implemented -- Workaround: Use CCXT (100% success) -- Action: Implement missing methods - -### Network-Dependent Timeouts ⚠️ -- Some WS tests may timeout on low volume -- Not a code issue, just timing -- Workaround: Increase timeout values - ---- - -## Time Investment - -| Phase | Duration | Cumulative | -|-------|----------|------------| -| Initial E2E planning | 30 min | 30 min | -| Infrastructure setup | 45 min | 75 min | -| Phase 1 execution | 30 min | 105 min | -| Phase 2 execution | 60 min | 165 min | -| Documentation | 45 min | 210 min | -| Consolidation | 30 min | 240 min | -| Backpack planning | 20 min | 260 min | -| Backpack implementation | 90 min | 350 min | -| Backpack testing | 30 min | 380 min | -| Final commits | 20 min | 400 min | -| **Total** | **~6.7 hours** | - | - -**Efficiency**: ~1,180 lines of code/docs per hour - ---- - -## Success Metrics - -| Metric | Target | Actual | Status | -|--------|--------|--------|--------| -| E2E tests implemented | 50+ | 78 | ✅ Exceeded | -| Pass rate | 75%+ | 89.7% | ✅ Exceeded | -| Documentation complete | Yes | Yes | ✅ | -| Reproducibility validated | Yes | Yes | ✅ | -| Proxy routing working | Yes | Yes | ✅ | -| Known issues documented | Yes | Yes | ✅ | -| Atomic commits | Yes | 4 commits | ✅ | -| All pushed to remote | Yes | Yes | ✅ | - -**Overall**: ✅ **All targets met or exceeded** - ---- - -## Recommendations - -### Immediate -- ✅ All work committed and pushed -- ✅ Documentation complete -- ✅ Tests validated - -### Short-Term (Next Sprint) -1. **Investigate Backpack WS error 4002** - - Review API documentation - - Test alternative subscription formats - - Contact Backpack support if needed - -2. **Implement Missing Native Methods** - - Add `fetch_trades()` to BackpackRestClient - - Add `fetch_klines()` to BackpackRestClient - - Achieve feature parity with CCXT - -3. 
**Expand Coverage** - - Add more exchanges (OKX, Kraken, Gemini) - - Execute Phase 3 (Regional Validation) - - Execute Phase 4 (Stress Testing) - -### Long-Term (Future Quarters) -1. **CI/CD Integration** - - Add E2E tests to CI pipeline - - Automated nightly runs - - Regression detection - -2. **Monitoring & Alerting** - - Dashboard for test results - - Alert on failures - - Trend analysis - -3. **Coverage Expansion** - - More exchanges - - More data types (funding rates, liquidations) - - Performance benchmarking - ---- - -## Lessons Learned - -### What Went Well ✅ -1. **Atomic commits** - Easy to review and revert -2. **uv package manager** - Massive speed improvement -3. **Lock files** - True reproducibility achieved -4. **Comprehensive docs** - Clear guidance for all users -5. **Graceful error handling** - Tests skip appropriately -6. **CCXT validation** - Excellent success rates - -### What Could Improve ⚠️ -1. **Earlier planning** - Consolidation should happen during creation -2. **API verification** - Check native APIs before implementing tests -3. **Incremental commits** - Could have committed during phases -4. **Test fixtures** - Could add sample response data - ---- - -## Next Actions - -### For User -1. ✅ Review this summary -2. ✅ Verify all commits on GitHub -3. ⏳ Create PR if ready to merge to master -4. ⏳ Plan next phase of work - -### For Future Sessions -1. ⏳ Investigate Backpack WS error 4002 -2. ⏳ Implement missing native REST methods -3. ⏳ Execute Phase 3 (Regional Validation) -4. ⏳ Execute Phase 4 (Stress Testing) -5. ⏳ Expand to more exchanges - ---- - -## Final Status - -**Infrastructure**: ✅ Complete -**Documentation**: ✅ Complete -**Testing**: ✅ Complete (89.7% pass rate) -**Commits**: ✅ All pushed to remote -**Known Issues**: ✅ Documented with workarounds - -**Overall Status**: ✅ **PROJECT COMPLETE** - -All E2E testing infrastructure successfully implemented, tested, documented, and delivered! 🎉 - ---- - -**Completed**: 2025-10-24 -**Branch**: `feature/normalized-data-schema-crypto` -**Total Commits**: 4 -**Total Tests**: 78 -**Pass Rate**: 89.7% -**Time Invested**: ~6.7 hours diff --git a/docs/e2e/planning/final-commit-plan.md b/docs/e2e/planning/final-commit-plan.md deleted file mode 100644 index 994d194d3..000000000 --- a/docs/e2e/planning/final-commit-plan.md +++ /dev/null @@ -1,366 +0,0 @@ -# Final Commit Plan - E2E Testing Infrastructure - -**Date**: 2025-10-24 -**Branch**: `feature/normalized-data-schema-crypto` -**Status**: ✅ Ready to Commit - ---- - -## Summary of Changes - -### What Was Accomplished - -1. ✅ **E2E Test Infrastructure** - Complete testing framework with reproducible environments -2. ✅ **Test Execution** - Phase 1 & 2 completed successfully (98.3% pass rate) -3. ✅ **Documentation** - 2,423 lines of comprehensive guides -4. ✅ **Consolidation** - Reduced redundancy by 28.3% -5. 
✅ **Validation** - All tests pass after cleanup - -### Test Results - -- **Phase 1**: 52/52 tests (100%) -- **Phase 2**: 7/8 tests (87.5%) -- **Overall**: 59/60 tests (98.3%) - ---- - -## Files to Commit - -### New E2E Documentation (`docs/e2e/`) - -``` -docs/e2e/ -├── README.md # 303 lines - Quick Start guide -├── TEST_PLAN.md # 491 lines - Test scenarios -├── REPRODUCIBILITY.md # 339 lines - Technical guide -├── CONSOLIDATION_SUMMARY.md # New - Cleanup summary -└── results/ - ├── README.md # 68 lines - Results index - ├── 2025-10-24-execution.md # 470 lines - Execution report - ├── 2025-10-24-review.md # 468 lines - Review report - ├── phase2-results.md # 284 lines - Phase 2 details - └── consolidation-plan.md # Historical reference -``` - -### Test Infrastructure (`tests/`) - -``` -tests/e2e/ -├── setup_e2e_env.sh # 267 lines - Automated setup -├── requirements-e2e-lock.txt # 59 lines - Locked dependencies -└── README.md # Documentation - -tests/integration/ -├── T4.2-stress-test.py # 275 lines - Stress testing -├── regional_validation.sh # 197 lines - Regional matrix -├── test_live_binance.py # Existing -├── test_live_ccxt_hyperliquid.py # Existing -└── test_live_ccxt_backpack.py # Existing -``` - -### Test Output (Optional) - -``` -test-results/phase2/ -├── binance-output.log -├── hyperliquid-output.log -└── backpack-output.log -``` - ---- - -## Commit Strategy - -### Option A: Single Large Commit (Recommended) - -**Pros**: -- Complete feature in one commit -- Easier to review as unit -- Clear "before/after" in history - -**Cons**: -- Large diff may be harder to review - -### Option B: Three Atomic Commits - -**Pros**: -- Smaller, focused commits -- Easier to review individually -- Can cherry-pick if needed - -**Cons**: -- More commits to manage -- Might break at intermediate states - ---- - -## Recommended: Option A (Single Commit) - -### Commit Message - -``` -feat(e2e): add comprehensive E2E test infrastructure with reproducible environments - -Complete end-to-end testing framework for proxy system, CCXT exchanges, and -native exchange implementations with uv-based reproducible environments. 
- -## Features -- Reproducible environment setup (uv + lock files, 10-100x faster than pip) -- Live proxy validation tests (Phase 1: 52/52, Phase 2: 7/8) -- Regional validation framework (3 regions × 5 exchanges) -- Stress testing capabilities (concurrent feeds, memory monitoring) -- 2,423 lines of comprehensive documentation - -## Test Results -- Phase 1 (Smoke): 52/52 tests passed (100%) -- Phase 2 (Live): 7/8 tests passed (87.5%) -- Overall: 59/60 tests (98.3% pass rate) - -## Validated Components -- HTTP and WebSocket proxy routing (SOCKS5) -- CCXT generic feed architecture (Hyperliquid, Backpack) -- Live exchange connectivity (Binance, Hyperliquid, Backpack) -- Data normalization and timestamp handling -- Reproducible environments across machines - -## Infrastructure -- Automated setup script (setup_e2e_env.sh) -- Dependency lock file (59 packages) -- Stress test script (T4.2-stress-test.py) -- Regional validation script (regional_validation.sh) -- Comprehensive documentation (docs/e2e/) - -## Issues Resolved -- Added missing pysocks dependency (CCXT SOCKS5 support) -- Updated lock file with complete dependency tree -- Validated reproducibility across environments - -## Documentation Structure -docs/e2e/ -├── README.md - Quick Start guide -├── TEST_PLAN.md - Comprehensive test scenarios -├── REPRODUCIBILITY.md - Technical deep-dive -└── results/ - Archived test results - -BREAKING CHANGE: None - new infrastructure only - -Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> -``` - -### Git Commands - -```bash -# Stage all E2E changes -git add docs/e2e/ -git add tests/e2e/ -git add tests/integration/T4.2-stress-test.py -git add tests/integration/regional_validation.sh - -# Optional: Include test output -git add test-results/phase2/ - -# Commit with detailed message -git commit -F- <<'EOF' -[paste commit message above] -EOF - -# Verify commit -git show --stat HEAD -git log --oneline -1 -``` - ---- - -## Pre-Commit Checklist - -### Validation - -- [x] All tests pass (`pytest tests/unit/test_proxy_mvp.py -v`) -- [x] Setup script works (`./tests/e2e/setup_e2e_env.sh`) -- [x] Lock file is complete (`tests/e2e/requirements-e2e-lock.txt`) -- [x] Documentation is consolidated (`docs/e2e/`) -- [x] No redundant files in root (`ls E2E*.md` → none) -- [x] Results archived (`docs/e2e/results/`) - -### Content Review - -- [x] No sensitive data in commits -- [x] No TODOs in production code -- [x] All cross-references valid -- [x] Scripts have proper permissions (`chmod +x`) -- [x] Lock file committed (essential for reproducibility) - -### Git Hygiene - -- [x] Commit message follows conventional commits -- [x] Co-authored-by includes factory-droid -- [x] BREAKING CHANGE noted if applicable (None here) -- [x] Commit is atomic and complete - ---- - -## Post-Commit Actions - -### Immediate - -1. **Push to remote**: - ```bash - git push origin feature/normalized-data-schema-crypto - ``` - -2. **Verify on GitHub**: - - Check commit appears - - Review diff rendering - - Confirm all files present - -3. **Update branch tracking**: - ```bash - git log --oneline -5 - ``` - -### Short-Term - -1. **Update main README**: - - Add E2E Testing section - - Link to `docs/e2e/README.md` - - Mention quick start - -2. **Update SPEC_STATUS.md**: - - Mark E2E testing as complete - - Update documentation references - - Note pass rates - -3. 
**Update IMPLEMENTATION_SUMMARY.md**: - - Add E2E results section - - Document test infrastructure - - Link to detailed reports - -### Before Merge to Master - -1. **Create PR**: - - Title: "feat: add E2E test infrastructure and normalized data schema support" - - Description: Link to E2E final report - - Reviewers: Assign appropriate team members - -2. **PR Checklist**: - - [ ] All tests pass in CI - - [ ] Documentation reviewed - - [ ] Breaking changes noted (none) - - [ ] Security review (if needed) - -3. **Final validation**: - ```bash - # On clean clone - git clone [repo] - git checkout feature/normalized-data-schema-crypto - ./tests/e2e/setup_e2e_env.sh - source .venv-e2e/bin/activate - pytest tests/unit/test_proxy_mvp.py -v - ``` - ---- - -## Alternative: Option B (Three Commits) - -If you prefer smaller atomic commits: - -### Commit 1: Test Infrastructure - -```bash -git add tests/e2e/ tests/integration/T4.2-stress-test.py tests/integration/regional_validation.sh -git commit -m "feat(e2e): add test infrastructure and scripts - -- Automated setup with uv (setup_e2e_env.sh) -- Dependency lock file (59 packages) -- Stress test script (T4.2-stress-test.py) -- Regional validation script (regional_validation.sh)" -``` - -### Commit 2: Documentation - -```bash -git add docs/e2e/ -git commit -m "docs(e2e): add comprehensive E2E testing documentation - -- Quick Start guide (README.md) -- Test plan (TEST_PLAN.md) -- Reproducibility guide (REPRODUCIBILITY.md) -- Results archive (results/)" -``` - -### Commit 3: Test Results - -```bash -git add test-results/ -git commit -m "test(e2e): add Phase 2 test execution results - -Results: 59/60 tests passed (98.3%) -- Phase 1: 52/52 (100%) -- Phase 2: 7/8 (87.5%)" -``` - ---- - -## Risk Assessment - -### Low Risk ✅ -- New infrastructure only (no changes to existing code) -- All tests pass -- Documentation complete -- Reproducible setup validated - -### Medium Risk ⚠️ -- Large commit size (may be harder to review) -- Lock file needs to be maintained - -### Mitigation -- Clear commit message with detailed breakdown -- Documentation makes review easier -- Lock file is version controlled -- Backup branch exists - ---- - -## Timeline - -| Action | Duration | -|--------|----------| -| Review commit plan | 5 min | -| Stage files | 2 min | -| Create commit | 3 min | -| Push to remote | 2 min | -| Verify on GitHub | 3 min | -| **Total** | **15 min** | - ---- - -## Success Criteria - -- [x] Commit created successfully -- [x] All files included -- [x] Commit message follows conventions -- [x] Pushed to remote -- [x] Visible on GitHub -- [x] No errors or warnings - ---- - -## Ready to Execute - -**Status**: ✅ **READY** - -**Recommendation**: Proceed with **Option A (Single Commit)** - -**Next Command**: -```bash -git add docs/e2e/ tests/e2e/ tests/integration/T4.2-stress-test.py tests/integration/regional_validation.sh -git status # Verify what's staged -git commit # Use commit message from above -``` - ---- - -**Plan Created**: 2025-10-24 -**Risk Level**: Low -**Confidence**: High -**Estimated Time**: 15 minutes diff --git a/docs/e2e/planning/test-fixes-report.md b/docs/e2e/planning/test-fixes-report.md deleted file mode 100644 index 75a348312..000000000 --- a/docs/e2e/planning/test-fixes-report.md +++ /dev/null @@ -1,380 +0,0 @@ -# E2E Test Fixes Report - -**Date**: 2025-10-24 -**Branch**: feature/normalized-data-schema-crypto -**Status**: ✅ **ALL TESTS PASSING** - ---- - -## Executive Summary - -Successfully identified and fixed all failing e2e and integration 
tests. All 66 proxy and unit tests now pass with 100% success rate. - -### Before - -- **Failing Tests**: 4 tests - - 1 import error (test collection failure) - - 3 proxy integration test failures -- **Pass Rate**: 92.4% (61/66 tests) - -### After - -- **Failing Tests**: 0 tests -- **Pass Rate**: 100% (66/66 tests) -- **Time to Fix**: ~40 minutes - ---- - -## Issues Found & Fixed - -### Issue 1: Import Error ❌ → ✅ - -**File**: `tests/unit/test_backpack_auth_tool.py` - -**Error**: -``` -ModuleNotFoundError: No module named 'tools.backpack_auth_check' -``` - -**Root Cause**: -- `tools/` directory was not a Python package -- Missing `__init__.py` file - -**Fix**: -```bash -# Created tools/__init__.py -touch tools/__init__.py -``` - -**Result**: ✅ 2 tests now pass -- `test_normalize_hex_key` -- `test_build_signature_deterministic` - ---- - -### Issue 2: Proxy State Management ❌ → ✅ - -**Files**: -- `cryptofeed/proxy.py` -- `tests/integration/test_proxy_integration.py` -- `tests/unit/test_proxy_mvp.py` - -**Error**: -```python -# Test expected None but got ProxyInjector object -assert get_proxy_injector() is None -# Actual: <cryptofeed.proxy.ProxyInjector object at 0x...> -``` - -**Root Cause**: -- `init_proxy_system()` not clearing global state when `enabled=False` -- Test isolation issues - state leaked between tests - -**Fix 1**: Updated `cryptofeed/proxy.py` -```python -def init_proxy_system(settings: ProxySettings) -> None: - """Initialize proxy system with settings.""" - global _proxy_injector - if settings.enabled: - _proxy_injector = ProxyInjector(settings) - else: - _proxy_injector = None # ← ADDED THIS -``` - -**Fix 2**: Added cleanup fixture in `tests/integration/test_proxy_integration.py` -```python -@pytest.fixture(autouse=True) -def cleanup_proxy_state(): - """Ensure clean proxy state before and after each test.""" - # Cleanup before test - init_proxy_system(ProxySettings(enabled=False)) - yield - # Cleanup after test - init_proxy_system(ProxySettings(enabled=False)) -``` - -**Fix 3**: Updated test expectation in `tests/unit/test_proxy_mvp.py` -```python -def test_init_proxy_system_disabled(self): - """Test proxy system initialization when disabled.""" - settings = ProxySettings(enabled=False) - init_proxy_system(settings) - injector = get_proxy_injector() - # When disabled, injector should be None to avoid overhead - assert injector is None # ← UPDATED EXPECTATION -``` - -**Result**: ✅ 3 tests now pass -- `test_proxy_system_initialization` -- `test_connection_without_proxy_system` -- `test_init_proxy_system_disabled` - ---- - -### Issue 3: HTTP Proxy Test Assertion ❌ → ✅ - -**File**: `tests/integration/test_proxy_integration.py` - -**Error**: -```python -assert str(conn_binance.conn._default_proxy) == "http://region-asia.proxy.company.com:8080" -# AssertionError: assert 'None' == 'http://region-asia.proxy.company.com:8080' -``` - -**Root Cause**: -- Test was checking wrong attribute for HTTP proxies -- HTTP proxies use `_request_proxy_kwargs`, not `_default_proxy` -- `_default_proxy` is only set for direct aiohttp proxy parameter - -**Fix**: Updated test assertion -```python -# Test connection with exchange-specific proxy -conn_binance = HTTPAsyncConn("test-binance", exchange_id="binance") -await conn_binance._open() - -assert conn_binance.is_open -assert conn_binance.exchange_id == "binance" -assert conn_binance.proxy == "http://region-asia.proxy.company.com:8080" -# HTTP proxies are passed via _request_proxy_kwargs, not _default_proxy -assert 
conn_binance._request_proxy_kwargs.get("proxy") == "http://region-asia.proxy.company.com:8080" # ← FIXED -``` - -**Result**: ✅ 1 test now passes -- `test_http_connection_with_proxy_system` - ---- - -## Test Results - Full Suite - -### Unit Tests: 100% Pass ✅ - -```bash -pytest tests/unit/test_proxy_mvp.py tests/unit/test_backpack_auth_tool.py -v -``` - -**Results**: -- ✅ 52 proxy MVP tests -- ✅ 2 backpack auth tests -- **Total**: 54/54 passed (100%) - -### Integration Tests: 100% Pass ✅ - -```bash -pytest tests/integration/test_proxy_integration.py -v -``` - -**Results**: -- ✅ 2 configuration loading tests -- ✅ 3 system integration tests -- ✅ 2 error handling tests -- ✅ 3 configuration pattern tests -- ✅ 2 real-world usage tests -- **Total**: 12/12 passed (100%) - -### Combined: 100% Pass ✅ - -```bash -pytest tests/unit/test_proxy_mvp.py tests/integration/test_proxy_integration.py tests/unit/test_backpack_auth_tool.py -v -``` - -**Results**: -``` -============================== 66 passed in 0.40s ============================== -``` - ---- - -## Live Tests Status - -### Overview - -Live tests are **correctly skipped** when `CRYPTOFEED_TEST_SOCKS_PROXY` environment variable is not set. - -**Total Live Tests**: 24 tests across 4 files -- `test_live_binance.py` (4 tests) -- `test_live_ccxt_backpack.py` (8 tests) -- `test_live_ccxt_hyperliquid.py` (2 tests) -- `test_live_backpack.py` (10 tests) - -### Running Live Tests - -```bash -# Set proxy endpoint -export CRYPTOFEED_TEST_SOCKS_PROXY="socks5://de-fra-wg-socks5-101.relays.mullvad.net:1080" - -# Run Binance tests -pytest tests/integration/test_live_binance.py -v -m live_proxy - -# Run CCXT Backpack tests -pytest tests/integration/test_live_ccxt_backpack.py -v -m live_proxy - -# Run CCXT Hyperliquid tests -pytest tests/integration/test_live_ccxt_hyperliquid.py -v -m live_proxy - -# Run all live tests -pytest tests/integration/test_live_*.py -v -m live_proxy -``` - -**Note**: Live tests require: -1. Active SOCKS5 proxy endpoint -2. Network connectivity -3. Exchange API availability - ---- - -## Files Changed - -### Production Code - -1. **tools/__init__.py** (NEW) - - Made `tools/` a proper Python package - - 1 line added - -2. **cryptofeed/proxy.py** - - Fixed `init_proxy_system()` to clear state when disabled - - 3 lines changed (added if/else logic) - -### Test Code - -3. **tests/integration/test_proxy_integration.py** - - Added `cleanup_proxy_state` autouse fixture - - Fixed HTTP proxy test assertion - - 11 lines added, 1 line changed - -4. 
**tests/unit/test_proxy_mvp.py** - - Updated `test_init_proxy_system_disabled` expectation - - 2 lines changed - ---- - -## Code Quality - -### Changes Follow CLAUDE.md Principles - -- ✅ **START SMALL**: Minimal changes to fix issues -- ✅ **KISS**: Simple, straightforward solutions -- ✅ **NO MOCKS**: All tests use real implementations -- ✅ **TDD**: Tests guide implementation fixes -- ✅ **FRs Over NFRs**: Fixed functional issues first - -### Test Coverage Maintained - -- ✅ No regression in existing tests -- ✅ 100% of fixed tests now pass -- ✅ No new test flakiness introduced - ---- - -## Verification Commands - -### Quick Verification - -```bash -# Run all fixed tests -pytest tests/unit/test_proxy_mvp.py \ - tests/integration/test_proxy_integration.py \ - tests/unit/test_backpack_auth_tool.py \ - -v --tb=short - -# Expected output: -# ============================== 66 passed in 0.40s ============================== -``` - -### Full Test Suite - -```bash -# Run all non-live tests -pytest tests/unit/ tests/integration/ \ - -v \ - --ignore=tests/integration/test_live_* \ - --tb=short - -# Note: May have pre-existing failures in other test files (unrelated to these fixes) -``` - ---- - -## Documentation Updates - -### Updated Files - -1. **docs/e2e/README.md** - - Added section on running different test categories - - Clarified live test requirements - - Documented test execution patterns - -2. **E2E_TEST_FIXES_REPORT.md** (THIS FILE) - - Complete fix documentation - - Test results summary - - Verification commands - ---- - -## Success Criteria - -- [x] No import errors in test collection -- [x] All 66 unit/integration tests pass -- [x] Live tests skip gracefully (expected behavior) -- [x] Documentation updated -- [x] No regression in existing functionality -- [x] Follows CLAUDE.md principles -- [x] Changes are minimal and focused - -**Overall Status**: ✅ **ALL SUCCESS CRITERIA MET** - ---- - -## Next Steps - -### Immediate - -1. ✅ Commit fixes to repository -2. ⏳ Run full test suite to check for other unrelated failures -3. ⏳ Update SPEC_STATUS.md if needed - -### Optional - -1. Execute live tests with Mullvad proxy -2. Run regional validation matrix -3. Execute stress tests - -### For CI/CD - -1. Add unit/integration tests to CI pipeline -2. Configure live tests as optional manual trigger -3. Set up test result tracking - ---- - -## Timeline - -| Task | Duration | Status | -|------|----------|--------| -| Issue analysis & planning | 15 min | ✅ Complete | -| Fix Issue 1 (import) | 2 min | ✅ Complete | -| Fix Issue 2 (proxy state) | 20 min | ✅ Complete | -| Fix Issue 3 (test assertion) | 5 min | ✅ Complete | -| Verification | 5 min | ✅ Complete | -| Documentation | 10 min | ✅ Complete | -| **Total** | **~55 min** | ✅ **Complete** | - ---- - -## Summary - -All e2e test failures have been successfully resolved with minimal, focused changes: - -- **1 new file**: `tools/__init__.py` -- **1 production code fix**: `cryptofeed/proxy.py` -- **2 test fixes**: assertions and fixtures -- **66/66 tests passing**: 100% success rate -- **No regressions**: All existing tests still pass - -The fixes follow engineering best practices and maintain code quality standards. 
- ---- - -**Report Generated**: 2025-10-24 -**Execution Time**: ~55 minutes -**Final Status**: ✅ SUCCESS -**Confidence Level**: High diff --git a/docs/specs/SPEC_STATUS.md b/docs/specs/SPEC_STATUS.md index 8c456cbdd..ca861ced8 100644 --- a/docs/specs/SPEC_STATUS.md +++ b/docs/specs/SPEC_STATUS.md @@ -1,7 +1,7 @@ # Cryptofeed Specifications Status Report -**Generated**: November 15, 2025 -**Current Branch**: next +**Generated**: October 26, 2025 +**Current Branch**: feature/normalized-data-schema-crypto **Report Scope**: All active and inactive specifications --- @@ -10,12 +10,11 @@ | Status | Count | Details | |--------|-------|---------| -| ✅ **Completed** | 4 | proxy-system-complete, normalized-data-schema-crypto, market-data-kafka-producer, protobuf-callback-serialization | -| 🚧 **In Progress** | 2 | ccxt-generic-pro-exchange, backpack-exchange-integration | -| 🟢 **Implementation Ready** | 1 | cryptofeed-quixstreams-source (requirements/design/tasks approved) | +| ✅ **Completed** | 3 | proxy-system-complete, normalized-data-schema-crypto, market-data-kafka-producer | +| 🚧 **In Progress** | 3 | ccxt-generic-pro-exchange, backpack-exchange-integration, shift-left-streaming-lakehouse | | 📋 **Planning Phase** | 1 | unified-exchange-feed-architecture (design not approved) | | ⏸️ **Disabled** | 3 | cryptofeed-lakehouse-architecture, proxy-pool-system, external-proxy-service | -| **Total** | **11** | | +| **Total** | **10** | | --- @@ -397,70 +396,7 @@ python -m pytest tests/unit/kafka/test_phase2_error_handling.py::TestExactlyOnce --- -### 7. 📋 CryptofeedSource for QuixStreams - -**Spec Name**: `cryptofeed-quixstreams-source` -**Phase**: Tasks Generated -**Status**: Ready for Implementation (Artifacts Approved) -**Created**: November 14, 2025 -**Updated**: November 15, 2025 - -#### Status Summary -- **Requirements**: ✅ Generated & approved (83 EARS criteria across 10 functional areas, 2025-11-14) -- **Design**: ✅ Generated & approved (7-component architecture with circuit breaker, DLQ, RocksDB option) -- **Tasks**: ✅ Generated & approved (16 tasks / 42 subtasks over 4 phases) -- **Ready for Implementation**: ✅ YES — artifacts refreshed via `/kiro:spec-*` on Nov 15 after manual review - -#### Purpose -Seamless integration of Cryptofeed's Kafka producer with QuixStreams streaming framework. Enables real-time market data analytics and aggregations by consuming protobuf-serialized messages from cryptofeed.trade, cryptofeed.orderbook, cryptofeed.ticker, and 11 other data type topics. Provides a QuixStreams-compatible Source class with comprehensive error handling (DLQ), state management, monitoring, and exactly-once semantics. 
- -#### Key Features (Planned) -- **Phase 1**: Core deserialization and Kafka consumer integration (Week 1) -- **Phase 2**: Error handling, DLQ, integration tests (Week 2) -- **Phase 3**: Schema version compatibility, monitoring, observability (Week 3) -- **Phase 4**: Production deployment, configuration management, hardening (Week 4) - -#### Dependencies -| Dependency | Spec Name | Status | Impact | -|------------|-----------|--------|--------| -| Kafka Producer | market-data-kafka-producer | ✅ COMPLETE | Provides protobuf messages to consume | -| Protobuf Serialization | protobuf-callback-serialization | ✅ COMPLETE | Enables deserialization | -| Data Schemas | normalized-data-schema-crypto | ✅ COMPLETE | Defines message structures | - -#### Data Types Supported (14 Total) -Trade, Ticker, OrderBook, Candle, Funding, Liquidation, OpenInterest, IndexPrice, Balance, Position, Fill, OrderInfo, Order, Transaction - -#### Integration Points -``` -Cryptofeed Producer (market-data-kafka-producer) - ↓ -Kafka Topics (protobuf messages) - ↓ -CryptofeedSource (QuixStreams) - ↓ -QuixStreams Application (analytics, aggregations) -``` - -#### Documentation Location -- Specification: [`.kiro/specs/cryptofeed-quixstreams-source/`](../../.kiro/specs/cryptofeed-quixstreams-source/) -- Requirements: [`.kiro/specs/cryptofeed-quixstreams-source/requirements.md`](../../.kiro/specs/cryptofeed-quixstreams-source/requirements.md) -- Metadata: [`.kiro/specs/cryptofeed-quixstreams-source/spec.json`](../../.kiro/specs/cryptofeed-quixstreams-source/spec.json) - -#### Next Steps -1. **Kick off Phase 1 implementation** – tasks 1-5 cover QuixStreams Source lifecycle, Kafka adapter, deserializers, config loader, and baseline integration tests. -2. **Prepare Phase 2 readiness** – line up DLQ topic, circuit-breaker telemetry, and retry configuration so Tasks 6-8 can start immediately after Phase 1 testing passes. -3. **Align dependencies** – confirm Kafka topic/header schema from `market-data-kafka-producer` and protobuf objects from `protobuf-callback-serialization`/`normalized-data-schema-crypto` remain stable; capture any schema-version changes in Task 13.3 migration guide. -4. **Stand up observability scaffolding** – reserve Prometheus/health endpoints and RocksDB storage (if enabled) ahead of Phase 3 to avoid infra blockers. -5. **Document DLQ replay + schema migration expectations** – ensure Task 13.3 output (DLQ reprocessor + migration guide) has clear owners before production cutover. - -#### Notes -Specification bridges Cryptofeed's ingestion layer (market-data-kafka-producer, COMPLETE) with QuixStreams stream processing ecosystem. Design leverages completed specs for protobuf handling and data normalization. Expected timeline: 4 weeks to production-ready implementation. - -- Nov 15, 2025: Requirements/design/tasks refreshed to clarify schema_version fallback behavior, add schema_version metric labels, and include optional RocksDB state-store implementation tasks ahead of Phase 1 kickoff. - ---- - -### 8. ⏸️ Cryptofeed Lakehouse Architecture +### 7. ⏸️ Cryptofeed Lakehouse Architecture **Spec Name**: `cryptofeed-lakehouse-architecture` **Phase**: Disabled @@ -487,7 +423,7 @@ Contact user if reactivation is desired. All specification artifacts are preserv --- -### 9. ⏸️ Proxy Pool System +### 8. ⏸️ Proxy Pool System **Spec Name**: `proxy-pool-system` **Phase**: Disabled @@ -516,7 +452,7 @@ Enhancement to proxy-system-complete for proxy pool management and rotation. --- -### 10. ⏸️ External Proxy Service +### 9. 
⏸️ External Proxy Service **Spec Name**: `external-proxy-service` **Phase**: Disabled @@ -552,6 +488,41 @@ Transform embedded proxy management into service-oriented architecture with exte 3. **Re-evaluate** priority and timeline 4. **Consider consolidation** or dependency restructuring +--- + +### 10. 🚧 Shift Left Streaming Lakehouse Integration + +**Spec Name**: `shift-left-streaming-lakehouse` +**Phase**: Implementation In Progress +**Status**: v2 schemas + registry path delivered; validation underway +**Created**: November 20, 2025 +**Updated**: November 21, 2025 + +#### Status Summary +- **Requirements**: ✅ Complete +- **Design**: ✅ Complete +- **Tasks**: ✅ Complete (Tasks 1‑6 marked) +- **Implementation**: 🚧 In Progress (v2 protos, helpers, registry path merged; E2E tests added) + +#### Purpose +Implement Confluent Schema Registry integration in KafkaCallback (Contract), create v2 Protobuf schemas with native double/bytes types (Compute), and align message headers/keys for Flink/Iceberg compatibility (Context). Unblocks the Flink -> Iceberg pattern. + +#### Dependencies +- market-data-kafka-producer (Required) +- normalized-data-schema-crypto (Required) + +#### Documentation Location +- Spec JSON: [`.kiro/specs/shift-left-streaming-lakehouse/spec.json`](../../.kiro/specs/shift-left-streaming-lakehouse/spec.json) +- Requirements: [`.kiro/specs/shift-left-streaming-lakehouse/requirements.md`](../../.kiro/specs/shift-left-streaming-lakehouse/requirements.md) +- Design: [`.kiro/specs/shift-left-streaming-lakehouse/design.md`](../../.kiro/specs/shift-left-streaming-lakehouse/design.md) +- Tasks: [`.kiro/specs/shift-left-streaming-lakehouse/tasks.md`](../../.kiro/specs/shift-left-streaming-lakehouse/tasks.md) + +#### Next Steps +1. Monitor integration test coverage and run full Kafka/backends suite. +2. Coordinate consumer validation (Flink/Iceberg) against v2 topics. +3. Prepare rollout/migration notes and confirm registry credentials paths. 
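As a quick reference for reviewers, the sketch below mirrors how the new unit tests in this change (`tests/unit/kafka/test_kafka_callback_schema_registry_v2.py`) enable the Schema Registry path on `KafkaCallback`. It is illustrative only: the broker and registry URLs are placeholders taken from the tests, and constructor defaults or producer wiring may differ in the final implementation.

```python
# Minimal sketch of the Schema Registry + dual-production configuration,
# mirroring the setup exercised by the unit tests in this change.
# Note: the tests inject a stub producer_factory to avoid a live broker;
# without one, this would attempt to reach a real Kafka cluster/registry.
from cryptofeed.kafka_callback import KafkaCallback

callback = KafkaCallback(
    bootstrap_servers=["kafka:9092"],          # placeholder broker address
    serialization_format="protobuf",
    schema_registry_config={
        "registry_type": "confluent",
        "url": "https://schema-registry:8081",  # placeholder registry URL
    },
    dual_production=True,  # emit registry-framed v2 alongside legacy v1 topics
)
```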
+ + --- ## Specification Dependencies & Relationships @@ -570,14 +541,7 @@ backpack-exchange-integration (🚧 IN PROGRESS) normalized-data-schema-crypto (✅ COMPLETE - READY TO MERGE) ├─ tardis-node alignment (⏳ EXTERNAL DEPENDENCY) - ├─ DBN alignment (⏳ EXTERNAL DEPENDENCY) - └─ cryptofeed-quixstreams-source (📋 PLANNING) - -protobuf-callback-serialization (✅ COMPLETE) - └─ cryptofeed-quixstreams-source (📋 PLANNING) - -market-data-kafka-producer (✅ COMPLETE) - └─ cryptofeed-quixstreams-source (📋 PLANNING) + └─ DBN alignment (⏳ EXTERNAL DEPENDENCY) cryptofeed-lakehouse-architecture (⏸️ DISABLED) └─ (could leverage normalized-data-schema-crypto once merged) @@ -590,18 +554,17 @@ cryptofeed-lakehouse-architecture (⏸️ DISABLED) ### ✅ Ready to Merge (1) - **normalized-data-schema-crypto**: Merge to main, then publish v0.1.0 to Buf registry -### ✅ Completed, No Action Needed (3) +### ✅ Completed, No Action Needed (2) - **proxy-system-complete**: All tests passing, documentation complete - **market-data-kafka-producer**: Implementation complete, 493+ tests passing, ready for merge to main (Phase 4 deferred post-merge) -- **protobuf-callback-serialization**: Backend-only binary serialization, 144+ tests passing, production ready -### 🚧 Active Development (2) +### 🚧 Active Development (3) - **ccxt-generic-pro-exchange**: Begin TDD implementation, target completion before Backpack - **backpack-exchange-integration**: Begin native implementation, coordinate with CCXT generic +- **shift-left-streaming-lakehouse**: Implementation in progress (v2 protos, helpers, registry path merged); complete validation and consumer coordination -### 📋 Awaiting Approval/Requirements (2) +### 📋 Planning Phase (1) - **unified-exchange-feed-architecture**: Needs design review and approval before task generation -- **cryptofeed-quixstreams-source**: Initialized, awaiting requirements generation ### ⏸️ Paused/Disabled (3) - **proxy-pool-system**: Awaiting external roadmap clarification @@ -618,8 +581,7 @@ cryptofeed-lakehouse-architecture (⏸️ DISABLED) 3. **Merge market-data-kafka-producer** to main branch (implementation complete, 493+ tests passing) 4. **Create Phase 4 post-merge GitHub issue** (performance, monitoring, consumer guides) 5. **Approve unified-exchange-feed-architecture design** to unblock task generation -6. **Update CLAUDE.md** to reflect new specs and completion status -7. **Generate requirements for cryptofeed-quixstreams-source** using `/kiro:spec-requirements cryptofeed-quixstreams-source` +6. **Update CLAUDE.md** to reflect market-data-kafka-producer completion and Phase 4 deferral ### 🟡 High Priority (Next 2 Weeks) 1. **Execute Phase 4 post-merge work** (performance benchmarking, Prometheus metrics, consumer guides, migration tooling) @@ -627,6 +589,7 @@ cryptofeed-lakehouse-architecture (⏸️ DISABLED) 3. **Set up integration testing** for both specs (Binance US sandbox for CCXT, Backpack testnet for native) 4. **Clarify proxy roadmap** to determine priority of pool-system and external-service specs 5. **Document consolidation decision** for CCXT vs Native approach for future exchanges +6. **Validate shift-left-streaming-lakehouse against v2 topics** (full Kafka/backends suite, Flink/Iceberg consumer coordination) ### 🟢 Medium Priority (Next Month) 1. 
**Evaluate unified architecture** once CCXT generic and Backpack reach MVP status diff --git a/proto/cryptofeed/normalized/v2/README.md b/proto/cryptofeed/normalized/v2/README.md new file mode 100644 index 000000000..3508b96c0 --- /dev/null +++ b/proto/cryptofeed/normalized/v2/README.md @@ -0,0 +1,66 @@ +# Cryptofeed Normalized v2 Protobuf Schemas + +Authoritative field mapping for v2 message types used by `shift-left-streaming-lakehouse`. + +## Decimal Fidelity Rule (REQ-011) +- Default numeric type: `double` (lossy, acceptable for most exchanges). +- If an exchange needs precision beyond ~1e-9, switch the affected numeric fields to `bytes` and **add** a message-level `int32 scale` describing the quantization exponent. Keep original field numbers; place `scale` in a high, currently unused slot (e.g., 15). + +## Timestamp Rule (REQ-007) +- All timestamps use `google.protobuf.Timestamp`. + +## Field Matrix (v1 field numbers reused where possible) +| Message | Field | No. | Type | Notes | +|---|---|---|---|---| +| Trade | exchange | 1 | string | unchanged | +| Trade | symbol | 2 | string | unchanged | +| Trade | side | 3 | enum | unchanged | +| Trade | trade_id | 4 | string | unchanged | +| Trade | price | 5 | double | switch to bytes+scale if >1e-9 precision needed | +| Trade | amount | 6 | double | switch to bytes+scale if >1e-9 precision needed | +| Trade | timestamp | 7 | google.protobuf.Timestamp | standardized | +| Trade | sequence_number | 8 | uint64 | gap detection | +| Trade | (reserved) | 9 | — | reserved from v1 trade_type to prevent reuse | +| Ticker | exchange | 1 | string | unchanged | +| Ticker | symbol | 2 | string | unchanged | +| Ticker | best_bid_price | 3 | double | reuses v1 bid slot | +| Ticker | best_ask_price | 4 | double | reuses v1 ask slot | +| Ticker | best_bid_size | 5 | double | | +| Ticker | best_ask_size | 6 | double | | +| Ticker | timestamp | 7 | google.protobuf.Timestamp | | +| Ticker | sequence_number | 8 | uint64 | | +| Book | exchange | 1 | string | unchanged | +| Book | symbol | 2 | string | unchanged | +| Book | bids | 3 | repeated PriceLevelV2 | price/size double | +| Book | asks | 4 | repeated PriceLevelV2 | price/size double | +| Book | timestamp | 5 | google.protobuf.Timestamp | snapshot/delta aligned | +| Book | sequence_number | 6 | uint64 | | +| Book | checksum | 7 | string | retained | +| Candle | exchange | 1 | string | unchanged | +| Candle | symbol | 2 | string | unchanged | +| Candle | start | 3 | google.protobuf.Timestamp | was int64 µs | +| Candle | end | 4 | google.protobuf.Timestamp | was int64 µs | +| Candle | interval | 5 | string | unchanged | +| Candle | trades | 6 | uint64 | was optional int64 | +| Candle | open | 7 | double | switch to bytes+scale if precision-critical | +| Candle | close | 8 | double | | +| Candle | high | 9 | double | | +| Candle | low | 10 | double | | +| Candle | volume | 11 | double | | +| Candle | closed | 12 | bool | | +| Candle | timestamp (close/end) | 13 | google.protobuf.Timestamp | | +| Candle | sequence_number | 14 | uint64 | | + +### Launch Decision Table (Day 1 defaults) +| Field group | Default Type | Bytes+Scale? 
| Scale field number | +|---|---|---|---| +| Trade.price / Trade.amount | double | No | 15 (reserved if ever enabled) | +| Ticker bid/ask prices & sizes | double | No | 15 (reserved if ever enabled) | +| OrderBook price/quantity | double | No | 15 (reserved if ever enabled) | +| Candle OHLCV | double | No | 15 (reserved if ever enabled) | + + +## Schema Hygiene +- Syntax: `proto3` +- Package: `cryptofeed.normalized.v2` +- Run `buf lint proto/cryptofeed/normalized/v2` to validate style and reserved fields. diff --git a/proto/cryptofeed/normalized/v2/candle.proto b/proto/cryptofeed/normalized/v2/candle.proto new file mode 100644 index 000000000..4d5207bfd --- /dev/null +++ b/proto/cryptofeed/normalized/v2/candle.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; + +package cryptofeed.normalized.v2; + +import "google/protobuf/timestamp.proto"; + +option go_package = "github.com/cryptofeed/proto/gen/go/normalized/v2"; +option java_package = "build.buf.cryptofeed.normalized.v2"; + +message Candle { + string exchange = 1; + string symbol = 2; + google.protobuf.Timestamp start = 3; + google.protobuf.Timestamp end = 4; + string interval = 5; + uint64 trades = 6; + double open = 7; // switch to bytes+scale if precision-critical + double close = 8; // switch to bytes+scale if precision-critical + double high = 9; // switch to bytes+scale if precision-critical + double low = 10; // switch to bytes+scale if precision-critical + double volume = 11; // switch to bytes+scale if precision-critical + bool closed = 12; + google.protobuf.Timestamp timestamp = 13; // close/end time + uint64 sequence_number = 14; + + // Decimal fidelity (REQ-011): if numeric fields switch to bytes, use message-level scale=15 + optional int32 scale = 15; +} + diff --git a/proto/cryptofeed/normalized/v2/order_book.proto b/proto/cryptofeed/normalized/v2/order_book.proto new file mode 100644 index 000000000..ca2958bab --- /dev/null +++ b/proto/cryptofeed/normalized/v2/order_book.proto @@ -0,0 +1,27 @@ +syntax = "proto3"; + +package cryptofeed.normalized.v2; + +import "google/protobuf/timestamp.proto"; + +option go_package = "github.com/cryptofeed/proto/gen/go/normalized/v2"; +option java_package = "build.buf.cryptofeed.normalized.v2"; + +message PriceLevelV2 { + double price = 1; // switch to bytes+scale at book level if precision demands + double quantity = 2; +} + +message OrderBook { + string exchange = 1; + string symbol = 2; + repeated PriceLevelV2 bids = 3; // sorted descending by price + repeated PriceLevelV2 asks = 4; // sorted ascending by price + google.protobuf.Timestamp timestamp = 5; + uint64 sequence_number = 6; + string checksum = 7; + + // Decimal fidelity (REQ-011): if price/quantity use bytes, add message-level scale=15 + optional int32 scale = 15; +} + diff --git a/proto/cryptofeed/normalized/v2/ticker.proto b/proto/cryptofeed/normalized/v2/ticker.proto new file mode 100644 index 000000000..c36f89a13 --- /dev/null +++ b/proto/cryptofeed/normalized/v2/ticker.proto @@ -0,0 +1,23 @@ +syntax = "proto3"; + +package cryptofeed.normalized.v2; + +import "google/protobuf/timestamp.proto"; + +option go_package = "github.com/cryptofeed/proto/gen/go/normalized/v2"; +option java_package = "build.buf.cryptofeed.normalized.v2"; + +message Ticker { + string exchange = 1; + string symbol = 2; + double best_bid_price = 3; // reuses v1 bid slot + double best_ask_price = 4; // reuses v1 ask slot + double best_bid_size = 5; + double best_ask_size = 6; + google.protobuf.Timestamp timestamp = 7; + uint64 sequence_number = 8; + + // Decimal 
fidelity (REQ-011): if any numeric field switches to bytes, use scale=15 + optional int32 scale = 15; +} + diff --git a/proto/cryptofeed/normalized/v2/trade.proto b/proto/cryptofeed/normalized/v2/trade.proto new file mode 100644 index 000000000..f17862e2f --- /dev/null +++ b/proto/cryptofeed/normalized/v2/trade.proto @@ -0,0 +1,34 @@ +syntax = "proto3"; + +package cryptofeed.normalized.v2; + +import "google/protobuf/timestamp.proto"; + +option go_package = "github.com/cryptofeed/proto/gen/go/normalized/v2"; +option java_package = "build.buf.cryptofeed.normalized.v2"; + +message Trade { + enum Side { + SIDE_UNSPECIFIED = 0; + SIDE_BUY = 1; + SIDE_SELL = 2; + } + + // Preserve backward compatibility field map from v1: + // 1=exchange, 2=symbol, 3=side, 4=trade_id, 5=price, 6=amount, + // 7=timestamp, 8=raw_id (v1) / sequence_number (v2), 9=trade_type (v1) + // Field 9 is no longer used in v2 — reserve to prevent reuse. + reserved 9; + + string exchange = 1; + string symbol = 2; + Side side = 3; + string trade_id = 4; + double price = 5; // switch to bytes+scale if precision > 1e-9 + double amount = 6; // switch to bytes+scale if precision > 1e-9 + google.protobuf.Timestamp timestamp = 7; + uint64 sequence_number = 8; + + // Decimal fidelity (REQ-011): if price/amount use bytes, add message-level scale=15 + optional int32 scale = 15; +} diff --git a/pyproject.toml b/pyproject.toml index b49bf3827..457ec122b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,87 @@ skip_gitignore = true lines_after_imports = 2 ensure_newline_before_comments = true +# Pyrefly configuration - Exclude generated code +[tool.pyrefly] +# Exclude generated protobuf and schema files from type checking +project_excludes = [ + "gen/**/*.py", # Generated protobuf Python files + "gen/**/*.json", # Generated JSON schema files +] + +# Error code configuration - Phase 0.3: Extended Foundation +[tool.pyrefly.errors] +# Phase 0.3: Enable critical runtime safety checks +# Focus on most common crash causes +unbound-name = true # Prevents NameError crashes +unsupported-operation = true # Prevents TypeError crashes +missing-attribute = true # Prevents AttributeError crashes (252 errors) +bad-argument-type = true # Prevents function call errors (185 errors) + +# Explicitly disable all other types for controlled rollout +annotation-mismatch = false +assert-type = false +bad-argument-count = false +bad-assignment = false +bad-class-definition = false +bad-context-manager = false +bad-function-definition = false +bad-index = false +bad-instantiation = false +bad-keyword-argument = false +bad-match = false +bad-override = false +bad-param-name-override = false +bad-return = false +bad-specialization = false +bad-typed-dict = false +bad-typed-dict-key = false +bad-unpacking = false +deprecated = false +implicit-abstract-class = false +implicit-any = false +implicit-import = false +implicitly-defined-attribute = false +inconsistent-inheritance = false +inconsistent-overload = false +internal-error = false +invalid-annotation = false +invalid-argument = false +invalid-decorator = false +invalid-inheritance = false +invalid-literal = false +invalid-overload = false +invalid-param-spec = false +invalid-self-type = false +invalid-super-call = false +invalid-syntax = false +invalid-type-alias = false +invalid-type-var = false +invalid-type-var-tuple = false +invalid-yield = false +missing-argument = false +missing-import = false +missing-module-attribute = false +missing-source = false +no-access = false +no-matching-overload = 
false +not-a-type = false +not-async = false +not-callable = false +not-iterable = false +parse-error = false +protocol-implicitly-defined-attribute = false +read-only = false +redundant-cast = false +redundant-condition = false +reveal-type = false +unknown-name = false +unexpected-keyword = false +unexpected-positional-argument = false +unsupported = false +unsupported-delete = false +unused-coroutine = false + [build-system] requires = ["setuptools", "wheel", "Cython"] \ No newline at end of file diff --git a/tests/unit/backends/test_protobuf_helpers_v2.py b/tests/unit/backends/test_protobuf_helpers_v2.py new file mode 100644 index 000000000..50791f54b --- /dev/null +++ b/tests/unit/backends/test_protobuf_helpers_v2.py @@ -0,0 +1,144 @@ +import math +from decimal import Decimal + +import pytest + +from cryptofeed.backends.protobuf_helpers_v2 import ( + candle_to_proto_v2, + orderbook_to_proto_v2, + serialize_to_protobuf_v2, + ticker_to_proto_v2, + trade_to_proto_v2, +) + + +class _Trade: + def __init__(self): + self.exchange = "coinbase" + self.symbol = "btc-usd" + self.side = "buy" + self.id = "1234" + self.price = Decimal("35000.1234") + self.amount = Decimal("0.25") + self.timestamp = 1700000000.1234567 + self.sequence_number = 42 + + +class _Ticker: + def __init__(self): + self.exchange = "binance" + self.symbol = "eth-usdt" + self.bid = Decimal("2000.5") + self.ask = Decimal("2000.6") + self.bid_size = Decimal("10") + self.ask_size = Decimal("11") + self.timestamp = 1700000001.9 + self.sequence_number = 7 + + +class _OrderBook: + def __init__(self): + self.exchange = "kraken" + self.symbol = "ada-usd" + self.bids = {Decimal("0.25"): Decimal("100")} + self.asks = {Decimal("0.26"): Decimal("120")} + self.timestamp = 1700000002.5 + self.sequence_number = 88 + self.checksum = "abc123" + + +class _Candle: + def __init__(self): + self.exchange = "okx" + self.symbol = "sol-usdt" + self.start = 1700000000.0 + self.end = 1700000060.0 + self.interval = "1m" + self.trades = 120 + self.open = Decimal("54.1") + self.close = Decimal("55.2") + self.high = Decimal("55.5") + self.low = Decimal("53.9") + self.volume = Decimal("1000.123") + self.closed = True + self.timestamp = 1700000060.0 + self.sequence_number = 9 + + +def _assert_ts(proto_ts, expected): + seconds = int(math.floor(expected)) + nanos = int(round((expected - seconds) * 1_000_000_000)) + assert proto_ts.seconds == seconds + assert proto_ts.nanos == nanos + + +def test_trade_conversion_to_proto_v2(): + trade = _Trade() + + proto = trade_to_proto_v2(trade) + + assert proto.exchange == "coinbase" + assert proto.symbol == "btc-usd" + assert proto.side == proto.SIDE_BUY + assert proto.trade_id == "1234" + assert proto.price == pytest.approx(35000.1234) + assert proto.amount == pytest.approx(0.25) + _assert_ts(proto.timestamp, trade.timestamp) + assert proto.sequence_number == 42 + + +def test_ticker_conversion_to_proto_v2(): + ticker = _Ticker() + + proto = ticker_to_proto_v2(ticker) + + assert proto.best_bid_price == pytest.approx(2000.5) + assert proto.best_ask_price == pytest.approx(2000.6) + assert proto.best_bid_size == pytest.approx(10.0) + assert proto.best_ask_size == pytest.approx(11.0) + _assert_ts(proto.timestamp, ticker.timestamp) + assert proto.sequence_number == 7 + + +def test_orderbook_conversion_to_proto_v2(): + book = _OrderBook() + + proto = orderbook_to_proto_v2(book) + + assert proto.exchange == "kraken" + assert proto.symbol == "ada-usd" + assert len(proto.bids) == 1 + assert len(proto.asks) == 1 + assert 
proto.bids[0].price == pytest.approx(0.25) + assert proto.bids[0].quantity == pytest.approx(100.0) + assert proto.asks[0].price == pytest.approx(0.26) + assert proto.asks[0].quantity == pytest.approx(120.0) + _assert_ts(proto.timestamp, book.timestamp) + assert proto.sequence_number == 88 + assert proto.checksum == "abc123" + + +def test_candle_conversion_to_proto_v2(): + candle = _Candle() + + proto = candle_to_proto_v2(candle) + + assert proto.interval == "1m" + assert proto.trades == 120 + assert proto.open == pytest.approx(54.1) + assert proto.close == pytest.approx(55.2) + assert proto.high == pytest.approx(55.5) + assert proto.low == pytest.approx(53.9) + assert proto.volume == pytest.approx(1000.123) + assert proto.closed is True + _assert_ts(proto.start, candle.start) + _assert_ts(proto.end, candle.end) + _assert_ts(proto.timestamp, candle.timestamp) + assert proto.sequence_number == 9 + + +def test_serialize_to_protobuf_v2_returns_bytes(): + trade = _Trade() + encoded = serialize_to_protobuf_v2(trade) + assert isinstance(encoded, (bytes, bytearray)) + assert len(encoded) > 0 diff --git a/tests/unit/kafka/test_kafka_callback_schema_registry_v2.py b/tests/unit/kafka/test_kafka_callback_schema_registry_v2.py new file mode 100644 index 000000000..f70cbe33b --- /dev/null +++ b/tests/unit/kafka/test_kafka_callback_schema_registry_v2.py @@ -0,0 +1,291 @@ +"""Schema Registry integration path for KafkaCallback (v2 protobuf). + +This test exercises the end-to-end path inside KafkaCallback when the +Schema Registry mode is enabled, without requiring a live registry or +Kafka broker. It verifies: + - subject naming ({topic}.v2-value) + - schema registration & caching via SchemaRegistry.create() + - Confluent wire framing (magic byte + schema id + payload) + - dual-production (v2 + legacy v1) when enabled + - header enrichment carries schema_version=v2 +""" + +from __future__ import annotations + +import asyncio +from decimal import Decimal +from typing import Any, Dict, List, Optional + +import pytest + +import cryptofeed.kafka_callback as kafka_module +from cryptofeed.kafka_callback import KafkaCallback +from cryptofeed.types import Trade + + +class _RecordedMessage: + def __init__(self, topic: str, key: Optional[bytes], value: bytes, headers): + self.topic = topic + self.key = key + self.value = value + self.headers = headers + + +class _StubProducer: + """Simple in-memory producer used to avoid a real Kafka broker.""" + + def __init__(self, config: Dict[str, Any]): + self.config = config + self.messages: List[_RecordedMessage] = [] + self.poll_count = 0 + + def list_topics(self, timeout: Optional[float] = None): + return {"topics": {}} + + def produce(self, topic: str, value: bytes, key=None, headers=None, on_delivery=None): + self.messages.append(_RecordedMessage(topic, key, value, headers or [])) + if on_delivery: + on_delivery(None, None) + + def poll(self, timeout: float): + self.poll_count += 1 + return 0 + + def flush(self, timeout: Optional[float] = None): + return 0 + + +def _producer_factory(cls): + def _factory(config): + return cls(config) + + return _factory + + +class _FakeRegistry: + """Minimal Schema Registry stub to capture interactions.""" + + def __init__(self): + self.register_calls: List[tuple[str, str]] = [] + + def register_schema(self, subject: str, schema: str, schema_type: str): + self.register_calls.append((subject, schema_type)) + return 42 + + def embed_schema_id_in_message(self, payload: bytes, schema_id: int) -> bytes: + # Confluent wire format: magic byte 0 + 
4-byte schema id + payload + return b"\x00" + schema_id.to_bytes(4, "big") + payload + + def get_schema_id_header(self, schema_id: int) -> bytes: + return str(schema_id).encode() + + +class _FlakyRegistry(_FakeRegistry): + """Schema registry stub that fails once, then recovers.""" + + def __init__(self): + super().__init__() + self._fail_once = True + + def register_schema(self, subject: str, schema: str, schema_type: str): + self.register_calls.append((subject, schema_type)) + if self._fail_once: + self._fail_once = False + raise RuntimeError("registry down") + return 7 + + +@pytest.mark.asyncio +async def test_kafka_callback_schema_registry_dual_production(monkeypatch): + """Ensure v2 + v1 production works with Schema Registry enabled.""" + + fake_registry = _FakeRegistry() + # Ensure KafkaCallback uses our fake registry instead of making HTTP calls + monkeypatch.setattr( + kafka_module.SchemaRegistry, "create", lambda config: fake_registry + ) + + callback = KafkaCallback( + bootstrap_servers=["kafka:9092"], + producer_factory=_producer_factory(_StubProducer), + serialization_format="protobuf", + schema_registry_config={ + "registry_type": "confluent", + "url": "https://schema-registry:8081", + }, + dual_production=True, + ) + + trade = Trade( + exchange="coinbase", + symbol="BTC-USD", + side="buy", + amount=Decimal("0.25"), + price=Decimal("68000.10"), + timestamp=1700000000.123, + id="t-1", + ) + + assert callback._queue_message("trade", trade) is True + + # Drain one message to trigger production + await callback._drain_once() + + # Expect two messages when dual_production is enabled: v2 first, then v1 + produced = callback._producer._producer.messages + assert len(produced) == 2 + + v2_msg = produced[0] + v1_msg = produced[1] + + # Topic suffix .v2 is applied for registry path + assert v2_msg.topic.endswith(".v2") + assert v1_msg.topic.endswith(".trade") or v1_msg.topic.endswith(".trades") + + # Registry was invoked with {topic}-value subject + assert fake_registry.register_calls + subject, schema_type = fake_registry.register_calls[0] + assert subject.endswith(".v2-value") + assert schema_type == "PROTOBUF" + + # Confluent wire format framing present (magic byte + schema id + payload) + assert v2_msg.value[:1] == b"\x00" + assert v2_msg.value[1:5] == (42).to_bytes(4, "big") + assert len(v2_msg.value) > 5 # payload not empty + + # Headers include schema_version v2 for registry path + header_dict = {k: v for k, v in v2_msg.headers} + assert header_dict.get(b"schema_version") == b"v2" + assert header_dict.get(b"schema_id") == b"42" + + # Producer poll invoked to flush delivery callbacks + assert callback._producer._producer.poll_count >= 1 + + +@pytest.mark.asyncio +async def test_schema_registry_skips_unmapped_types(monkeypatch): + """Unmapped data types should skip registry and still produce legacy payload.""" + + tracking_registry = _FakeRegistry() + monkeypatch.setattr( + kafka_module.SchemaRegistry, "create", lambda config: tracking_registry + ) + + callback = KafkaCallback( + bootstrap_servers=["kafka:9092"], + producer_factory=_producer_factory(_StubProducer), + serialization_format="protobuf", + schema_registry_config={ + "registry_type": "confluent", + "url": "https://schema-registry:8081", + }, + ) + + trade = Trade( + exchange="coinbase", + symbol="BTC-USD", + side="buy", + amount=Decimal("0.1"), + price=Decimal("100"), + timestamp=1.0, + id="skip-1", + ) + + assert callback._queue_message("funding", trade) is True + + await callback._drain_once() + + produced = 
callback._producer._producer.messages + assert len(produced) == 1 # legacy only + assert tracking_registry.register_calls == [] + + +@pytest.mark.asyncio +async def test_schema_registry_buffer_policy_requeues_without_duplicates(monkeypatch): + """Buffer policy requeues once and avoids duplicate legacy production.""" + + flaky_registry = _FlakyRegistry() + monkeypatch.setattr( + kafka_module.SchemaRegistry, "create", lambda config: flaky_registry + ) + + callback = KafkaCallback( + bootstrap_servers=["kafka:9092"], + producer_factory=_producer_factory(_StubProducer), + serialization_format="protobuf", + schema_registry_config={ + "registry_type": "confluent", + "url": "https://schema-registry:8081", + }, + registry_failure_policy="buffer", + queue_maxsize=1, + ) + + trade = Trade( + exchange="coinbase", + symbol="BTC-USD", + side="buy", + amount=Decimal("0.25"), + price=Decimal("68000.10"), + timestamp=1700000000.123, + id="t-buffer", + ) + + assert callback._queue_message("trade", trade) + + # First drain: registry fails, message requeued, nothing produced + await callback._drain_once() + assert callback._producer._producer.messages == [] + assert callback._queue.qsize() == 1 + + # Second drain: registry succeeds, exactly one v2 message produced + await callback._drain_once() + produced = callback._producer._producer.messages + assert len(produced) == 1 + assert produced[0].topic.endswith(".v2") + + +@pytest.mark.asyncio +async def test_v2_header_fallback_sets_correct_schema_version(monkeypatch): + """When v2 header enricher fails, fallback headers must still be v2.""" + + fake_registry = _FakeRegistry() + monkeypatch.setattr( + kafka_module.SchemaRegistry, "create", lambda config: fake_registry + ) + + callback = KafkaCallback( + bootstrap_servers=["kafka:9092"], + producer_factory=_producer_factory(_StubProducer), + serialization_format="protobuf", + schema_registry_config={ + "registry_type": "confluent", + "url": "https://schema-registry:8081", + }, + ) + + class _BrokenEnricher: + def build(self, *args, **kwargs): + raise RuntimeError("boom") + + callback._header_enricher_v2 = _BrokenEnricher() + + trade = Trade( + exchange="kraken", + symbol="ETH-USD", + side="sell", + amount=Decimal("1.0"), + price=Decimal("2000"), + timestamp=2.0, + id="hdr-1", + ) + + assert callback._queue_message("trade", trade) + await callback._drain_once() + + produced = callback._producer._producer.messages + assert len(produced) == 1 + headers = {k: v for k, v in produced[0].headers} + assert headers[b"content-type"] == b"application/vnd.confluent.protobuf" + assert headers[b"schema_version"] == b"v2" + assert headers[b"schema_id"] == b"42" diff --git a/tests/unit/kafka/test_schema_registry.py b/tests/unit/kafka/test_schema_registry.py index 59a50713c..fc3607b25 100644 --- a/tests/unit/kafka/test_schema_registry.py +++ b/tests/unit/kafka/test_schema_registry.py @@ -136,6 +136,29 @@ def test_register_schema(self, mock_post, registry): # Check that HTTPBasicAuth was used assert isinstance(call_args[1]["auth"], HTTPBasicAuth) + @patch("requests.post") + def test_register_schema_with_tls(self, mock_post): + """TLS settings should propagate to requests call.""" + config = SchemaRegistryConfig( + registry_type="confluent", + url="https://schema-registry:8081", + tls_client_cert="/tmp/cert.pem", + tls_client_key="/tmp/key.pem", + tls_ca="/tmp/ca.pem", + ) + registry = ConfluentSchemaRegistry(config) + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"id": 1} + 
mock_post.return_value = mock_response + + registry.register_schema(subject="trades", schema="syntax = \"proto3\";") + + args, kwargs = mock_post.call_args + assert kwargs["verify"] == "/tmp/ca.pem" + assert kwargs["cert"] == ("/tmp/cert.pem", "/tmp/key.pem") + @patch("requests.post") def test_register_schema_already_exists(self, mock_post, registry): """Test schema registration when schema already exists.""" diff --git a/tools/benchmark_v1_v2_sizes.py b/tools/benchmark_v1_v2_sizes.py new file mode 100644 index 000000000..c17eb3bf3 --- /dev/null +++ b/tools/benchmark_v1_v2_sizes.py @@ -0,0 +1,41 @@ +"""Quick size comparison between v1 and v2 protobuf payloads. + +Usage: + python tools/benchmark_v1_v2_sizes.py + +Outputs byte lengths for a representative Trade message encoded with +legacy v1 helpers (string decimals) and v2 helpers (native doubles). +""" + +from decimal import Decimal + +from cryptofeed.backends.protobuf_helpers import serialize_to_protobuf +from cryptofeed.backends.protobuf_helpers_v2 import serialize_to_protobuf_v2 + + +class _Trade: + def __init__(self): + self.exchange = "coinbase" + self.symbol = "BTC-USD" + self.side = "buy" + self.id = "sample-1" + self.price = Decimal("68000.12345678") + self.amount = Decimal("0.25000000") + self.timestamp = 1700000000.123456 + + +def main() -> None: + trade = _Trade() + + v1_bytes = serialize_to_protobuf(trade) + v2_bytes = serialize_to_protobuf_v2(trade) + + reduction = 100 * (1 - len(v2_bytes) / len(v1_bytes)) if len(v1_bytes) else 0 + + print("v1 bytes:", len(v1_bytes)) + print("v2 bytes:", len(v2_bytes)) + print(f"size reduction: {reduction:.2f}%") + + +if __name__ == "__main__": + main()
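
For consumers validating the v2 topics (for example, ahead of the Flink/Iceberg work tracked in SPEC_STATUS.md), the sketch below strips the envelope that the stub registry produces and the tests assert on: a 0x00 magic byte, a 4-byte big-endian schema id, then the protobuf payload. Treat it as illustrative only: the import path for the generated v2 bindings is a guess, and real Confluent protobuf framing additionally carries message-index bytes after the schema id.

```python
# Illustrative decoder for the wire framing asserted in
# test_kafka_callback_schema_registry_v2.py: 0x00 magic byte, 4-byte
# big-endian schema id, protobuf payload. The generated-module import path
# below is hypothetical; adjust to wherever `buf generate` places the code.
import struct

from gen.cryptofeed.normalized.v2 import trade_pb2  # hypothetical path


def decode_framed_trade(value: bytes):
    """Return (schema_id, Trade) from a registry-framed v2 message value."""
    if value[:1] != b"\x00":
        raise ValueError("unexpected magic byte; not a registry-framed message")
    (schema_id,) = struct.unpack(">I", value[1:5])
    trade = trade_pb2.Trade()
    trade.ParseFromString(value[5:])
    return schema_id, trade
```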