diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bfa9522..be08246b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,11 @@ option(ZEN_ENABLE_BUILTIN_LIBC "Enable builtin libc (partial)" ON) option(ZEN_ENABLE_LIBEVM "Enable evmc library build" OFF) # Feature options +option( + ZEN_ENABLE_JIT_PRECOMPILE_FALLBACK + "Enable interpreter fallback before JIT compilation for bytecode estimated to be too expensive" + ON +) option(ZEN_ENABLE_CPU_EXCEPTION "Enable cpu trap to implement wasm trap" ON) option(ZEN_ENABLE_VIRTUAL_STACK "Enable virtual stack(no system stack)" OFF) option(ZEN_ENABLE_DUMP_CALL_STACK "Enable exception call stack dump" OFF) diff --git a/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/design.md b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/design.md new file mode 100644 index 00000000..3aaf31e3 --- /dev/null +++ b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/design.md @@ -0,0 +1,63 @@ +## Context + +DTVM's multipass JIT compiles EVM bytecode via an MIR pipeline that expands certain opcodes into long SelectInstruction chains (e.g., SHL produces ~92 Selects per call). When hundreds of such opcodes appear in a single basic block, the greedy register allocator's cost becomes superlinear, causing compilation times to explode from milliseconds to minutes. + +Two distinct pathological patterns have been identified: +- **b0 (DUP feedback)**: `DUP1 SHL DUP1 SHL ...` -- the shift result feeds back as both operands, creating exponentially overlapping live ranges +- **b1 (full stack)**: `DUP1 x1000 SHL x1000` -- massive fan-out of a single value across the entire function + +A DUP detection fix (`Shift == Value` in `handleShift`) already mitigates b0 at the MIR level. This proposal addresses the remaining cases by detecting pathological patterns before compilation begins, avoiding the expensive JIT path entirely. 
+ +## Goals / Non-Goals + +- Goals: + - Detect bytecodes that would cause RA explosion before JIT compilation starts + - Zero overhead on normal contracts (analysis is O(n) in bytecode length, piggybacks on existing scan) + - Configurable thresholds to tune false-positive/negative tradeoff + - Replace the existing flat `MIR_OPCODE_WEIGHT` estimate with a structured, pattern-aware analysis +- Non-Goals: + - Fixing the register allocator itself (separate effort) + - Detecting runtime-only pathologies (e.g., infinite loops) + - Handling singlepass JIT (only multipass is affected) + +## Decisions + +- **Integration into EVMAnalyzer::analyze()**: The analyzer already scans all opcodes with block boundary detection. Adding ~5 comparisons per opcode is negligible. This avoids a second pass and keeps the analysis colocated with related bytecode metadata. +- **Not integrated into evm_cache.cpp**: The cache focuses on gas metering (SPP) with a different block model (gas chunks vs compilation blocks). Mixing JIT analysis here would conflate concerns. +- **Struct-based result**: `JITSuitabilityResult` provides fine-grained metrics (not just a boolean), enabling callers to log diagnostics, tune thresholds, or implement graduated responses. + +## RA-Expensive Opcode Set + +Based on empirical analysis of MIR expansion and Select chain density: + +| Opcode | Selects/call | Total MIR/call | Justification | +|--------|-------------|----------------|---------------| +| SHL (0x1b) | 92 | ~150-180 | Nested J,K loops over 4 U256 components | +| SHR (0x1c) | 96 | ~160-190 | Same structure as SHL | +| SAR (0x1d) | 52 | ~100-130 | Similar but with sign extension | +| MUL (0x02) | 0 | ~50-60 | Heavy inline U256 mul (no Selects but huge VR fan-out) | +| SIGNEXTEND (0x0b) | 21 | ~80-100 | Two dependency chain loops | + +## Detection Heuristics + +1. **Per-block density**: Count RA-expensive opcodes per basic block (JUMPDEST to JUMP/STOP/RETURN). 
Normal contracts have <20 per block; pathological cases have 500+. +2. **Consecutive run length**: Track the longest unbroken sequence of RA-expensive opcodes (DUPs/SWAPs are transparent since they don't generate heavy MIR). Detects both b0 and b1 patterns. +3. **DUP feedback count**: Count `DUPn immediately followed by RA-expensive op` pairs. This specifically targets the b0 pattern where DUP creates the feedback loop. + +## Thresholds (initial, tunable) + +- `MAX_CONSECUTIVE_RA_EXPENSIVE = 128` -- safe margin above any real contract +- `MAX_BLOCK_RA_EXPENSIVE = 256` -- per-block cap +- `MAX_DUP_FEEDBACK_PATTERN = 64` -- DUP+expensive pairs in whole bytecode +- Existing: `MAX_JIT_BYTECODE_SIZE = 0x6000`, `MAX_JIT_MIR_ESTIMATE = 50000` + +## Risks / Trade-offs + +- **False positives**: A contract with 129 consecutive MULs would trigger fallback even if compilation would succeed. Mitigation: thresholds are set conservatively high (real contracts have <20 per block). +- **False negatives**: Novel pathological patterns not involving the listed opcodes could still cause RA explosion. Mitigation: the existing `MAX_JIT_MIR_ESTIMATE` serves as a backstop. +- **Maintenance cost**: New RA-expensive opcodes added in the future must be added to the set. Mitigation: the set is small and well-documented. + +## Open Questions + +- Should the thresholds be runtime-configurable (e.g., via `set_option`) or compile-time only? +- Should the analysis result be cached in `EVMBytecodeCache` for reuse between interpreter and JIT paths? 
diff --git a/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/proposal.md b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/proposal.md new file mode 100644 index 00000000..bd416edc --- /dev/null +++ b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/proposal.md @@ -0,0 +1,22 @@ +# Change: Add JIT suitability checker for EVM bytecode + +## Why + +EVM bytecodes containing high concentrations of RA-expensive opcodes (SHL, SHR, SAR, MUL, SIGNEXTEND) cause the greedy register allocator to exhibit superlinear (O(n^2)) compilation time, hanging for minutes or triggering OOM kills in CI. The current fallback mechanism uses a flat linear MIR estimate that cannot distinguish pathological patterns from normal contracts with similar opcode counts. + +## What Changes + +- Add a pattern-aware JIT suitability analysis integrated into `EVMAnalyzer::analyze()` that detects: + - Per-block concentration of RA-expensive opcodes + - Consecutive runs of RA-expensive opcodes (ignoring interleaved DUPs/SWAPs) + - DUP feedback patterns (DUPn immediately followed by an RA-expensive op) +- Replace the existing `MIR_OPCODE_WEIGHT[]` table and `estimateMirInstructionCount()` in `dt_evmc_vm.cpp` with a structured `JITSuitabilityResult` from the analyzer +- Expose configurable thresholds for fallback decisions + +## Impact + +- Affected specs: `evm-jit` +- Affected code: + - `src/compiler/evm_frontend/evm_analyzer.h` (extend analysis loop) + - `src/vm/dt_evmc_vm.cpp` (replace fallback decision logic) + - `src/CMakeLists.txt` (include path if needed) diff --git a/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/specs/evm-jit/spec.md b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/specs/evm-jit/spec.md new file mode 100644 index 00000000..9916ad32 --- /dev/null +++ b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/specs/evm-jit/spec.md @@ -0,0 +1,71 @@ +## ADDED Requirements + +### Requirement: JIT 
suitability analysis before compilation +The system SHALL analyze EVM bytecode for patterns that cause register allocation explosion before attempting JIT compilation, and SHALL fall back to interpreter mode when pathological patterns are detected. + +#### Scenario: Normal contract passes suitability check +- **WHEN** EVM bytecode contains no run of more than 128 consecutive RA-expensive opcodes +- **AND** no more than 256 RA-expensive opcodes in any basic block +- **AND** no more than 64 DUP-feedback patterns +- **AND** the linear MIR estimate does not exceed the configured threshold +- **THEN** the system SHALL proceed with JIT compilation + +#### Scenario: High consecutive RA-expensive opcode density triggers fallback +- **WHEN** EVM bytecode contains a run of more than 128 consecutive RA-expensive opcodes (SHL, SHR, SAR, MUL, SIGNEXTEND), with DUP and SWAP opcodes not breaking the run +- **THEN** the system SHALL fall back to interpreter mode for that contract +- **AND** the system SHALL log the fallback reason with the detected pattern metrics + +#### Scenario: High per-block RA-expensive opcode density triggers fallback +- **WHEN** a single basic block (JUMPDEST to control-flow terminator) contains more than 256 RA-expensive opcodes +- **THEN** the system SHALL fall back to interpreter mode for that contract + +#### Scenario: DUP feedback loop pattern triggers fallback +- **WHEN** EVM bytecode contains more than 64 instances of DUPn immediately followed by an RA-expensive opcode +- **THEN** the system SHALL fall back to interpreter mode for that contract + +#### Scenario: Suitability analysis performance +- **WHEN** the suitability analysis runs on any EVM bytecode +- **THEN** the analysis SHALL complete in O(n) time where n is the bytecode length +- **AND** the analysis SHALL NOT allocate heap memory proportional to bytecode size beyond existing analyzer structures + +### Requirement: RA-expensive opcode classification +The system SHALL classify EVM opcodes that expand to 
complex MIR structures (long Select chains or heavy intermediate value fan-out) as RA-expensive for the purpose of JIT suitability analysis. + +#### Scenario: Shift opcodes classified as RA-expensive +- **WHEN** classifying opcodes for JIT suitability +- **THEN** SHL (0x1b), SHR (0x1c), and SAR (0x1d) SHALL be classified as RA-expensive +- **AND** each generates 52-96 SelectInstruction chains per invocation in MIR + +#### Scenario: Multiplication classified as RA-expensive +- **WHEN** classifying opcodes for JIT suitability +- **THEN** MUL (0x02) SHALL be classified as RA-expensive +- **AND** it generates ~50-60 MIR instructions with heavy intermediate value fan-out + +#### Scenario: Sign extension classified as RA-expensive +- **WHEN** classifying opcodes for JIT suitability +- **THEN** SIGNEXTEND (0x0b) SHALL be classified as RA-expensive +- **AND** it generates ~21 SelectInstruction chains per invocation in MIR + +## MODIFIED Requirements + +### Requirement: Multipass-only EVM JIT support +The system SHALL compile EVM bytecode using the multipass JIT pipeline only, after verifying bytecode suitability through pattern analysis. 
+ +#### Scenario: Multipass eager compilation +- **WHEN** runtime mode is Multipass +- **AND** the bytecode passes JIT suitability analysis +- **THEN** the system SHALL eagerly compile EVM bytecode using the EVM JIT compiler + +#### Scenario: Multipass fallback to interpreter +- **WHEN** runtime mode is Multipass +- **AND** the bytecode fails JIT suitability analysis +- **THEN** the system SHALL temporarily switch to interpreter mode for that execution +- **AND** the system SHALL log the fallback with diagnostic metrics + +#### Scenario: Lazy compilation unsupported +- **WHEN** runtime configuration requests lazy JIT for EVM +- **THEN** the system SHALL emit a warning and skip lazy compilation + +#### Scenario: Singlepass mode unsupported +- **WHEN** runtime mode is Singlepass +- **THEN** the system SHALL emit an error indicating EVMJIT is unsupported diff --git a/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/tasks.md b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/tasks.md new file mode 100644 index 00000000..efcc23bf --- /dev/null +++ b/openspec/changes/archive/2026-02-12-add-jit-suitability-checker/tasks.md @@ -0,0 +1,21 @@ +## 1. JIT Suitability Analysis in EVMAnalyzer + +- [x] 1.1 Define `JITSuitabilityResult` struct in `evm_analyzer.h` with fields: `ShouldFallback`, `MirEstimate`, `RAExpensiveCount`, `MaxConsecutiveExpensive`, `MaxBlockExpensiveCount`, `DupFeedbackPatternCount` +- [x] 1.2 Add `isRAExpensiveOpcode()` helper function covering SHL, SHR, SAR, MUL, SIGNEXTEND +- [x] 1.3 Add per-opcode MIR weight table (migrated from `dt_evmc_vm.cpp`) for linear MIR estimate +- [x] 1.4 Extend `EVMAnalyzer::analyze()` loop to track: consecutive RA-expensive run length, per-block RA-expensive count, DUP feedback pattern detection, MIR estimate accumulation +- [x] 1.5 Add `shouldFallbackJIT()` method combining all thresholds into a single boolean +- [x] 1.6 Add `getJITSuitability()` accessor returning the result struct + +## 2. 
Integration into EVMC VM Execute Path + +- [x] 2.1 Include `evm_analyzer.h` from `dt_evmc_vm.cpp` (verify include paths) +- [x] 2.2 Replace `MIR_OPCODE_WEIGHT[]` table and `estimateMirInstructionCount()` with `EVMAnalyzer::analyze()` + `getJITSuitability()` +- [x] 2.3 Update fallback decision in `execute()` to use `JITSuitabilityResult::ShouldFallback` +- [x] 2.4 Add diagnostic logging for fallback triggers (opcode pattern type, counts) + +## 3. Verification + +- [x] 3.1 Build and verify compilation succeeds in Release mode +- [x] 3.2 Run SHL/SHR/SAR benchmark: verify pathological cases trigger fallback, normal cases do not +- [x] 3.3 Run full benchmark suite: verify no OOM, no hangs, no false-positive fallbacks on real contract benchmarks diff --git a/openspec/specs/evm-jit/spec.md b/openspec/specs/evm-jit/spec.md index e677d1ce..d3516804 100644 --- a/openspec/specs/evm-jit/spec.md +++ b/openspec/specs/evm-jit/spec.md @@ -2,15 +2,21 @@ ## Purpose Define DTVM’s multipass JIT compilation pipeline for EVM bytecode, including compilation constraints, code emission, and runtime integration. - ## Requirements ### Requirement: Multipass-only EVM JIT support -The system SHALL compile EVM bytecode using the multipass JIT pipeline only. +The system SHALL compile EVM bytecode using the multipass JIT pipeline only, after verifying bytecode suitability through pattern analysis. 
#### Scenario: Multipass eager compilation - **WHEN** runtime mode is Multipass +- **AND** the bytecode passes JIT suitability analysis - **THEN** the system SHALL eagerly compile EVM bytecode using the EVM JIT compiler +#### Scenario: Multipass fallback to interpreter +- **WHEN** runtime mode is Multipass +- **AND** the bytecode fails JIT suitability analysis +- **THEN** the system SHALL temporarily switch to interpreter mode for that execution +- **AND** the system SHALL log the fallback with diagnostic metrics + #### Scenario: Lazy compilation unsupported - **WHEN** runtime configuration requests lazy JIT for EVM - **THEN** the system SHALL emit a warning and skip lazy compilation @@ -56,3 +62,50 @@ The system SHALL record compilation timing and optionally emit perf JIT dump sym #### Scenario: Perf JIT dump output - **WHEN** Linux perf JIT dumping is enabled - **THEN** the compiler SHALL emit per-block symbols for generated code + +### Requirement: JIT suitability analysis before compilation +The system SHALL analyze EVM bytecode for patterns that cause register allocation explosion before attempting JIT compilation, and SHALL fall back to interpreter mode when pathological patterns are detected. 
+ +#### Scenario: Normal contract passes suitability check +- **WHEN** EVM bytecode contains no run of more than 128 consecutive RA-expensive opcodes +- **AND** no more than 256 RA-expensive opcodes in any basic block +- **AND** no more than 64 DUP-feedback patterns +- **AND** the linear MIR estimate does not exceed the configured threshold +- **THEN** the system SHALL proceed with JIT compilation + +#### Scenario: High consecutive RA-expensive opcode density triggers fallback +- **WHEN** EVM bytecode contains a run of more than 128 consecutive RA-expensive opcodes (SHL, SHR, SAR, MUL, SIGNEXTEND), with DUP and SWAP opcodes not breaking the run +- **THEN** the system SHALL fall back to interpreter mode for that contract +- **AND** the system SHALL log the fallback reason with the detected pattern metrics + +#### Scenario: High per-block RA-expensive opcode density triggers fallback +- **WHEN** a single basic block (JUMPDEST to control-flow terminator) contains more than 256 RA-expensive opcodes +- **THEN** the system SHALL fall back to interpreter mode for that contract + +#### Scenario: DUP feedback loop pattern triggers fallback +- **WHEN** EVM bytecode contains more than 64 instances of DUPn immediately followed by an RA-expensive opcode +- **THEN** the system SHALL fall back to interpreter mode for that contract + +#### Scenario: Suitability analysis performance +- **WHEN** the suitability analysis runs on any EVM bytecode +- **THEN** the analysis SHALL complete in O(n) time where n is the bytecode length +- **AND** the analysis SHALL NOT allocate heap memory proportional to bytecode size beyond existing analyzer structures + +### Requirement: RA-expensive opcode classification +The system SHALL classify EVM opcodes that expand to complex MIR structures (long Select chains or heavy intermediate value fan-out) as RA-expensive for the purpose of JIT suitability analysis. 
+ +#### Scenario: Shift opcodes classified as RA-expensive +- **WHEN** classifying opcodes for JIT suitability +- **THEN** SHL (0x1b), SHR (0x1c), and SAR (0x1d) SHALL be classified as RA-expensive +- **AND** each generates 52-96 SelectInstruction chains per invocation in MIR + +#### Scenario: Multiplication classified as RA-expensive +- **WHEN** classifying opcodes for JIT suitability +- **THEN** MUL (0x02) SHALL be classified as RA-expensive +- **AND** it generates ~50-60 MIR instructions with heavy intermediate value fan-out + +#### Scenario: Sign extension classified as RA-expensive +- **WHEN** classifying opcodes for JIT suitability +- **THEN** SIGNEXTEND (0x0b) SHALL be classified as RA-expensive +- **AND** it generates ~21 SelectInstruction chains per invocation in MIR + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a351209c..4a3029e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -65,6 +65,10 @@ if(ZEN_ENABLE_EVM) add_definitions(-DZEN_ENABLE_EVM) endif() +if(ZEN_ENABLE_JIT_PRECOMPILE_FALLBACK) + add_definitions(-DZEN_ENABLE_JIT_PRECOMPILE_FALLBACK) +endif() + if(ZEN_ENABLE_CPU_EXCEPTION) if(ZEN_ENABLE_SINGLEPASS_JIT OR ZEN_ENABLE_MULTIPASS_JIT) add_definitions(-DZEN_ENABLE_CPU_EXCEPTION) diff --git a/src/compiler/evm_frontend/evm_analyzer.h b/src/compiler/evm_frontend/evm_analyzer.h index b1f4ee37..c5af915c 100644 --- a/src/compiler/evm_frontend/evm_analyzer.h +++ b/src/compiler/evm_frontend/evm_analyzer.h @@ -9,8 +9,117 @@ #include "evmc/evmc.h" #include "evmc/instructions.h" +#include + namespace COMPILER { +// ============== JIT Suitability Analysis ===================================== +// +// Certain EVM opcodes expand to very large MIR instruction sequences (long +// SelectInstruction chains or heavy intermediate value fan-out). When hundreds +// of these appear in a single basic block the greedy register allocator's cost +// becomes superlinear, causing compilation times to explode. 
//
// The analysis below detects pathological patterns in O(n) time during the
// existing bytecode scan and provides a structured verdict on whether JIT
// compilation should be attempted.

/// Approximate MIR instruction count generated per EVM opcode, indexed by the
/// raw opcode byte (0x00..0xff).
///
/// Derived from the compiler frontend: inline arithmetic expands to many
/// instructions while runtime-call opcodes are cheap. Undefined / reserved
/// opcodes carry a nominal weight of 2.
///
/// NOTE(review): the shift weights below (15) are much smaller than the
/// Select counts quoted on isRAExpensiveOpcode(); presumably the dedicated
/// RA-expensive pattern thresholds are meant to cover those opcodes rather
/// than this linear estimate -- confirm before re-tuning.
// clang-format off
static constexpr uint32_t MIR_OPCODE_WEIGHT[256] = {
    // 0x00 STOP  ADD   MUL   SUB   DIV   SDIV  MOD   SMOD
    5, 12, 80, 20, 5, 5, 5, 5,
    // 0x08 ADDMOD MULMOD EXP SIGNEXT (0x0c-0x0f undefined)
    5, 5, 5, 20, 2, 2, 2, 2,
    // 0x10 LT    GT    SLT   SGT   EQ    ISZERO AND   OR
    12, 12, 12, 12, 12, 8, 8, 8,
    // 0x18 XOR   NOT   BYTE  SHL   SHR   SAR   CLZ   (0x1f)
    8, 8, 8, 15, 15, 15, 8, 2,
    // 0x20 KECCAK256 (0x21-0x2f undefined)
    5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0x30 ADDRESS BALANCE ORIGIN CALLER CALLVAL CLDLOAD CLDSIZE CLDCOPY
    5, 5, 5, 5, 5, 5, 5, 8,
    // 0x38 CODESIZE CODECOPY GASPRICE EXTCDSZ EXTCDCP RETDSZ RETDCP EXTCDHASH
    5, 8, 5, 5, 8, 5, 8, 5,
    // 0x40 BLKHASH COINBASE TIMESTAMP NUMBER PREVRAND GASLIM CHAINID SELFBAL
    5, 5, 5, 5, 5, 5, 5, 5,
    // 0x48 BASEFEE BLOBHASH BLOBBASE (0x4b-0x4f undefined)
    5, 5, 5, 2, 2, 2, 2, 2,
    // 0x50 POP   MLOAD MSTORE MSTORE8 SLOAD SSTORE JUMP  JUMPI
    2, 8, 8, 8, 5, 5, 5, 5,
    // 0x58 PC    MSIZE GAS   JMPDEST TLOAD TSTORE MCOPY PUSH0
    5, 5, 5, 2, 5, 5, 8, 4,
    // 0x60 PUSH1 .. PUSH32 (0x60-0x7f): all weight 4
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // PUSH1-PUSH16
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // PUSH17-PUSH32
    // 0x80 DUP1 .. DUP16 (0x80-0x8f): all weight 4
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    // 0x90 SWAP1 .. SWAP16 (0x90-0x9f): all weight 4
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    // 0xa0 LOG0-LOG4 (0xa0-0xa4), rest undefined
    8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xb0-0xef: undefined / reserved, weight 2
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xb0-0xbf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0-0xcf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0-0xdf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0-0xef
    // 0xf0 CREATE CALL  CALLCODE RETURN DELCALL CREATE2 (0xf6) (0xf7)
    // CREATE2 is opcode 0xf5; 0xf6 and 0xf7 are unassigned.
    5, 5, 5, 5, 5, 5, 2, 2,
    // 0xf8 (undef) (undef) STATIC (undef) (undef) REVERT INVALID SELFDEST
    2, 2, 5, 2, 2, 5, 2, 5,
};
// clang-format on

/// Returns true if the opcode expands to complex MIR structures (long Select
/// chains or heavy intermediate value fan-out) that cause superlinear register
/// allocation cost when they appear in high density.
///
/// @param Op raw EVM opcode byte.
/// @return true for MUL, SIGNEXTEND, SHL, SHR and SAR; false otherwise.
inline bool isRAExpensiveOpcode(uint8_t Op) {
  switch (Op) {
  case 0x02: // MUL        - ~50-60 MIR, heavy partial-product fan-out
  case 0x0b: // SIGNEXTEND - ~21 Selects, two dependency chain loops
  case 0x1b: // SHL        - ~92 Selects, nested J,K loops
  case 0x1c: // SHR        - ~96 Selects, nested J,K loops
  case 0x1d: // SAR        - ~52 Selects, sign-extended variant
    return true;
  default:
    return false;
  }
}

/// Returns true if the opcode is a DUP instruction (DUP1..DUP16, 0x80-0x8f).
///
/// @param Op raw EVM opcode byte.
inline bool isDupOpcode(uint8_t Op) {
  return Op >= 0x80 && Op <= 0x8f; // DUP1..DUP16
}

/// Returns true if the opcode is a DUP or SWAP (transparent for consecutive
/// RA-expensive run detection since they don't generate heavy MIR).
///
/// @param Op raw EVM opcode byte.
inline bool isDupOrSwapOpcode(uint8_t Op) {
  return isDupOpcode(Op) ||          // DUP1..DUP16
         (Op >= 0x90 && Op <= 0x9f); // SWAP1..SWAP16
}

/// Structured result of JIT suitability analysis. Provides fine-grained
/// metrics so callers can log diagnostics or tune thresholds.
+struct JITSuitabilityResult { + bool ShouldFallback = false; + size_t BytecodeSize = 0; + size_t MirEstimate = 0; // linear MIR instruction estimate + size_t RAExpensiveCount = 0; // total RA-expensive opcodes + size_t MaxConsecutiveExpensive = 0; // longest unbroken run + size_t MaxBlockExpensiveCount = 0; // max RA-expensive ops in one block + size_t DupFeedbackPatternCount = 0; // DUPn immediately before RA-expensive +}; + +/// Thresholds for JIT suitability fallback. Normal contracts have <20 +/// RA-expensive ops per block; these values are conservatively high. +static constexpr size_t MAX_JIT_BYTECODE_SIZE = 0x6000; +static constexpr size_t MAX_JIT_MIR_ESTIMATE = 50000; +static constexpr size_t MAX_CONSECUTIVE_RA_EXPENSIVE = 128; +static constexpr size_t MAX_BLOCK_RA_EXPENSIVE = 256; +static constexpr size_t MAX_DUP_FEEDBACK_PATTERN = 64; + class EVMAnalyzer { using Byte = zen::common::Byte; using Bytes = zen::common::Bytes; @@ -26,6 +135,7 @@ class EVMAnalyzer { int32_t StackHeightDiff = 0; bool IsJumpDest = false; bool HasUndefinedInstr = false; + uint32_t RAExpensiveCount = 0; BlockInfo() = default; BlockInfo(uint64_t PC) : EntryPC(PC) {} @@ -35,8 +145,14 @@ class EVMAnalyzer { return BlockInfos; } + /// Return the JIT suitability result computed during the last analyze() call. 
+ const JITSuitabilityResult &getJITSuitability() const { return JITResult; } + bool analyze(const uint8_t *Bytecode, size_t BytecodeSize) { BlockInfos.clear(); + JITResult = JITSuitabilityResult(); + JITResult.BytecodeSize = BytecodeSize; + const uint8_t *Ip = Bytecode; const uint8_t *IpEnd = Bytecode + BytecodeSize; @@ -56,17 +172,50 @@ class EVMAnalyzer { // Initialize block info for the first block BlockInfo CurInfo(0); + // JIT suitability tracking state + size_t CurConsecutiveExpensive = 0; + bool PrevWasDup = false; + while (Ip < IpEnd) { evmc_opcode Opcode = static_cast(*Ip); + uint8_t OpcodeU8 = static_cast(Opcode); ptrdiff_t Diff = Ip - Bytecode; PC = static_cast(Diff >= 0 ? Diff : 0); Ip++; + // --- JIT suitability: accumulate MIR estimate --- + JITResult.MirEstimate += MIR_OPCODE_WEIGHT[OpcodeU8]; + + // --- JIT suitability: RA-expensive pattern tracking --- + if (isRAExpensiveOpcode(OpcodeU8)) { + JITResult.RAExpensiveCount++; + CurInfo.RAExpensiveCount++; + CurConsecutiveExpensive++; + // DUP feedback: previous opcode was DUP, now RA-expensive + if (PrevWasDup) { + JITResult.DupFeedbackPatternCount++; + } + PrevWasDup = false; + } else if (isDupOrSwapOpcode(OpcodeU8)) { + // DUP/SWAP are transparent — don't break consecutive run + PrevWasDup = isDupOpcode(OpcodeU8); + } else { + // Any other opcode breaks the consecutive run + JITResult.MaxConsecutiveExpensive = std::max( + JITResult.MaxConsecutiveExpensive, CurConsecutiveExpensive); + CurConsecutiveExpensive = 0; + PrevWasDup = false; + } + // Check if opcode is undefined for current revision bool IsUndefined = (InstructionNames[Opcode] == nullptr); if (IsUndefined) { CurInfo.HasUndefinedInstr = true; +#ifdef ZEN_ENABLE_JIT_FALLBACK_TEST + // Reset undefined instruction flag in fallback test + CurInfo.HasUndefinedInstr = false; +#endif } // Get stack metrics from the instruction metrics table @@ -106,6 +255,10 @@ class EVMAnalyzer { if (IsBlockStart) { if (PC != CurInfo.EntryPC) { + // Finalize 
block: update max block RA-expensive count + JITResult.MaxBlockExpensiveCount = + std::max(JITResult.MaxBlockExpensiveCount, + static_cast(CurInfo.RAExpensiveCount)); BlockInfos.emplace(CurInfo.EntryPC, CurInfo); } // Create new block info @@ -113,9 +266,21 @@ class EVMAnalyzer { if (Opcode == OP_JUMPDEST) { CurInfo.IsJumpDest = true; } + // Block boundary also ends a consecutive run + JITResult.MaxConsecutiveExpensive = std::max( + JITResult.MaxConsecutiveExpensive, CurConsecutiveExpensive); + CurConsecutiveExpensive = 0; } else if (IsBlockEnd) { + // Finalize block: update max block RA-expensive count + JITResult.MaxBlockExpensiveCount = + std::max(JITResult.MaxBlockExpensiveCount, + static_cast(CurInfo.RAExpensiveCount)); // Save current block info BlockInfos.emplace(CurInfo.EntryPC, CurInfo); + // Block boundary ends consecutive run + JITResult.MaxConsecutiveExpensive = std::max( + JITResult.MaxConsecutiveExpensive, CurConsecutiveExpensive); + CurConsecutiveExpensive = 0; // Skip dead code while (Ip < IpEnd) { evmc_opcode NextOp = static_cast(*Ip); @@ -131,10 +296,24 @@ class EVMAnalyzer { } } } + // Finalize last block and consecutive run + JITResult.MaxConsecutiveExpensive = + std::max(JITResult.MaxConsecutiveExpensive, CurConsecutiveExpensive); if (BlockInfos.count(CurInfo.EntryPC) == 0) { + JITResult.MaxBlockExpensiveCount = + std::max(JITResult.MaxBlockExpensiveCount, + static_cast(CurInfo.RAExpensiveCount)); BlockInfos.emplace(CurInfo.EntryPC, CurInfo); } + // Compute final fallback verdict + JITResult.ShouldFallback = + BytecodeSize > MAX_JIT_BYTECODE_SIZE || + JITResult.MirEstimate > MAX_JIT_MIR_ESTIMATE || + JITResult.MaxConsecutiveExpensive > MAX_CONSECUTIVE_RA_EXPENSIVE || + JITResult.MaxBlockExpensiveCount > MAX_BLOCK_RA_EXPENSIVE || + JITResult.DupFeedbackPatternCount > MAX_DUP_FEEDBACK_PATTERN; + return true; } @@ -142,6 +321,7 @@ class EVMAnalyzer { std::map BlockInfos; uint64_t PC = 0; evmc_revision Revision = zen::evm::DEFAULT_REVISION; + 
JITSuitabilityResult JITResult; }; } // namespace COMPILER diff --git a/src/vm/dt_evmc_vm.cpp b/src/vm/dt_evmc_vm.cpp index 57e950f2..2ee646ba 100644 --- a/src/vm/dt_evmc_vm.cpp +++ b/src/vm/dt_evmc_vm.cpp @@ -16,14 +16,15 @@ #include +#ifdef ZEN_ENABLE_JIT_PRECOMPILE_FALLBACK +#include "compiler/evm_frontend/evm_analyzer.h" +#endif + namespace { using namespace zen::runtime; using namespace zen::common; -// JIT compilation limits (95% < 10KB) -const size_t MAX_JIT_BYTECODE_SIZE = 0x6000; - // RAII helper for temporarily changing runtime configuration class ScopedConfig { public: @@ -147,14 +148,25 @@ evmc_result execute(evmc_vm *EVMInstance, const evmc_host_interface *Host, return evmc_make_result(EVMC_FAILURE, 0, 0, nullptr, 0); } } - // Use interpreter mode for large bytecode +#ifdef ZEN_ENABLE_JIT_PRECOMPILE_FALLBACK + // Use interpreter mode for bytecode that would be too expensive to JIT. + // The EVMAnalyzer performs a pattern-aware O(n) scan that detects: + // - raw bytecode size / estimated MIR instruction count too large + // - high density of RA-expensive opcodes (SHL/SHR/SAR/MUL/SIGNEXTEND) + // - long consecutive runs of RA-expensive ops + // - DUP-induced feedback loops (b0 pattern) std::unique_ptr TempConfig; - if (VM->Config.Mode == RunMode::MultipassMode && - CodeSize > MAX_JIT_BYTECODE_SIZE) { - RuntimeConfig NewConfig = VM->Config; - NewConfig.Mode = RunMode::InterpMode; - TempConfig = std::make_unique(VM->RT.get(), NewConfig); + if (VM->Config.Mode == RunMode::MultipassMode) { + COMPILER::EVMAnalyzer Analyzer(Rev); + Analyzer.analyze(Code, CodeSize); + const auto &JITResult = Analyzer.getJITSuitability(); + if (JITResult.ShouldFallback) { + RuntimeConfig NewConfig = VM->Config; + NewConfig.Mode = RunMode::InterpMode; + TempConfig = std::make_unique(VM->RT.get(), NewConfig); + } } +#endif // ZEN_ENABLE_JIT_PRECOMPILE_FALLBACK uint32_t CheckSum = crc32(Code, CodeSize); uint64_t ModKey = (static_cast(Rev) << 32) | CheckSum;