diff --git a/.ci/run_test_suite.sh b/.ci/run_test_suite.sh
index f3524797..4ea24c01 100644
--- a/.ci/run_test_suite.sh
+++ b/.ci/run_test_suite.sh
@@ -74,6 +74,9 @@ case $TestSuite in
 "evmonetestsuite")
     CMAKE_OPTIONS="$CMAKE_OPTIONS -DZEN_ENABLE_EVM=ON -DZEN_ENABLE_LIBEVM=ON"
     ;;
+"benchmarksuite")
+    CMAKE_OPTIONS="$CMAKE_OPTIONS -DZEN_ENABLE_EVM=ON -DZEN_ENABLE_LIBEVM=ON -DZEN_ENABLE_SINGLEPASS_JIT=OFF -DZEN_ENABLE_MULTIPASS_JIT=ON"
+    ;;
 esac
 
 case $CPU_EXCEPTION_TYPE in
@@ -94,6 +97,10 @@ if [[ $TestSuite == "evmonetestsuite" ]]; then
   STACK_TYPES=("-DZEN_ENABLE_VIRTUAL_STACK=ON")
 fi
 
+if [[ $TestSuite == "benchmarksuite" ]]; then
+  STACK_TYPES=("-DZEN_ENABLE_VIRTUAL_STACK=ON")
+fi
+
 export PATH=$PATH:$PWD/build
 CMAKE_OPTIONS_ORIGIN="$CMAKE_OPTIONS"
@@ -153,5 +160,64 @@ for STACK_TYPE in ${STACK_TYPES[@]}; do
     ./run_unittests.sh ../tests/evmone_unittests/EVMOneMultipassUnitTestsRunList.txt "./libdtvmapi.so,mode=multipass"
     ./run_unittests.sh ../tests/evmone_unittests/EVMOneInterpreterUnitTestsRunList.txt "./libdtvmapi.so,mode=interpreter"
     ;;
+  "benchmarksuite")
+    # Clone evmone and run performance regression check
+    EVMONE_DIR="evmone"
+    if [ ! -d "$EVMONE_DIR" ]; then
+      git clone --depth 1 --recurse-submodules -b for_test https://github.com/DTVMStack/evmone.git $EVMONE_DIR
+    fi
+
+    # Set default values for benchmark
+    BENCHMARK_THRESHOLD=${BENCHMARK_THRESHOLD:-0.10}
+    BENCHMARK_MODE=${BENCHMARK_MODE:-multipass}
+
+    # Copy DTVM library to evmone directory
+    cp build/lib/* $EVMONE_DIR/
+
+    cd $EVMONE_DIR
+
+    # Copy check_performance_regression.py from DTVM repo
+    cp ../tools/check_performance_regression.py ./
+
+    # Build evmone if not already built
+    if [ ! -f "build/bin/evmone-bench" ]; then
+      cmake -S . -B build -DEVMONE_TESTING=ON -DCMAKE_BUILD_TYPE=Release
+      cmake --build build --parallel -j 16
+    fi
+
+    # Default summary output path (can be overridden via env)
+    BENCHMARK_SUMMARY_FILE=${BENCHMARK_SUMMARY_FILE:-/tmp/perf_summary.md}
+
+    # Run performance check based on mode
+    if [ -n "$BENCHMARK_SAVE_BASELINE" ]; then
+      echo "Saving performance baseline..."
+      python3 check_performance_regression.py \
+        --save-baseline "$BENCHMARK_SAVE_BASELINE" \
+        --output-summary "$BENCHMARK_SUMMARY_FILE" \
+        --lib ./libdtvmapi.so \
+        --mode "$BENCHMARK_MODE" \
+        --benchmark-dir test/evm-benchmarks/benchmarks
+    elif [ -n "$BENCHMARK_BASELINE_FILE" ]; then
+      echo "Checking performance regression against baseline..."
+      python3 check_performance_regression.py \
+        --baseline "$BENCHMARK_BASELINE_FILE" \
+        --threshold "$BENCHMARK_THRESHOLD" \
+        --output-summary "$BENCHMARK_SUMMARY_FILE" \
+        --lib ./libdtvmapi.so \
+        --mode "$BENCHMARK_MODE" \
+        --benchmark-dir test/evm-benchmarks/benchmarks
+    else
+      echo "Running benchmark suite without comparison..."
+      python3 check_performance_regression.py \
+        --save-baseline benchmark_results.json \
+        --output-summary "$BENCHMARK_SUMMARY_FILE" \
+        --lib ./libdtvmapi.so \
+        --mode "$BENCHMARK_MODE" \
+        --benchmark-dir test/evm-benchmarks/benchmarks
+      cat benchmark_results.json
+    fi
+
+    cd ..
+    ;;
   esac
 done
diff --git a/.github/workflows/dtvm_evm_test_x86.yml b/.github/workflows/dtvm_evm_test_x86.yml
index 8763f4a7..03ec1d6a 100644
--- a/.github/workflows/dtvm_evm_test_x86.yml
+++ b/.github/workflows/dtvm_evm_test_x86.yml
@@ -169,3 +169,116 @@ jobs:
           export TestSuite=evmonetestsuite
           bash .ci/run_test_suite.sh
+
+  performance_regression_check:
+    name: Performance Regression Check (10% threshold)
+    if: github.event_name == 'pull_request'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    container:
+      image: dtvmdev1/dtvm-dev-x64:main
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+        with:
+          submodules: "true"
+          fetch-depth: 0
+
+      - name: Setup git safe directory
+        run: |
+          echo "Configuring git safe directory: ${{ github.workspace }}"
+          git config --global --add safe.directory /__w/DTVM/DTVM
+
+      - name: Code Format Check
+        run: |
+          ./tools/format.sh check
+
+      - name: Build baseline (${{ github.base_ref }})
+        run: |
+          echo "Building baseline on branch: ${{ github.base_ref }}"
+
+          export LLVM_SYS_150_PREFIX=/opt/llvm15
+          export LLVM_DIR=$LLVM_SYS_150_PREFIX/lib/cmake/llvm
+          export PATH=$LLVM_SYS_150_PREFIX/bin:$PATH
+
+          # Save current state (including untracked files)
+          git stash push -u -m "perf-check-stash"
+          git checkout ${{ github.base_ref }}
+
+          # Build baseline
+          export CMAKE_BUILD_TARGET=Release
+          export ENABLE_ASAN=false
+          export RUN_MODE=multipass
+          export ENABLE_LAZY=false
+          export ENABLE_MULTITHREAD=true
+          export TestSuite=benchmarksuite
+          export CPU_EXCEPTION_TYPE='cpu'
+          export BENCHMARK_MODE=interpreter
+          export BENCHMARK_SAVE_BASELINE=/tmp/perf_baseline.json
+
+          bash .ci/run_test_suite.sh
+
+      - name: Build current PR and check regression
+        id: perf-check
+        run: |
+          echo "Building PR branch: ${{ github.sha }}"
+
+          export LLVM_SYS_150_PREFIX=/opt/llvm15
+          export LLVM_DIR=$LLVM_SYS_150_PREFIX/lib/cmake/llvm
+          export PATH=$LLVM_SYS_150_PREFIX/bin:$PATH
+
+          # Switch back to PR branch
+          git checkout ${{ github.sha }}
+          git stash pop || true
+
+          # Clean and rebuild for current PR
+          rm -rf build evmone
+
+          # Build and check
+          export CMAKE_BUILD_TARGET=Release
+          export ENABLE_ASAN=false
+          export RUN_MODE=multipass
+          export ENABLE_LAZY=false
+          export ENABLE_MULTITHREAD=true
+          export TestSuite=benchmarksuite
+          export CPU_EXCEPTION_TYPE='cpu'
+          export BENCHMARK_MODE=interpreter
+          export BENCHMARK_THRESHOLD=0.10
+          export BENCHMARK_BASELINE_FILE=/tmp/perf_baseline.json
+          export BENCHMARK_SUMMARY_FILE=/tmp/perf_summary.md
+
+          bash .ci/run_test_suite.sh
+        continue-on-error: true
+
+      - name: Comment on PR
+        if: always()
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const fs = require('fs');
+            const passed = '${{ steps.perf-check.outcome }}' === 'success';
+            let summary = '';
+            try {
+              summary = fs.readFileSync('/tmp/perf_summary.md', 'utf8');
+            } catch (e) {
+              summary = '_No benchmark summary available._';
+            }
+            const icon = passed ? '✅' : '⚠️';
+            const title = passed
+              ? 'Performance Check Passed'
+              : 'Performance Regression Detected';
+            const body = `${icon} **${title}**\n\n${summary}`;
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: body
+            });
+
+      - name: Fail on regression
+        if: steps.perf-check.outcome == 'failure'
+        run: |
+          echo "::error::Performance regression detected. See logs for details."
+          exit 1
diff --git a/tools/check_performance_regression.py b/tools/check_performance_regression.py
new file mode 100755
index 00000000..12c0eb2f
--- /dev/null
+++ b/tools/check_performance_regression.py
@@ -0,0 +1,464 @@
+#!/usr/bin/env python3
+"""
+Performance regression checker for evmone benchmarks.
+
+Usage:
+    # Save baseline results
+    python check_performance_regression.py --save-baseline baseline.json
+
+    # Check for regressions against baseline
+    python check_performance_regression.py --baseline baseline.json
+
+    # Check with custom threshold (default 10%)
+    python check_performance_regression.py --baseline baseline.json --threshold 0.15
+
+Exit codes:
+    0 - No significant regression detected
+    1 - Performance regression detected (> threshold)
+    2 - Script error (execution failed, file not found, etc.)
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+
+
+@dataclass
+class BenchmarkResult:
+    name: str
+    time_ns: float  # Time in nanoseconds
+    cpu_time_ns: float
+    iterations: int
+
+
+def run_benchmark(
+    lib_path: str,
+    mode: str,
+    benchmark_dir: str,
+    extra_args: Optional[List[str]] = None,
+) -> List[BenchmarkResult]:
+    """Run benchmark and parse JSON output.
+
+    Uses --benchmark_out to write JSON results to a temporary file so that
+    the human-readable benchmark progress streams to stdout/stderr in real
+    time (important for CI visibility).
+    """
+    env = {"EVMONE_EXTERNAL_OPTIONS": f"{lib_path},mode={mode}"}
+
+    # Write JSON results to a temp file instead of capturing stdout.
+    # This lets Google Benchmark's normal console output (one line per
+    # completed case) stream directly to the CI log in real time.
+    fd, json_out_path = tempfile.mkstemp(suffix=".json")
+    os.close(fd)
+
+    cmd = [
+        "./build/bin/evmone-bench",
+        benchmark_dir,
+        "--benchmark_filter=external/*",
+        f"--benchmark_out={json_out_path}",
+        "--benchmark_out_format=json",
+    ]
+
+    if extra_args:
+        cmd.extend(extra_args)
+
+    print(f"Running: {' '.join(cmd)}")
+    print(f"Environment: EVMONE_EXTERNAL_OPTIONS={env['EVMONE_EXTERNAL_OPTIONS']}")
+    sys.stdout.flush()
+
+    result = subprocess.run(
+        cmd,
+        env={**os.environ, **env},
+    )
+
+    if result.returncode != 0:
+        print(f"Benchmark execution failed with code {result.returncode}")
+        # Clean up temp file on failure
+        try:
+            os.unlink(json_out_path)
+        except OSError:
+            pass
+        sys.exit(2)
+
+    # Read JSON results from the temp file
+    try:
+        with open(json_out_path, "r") as f:
+            json_data = f.read()
+    finally:
+        try:
+            os.unlink(json_out_path)
+        except OSError:
+            pass
+
+    return parse_benchmark_json(json_data)
+
+
+def parse_benchmark_json(json_output: str) -> List[BenchmarkResult]:
+    """Parse Google Benchmark JSON output."""
+    try:
+        data = json.loads(json_output)
+    except json.JSONDecodeError as e:
+        print(f"Failed to parse JSON: {e}")
+        sys.exit(2)
+
+    results = []
+    for benchmark in data.get("benchmarks", []):
+        # Skip aggregates like mean, median, stddev
+        if benchmark.get("run_type") != "iteration":
+            continue
+
+        results.append(
+            BenchmarkResult(
+                name=benchmark["name"],
+                time_ns=benchmark.get("real_time", 0),
+                cpu_time_ns=benchmark.get("cpu_time", 0),
+                iterations=benchmark.get("iterations", 1),
+            )
+        )
+
+    return results
+
+
+def load_baseline(path: str) -> List[BenchmarkResult]:
+    """Load baseline results from JSON file."""
+    try:
+        with open(path, "r") as f:
+            data = json.load(f)
+    except FileNotFoundError:
print(f"::error::Baseline file not found: {path}") + sys.exit(2) + except json.JSONDecodeError as e: + print(f"::error::Failed to parse baseline JSON: {e}") + sys.exit(2) + + results = [] + for item in data: + results.append( + BenchmarkResult( + name=item["name"], + time_ns=item["time_ns"], + cpu_time_ns=item["cpu_time_ns"], + iterations=item["iterations"], + ) + ) + + return results + + +def save_baseline(results: List[BenchmarkResult], path: str) -> None: + """Save baseline results to JSON file.""" + data = [] + for r in results: + data.append({ + "name": r.name, + "time_ns": r.time_ns, + "cpu_time_ns": r.cpu_time_ns, + "iterations": r.iterations, + }) + + with open(path, "w") as f: + json.dump(data, f, indent=2) + + print(f"Saved {len(results)} benchmark results to {path}") + + +def compare_benchmarks( + current: List[BenchmarkResult], + baseline: List[BenchmarkResult], + threshold: float, +) -> Tuple[bool, List[dict]]: + """ + Compare current results against baseline. + + Returns: + (has_regression, comparison_details) + """ + baseline_map = {b.name: b for b in baseline} + current_map = {c.name: c for c in current} + + # Find missing and new benchmarks + baseline_names = set(baseline_map.keys()) + current_names = set(current_map.keys()) + + missing = baseline_names - current_names + new = current_names - baseline_names + + if missing: + print(f"::warning::Missing benchmarks (in baseline but not in current): {missing}") + if new: + print(f"::notice::New benchmarks (in current but not in baseline): {new}") + + # Compare common benchmarks + comparisons = [] + has_regression = False + + for name in sorted(baseline_names & current_names): + b = baseline_map[name] + c = current_map[name] + + # Calculate percentage change (positive = slower/regression) + time_change = (c.time_ns - b.time_ns) / b.time_ns + cpu_change = (c.cpu_time_ns - b.cpu_time_ns) / b.cpu_time_ns + + # Use the worse of real_time or cpu_time change + max_change = max(time_change, cpu_change) + + is_regression = max_change > threshold + if is_regression: + has_regression = True + + comparisons.append({ + "name": name, + "baseline_time_ns": b.time_ns, + "current_time_ns": c.time_ns, + "time_change": time_change, + "cpu_change": cpu_change, + "max_change": max_change, + "is_regression": is_regression, + }) + + return has_regression, comparisons + + +def print_comparison_table(comparisons: List[dict], threshold: float) -> None: + """Print a formatted comparison table.""" + if not comparisons: + print("No benchmarks to compare.") + return + + # GitHub Actions annotation messages + print("\n" + "=" * 100) + print(f"{'Benchmark':<60} {'Baseline(μs)':<15} {'Current(μs)':<15} {'Change':<12} {'Status'}") + print("=" * 100) + + regression_count = 0 + for comp in comparisons: + name = comp["name"] + baseline_us = comp["baseline_time_ns"] / 1000 + current_us = comp["current_time_ns"] / 1000 + change_pct = comp["max_change"] * 100 + status = "✓ PASS" if not comp["is_regression"] else "✗ FAIL" + + # Truncate long names + display_name = name if len(name) < 60 else name[:57] + "..." 
+
+        print(f"{display_name:<60} {baseline_us:<15.2f} {current_us:<15.2f} {change_pct:>+10.1f}% {status}")
+
+        if comp["is_regression"]:
+            regression_count += 1
+            # GitHub Actions warning annotation
+            print(f"::warning title=Performance Regression::{name} regressed by {change_pct:.1f}% (threshold: {threshold*100:.0f}%)")
+
+    print("=" * 100)
+    print(f"\nTotal benchmarks: {len(comparisons)}")
+    print(f"Regressions (> {threshold*100:.0f}%): {regression_count}")
+
+
+def _short_name(name: str) -> str:
+    """Extract a short display name from the full benchmark name.
+
+    Benchmark names typically look like 'external/some_case/variant'.
+    We strip the leading 'external/' prefix to keep the table compact.
+    """
+    if name.startswith("external/"):
+        return name[len("external/"):]
+    return name
+
+
+def generate_markdown_summary(
+    comparisons: List[dict],
+    threshold: float,
+    has_regression: bool,
+) -> str:
+    """Generate a concise Markdown summary of benchmark comparison results."""
+    lines: List[str] = []
+
+    regression_count = sum(1 for c in comparisons if c["is_regression"])
+
+    lines.append(
+        f"**Performance Benchmark Results** (threshold: {threshold*100:.0f}%)"
+    )
+    lines.append("")
+
+    if not comparisons:
+        lines.append("_No benchmarks to compare._")
+        return "\n".join(lines)
+
+    # Markdown table header
+    lines.append("| Benchmark | Baseline (us) | Current (us) | Change | Status |")
+    lines.append("|-----------|--------------|-------------|--------|--------|")
+
+    for comp in comparisons:
+        name = _short_name(comp["name"])
+        baseline_us = comp["baseline_time_ns"] / 1000
+        current_us = comp["current_time_ns"] / 1000
+        change_pct = comp["max_change"] * 100
+        status = "PASS" if not comp["is_regression"] else "**REGRESSED**"
+
+        lines.append(
+            f"| {name} | {baseline_us:.2f} | {current_us:.2f} "
+            f"| {change_pct:+.1f}% | {status} |"
+        )
+
+    lines.append("")
+    lines.append(
+        f"**Summary**: {len(comparisons)} benchmarks, "
+        f"{regression_count} regressions"
+    )
+
+    return "\n".join(lines)
+
+
+def generate_baseline_summary(results: List[BenchmarkResult]) -> str:
+    """Generate a concise Markdown summary for a baseline-save run."""
+    lines: List[str] = []
+    lines.append("**Baseline Benchmark Results**")
+    lines.append("")
+    lines.append("| Benchmark | Time (us) |")
+    lines.append("|-----------|----------|")
+
+    for r in results:
+        name = _short_name(r.name)
+        time_us = r.time_ns / 1000
+        lines.append(f"| {name} | {time_us:.2f} |")
+
+    lines.append("")
+    lines.append(f"**Total**: {len(results)} benchmarks collected")
+    return "\n".join(lines)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Check for performance regressions in evmone benchmarks",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    # Save baseline after a known-good commit
+    python check_performance_regression.py --save-baseline baseline.json
+
+    # Check current commit against baseline in CI
+    python check_performance_regression.py --baseline baseline.json
+
+    # Check with custom threshold (15% instead of default 10%)
+    python check_performance_regression.py --baseline baseline.json --threshold 0.15
+
+    # Specify different library or benchmark directory
+    python check_performance_regression.py --baseline baseline.json --lib ./other.so --mode jit
+""",
+    )
+
+    parser.add_argument(
+        "--baseline",
+        metavar="PATH",
+        help="Path to baseline JSON file for comparison",
+    )
+    parser.add_argument(
+        "--save-baseline",
+        metavar="PATH",
+        help="Run benchmarks and save results to file (use this to create baseline)",
+    )
+    parser.add_argument(
+        "--threshold",
+        type=float,
+        default=0.10,
+        help="Regression threshold as ratio (default: 0.10 = 10%%)",
+    )
+    parser.add_argument(
+        "--lib",
+        default="./libdtvmapi.so",
+        help="Path to the library to benchmark (default: ./libdtvmapi.so)",
+    )
+    parser.add_argument(
+        "--mode",
+        default="interpreter",
+        help="Mode for the library (default: interpreter)",
+    )
+    parser.add_argument(
+        "--benchmark-dir",
+        default="test/evm-benchmarks/benchmarks",
+        help="Path to benchmark directory (default: test/evm-benchmarks/benchmarks)",
+    )
+    parser.add_argument(
+        "--output-summary",
+        metavar="PATH",
+        help="Write a concise Markdown summary to the given file (for PR comments)",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Verbose output",
+    )
+
+    args = parser.parse_args()
+
+    if not args.baseline and not args.save_baseline:
+        parser.error("Either --baseline or --save-baseline must be specified")
+
+    # Run benchmarks
+    try:
+        current_results = run_benchmark(
+            lib_path=args.lib,
+            mode=args.mode,
+            benchmark_dir=args.benchmark_dir,
+        )
+    except Exception as e:
+        print(f"::error::Failed to run benchmarks: {e}")
+        sys.exit(2)
+
+    if not current_results:
+        print("::error::No benchmark results found")
+        sys.exit(2)
+
+    print(f"Collected {len(current_results)} benchmark results")
+
+    # Save baseline mode
+    if args.save_baseline:
+        save_baseline(current_results, args.save_baseline)
+        if args.output_summary:
+            summary_md = generate_baseline_summary(current_results)
+            with open(args.output_summary, "w") as f:
+                f.write(summary_md)
+            print(f"Wrote baseline summary to {args.output_summary}")
+        return 0
+
+    # Compare mode
+    baseline_results = load_baseline(args.baseline)
+    print(f"Loaded {len(baseline_results)} baseline results from {args.baseline}")
+
+    has_regression, comparisons = compare_benchmarks(
+        current_results,
+        baseline_results,
+        args.threshold,
+    )
+
+    print_comparison_table(comparisons, args.threshold)
+
+    # Write Markdown summary for PR comments
+    if args.output_summary:
+        summary_md = generate_markdown_summary(
+            comparisons, args.threshold, has_regression
+        )
+        with open(args.output_summary, "w") as f:
+            f.write(summary_md)
+        print(f"Wrote comparison summary to {args.output_summary}")
+
+    # Summary for GitHub Actions
+    print("\n" + "=" * 100)
+    if has_regression:
+        print(f"::error::Performance regression detected! Some benchmarks exceeded {args.threshold*100:.0f}% threshold.")
+        print("RESULT: FAIL")
+        return 1
+    else:
+        print("::notice::No significant performance regression detected.")
+        print("RESULT: PASS")
+        return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())