PostHog · gewenyu99 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/bin.ts b/bin.ts
@@ -115,6 +115,12 @@ yargs(hideBin(process.argv))
             'Show menu for manual integration selection instead of auto-detecting\nenv: POSTHOG_WIZARD_MENU',
           type: 'boolean',
         },
+        benchmark: {
+          default: false,
+          describe:
+            'Run in benchmark mode with per-phase token tracking\nenv: POSTHOG_WIZARD_BENCHMARK',
+          type: 'boolean',
+        },
       });
     },
     (argv) => {

diff --git a/src/lib/__tests__/agent-interface.test.ts b/src/lib/__tests__/agent-interface.test.ts
@@ -32,6 +32,7 @@ describe('runAgent', () => {
     localMcp: false,
     ci: false,
     menu: false,
+    benchmark: false,
   };
 
   const defaultAgentConfig = {

diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts
@@ -14,6 +14,8 @@ import {
 } from './constants';
 import { getLlmGatewayUrlFromHost } from '../utils/urls';
 import { LINTING_TOOLS } from './safe-tools';
+import type { BenchmarkData } from './benchmark';
+import { BenchmarkTracker } from './benchmark';
 import { createEnvFileServer, ENV_FILE_TOOL_NAMES } from './env-file-tools';
 
 // Dynamic import cache for ESM module
@@ -416,7 +418,11 @@ export async function runAgent(
     successMessage?: string;
     errorMessage?: string;
   },
-): Promise<{ error?: AgentErrorType; message?: string }> {
+): Promise<{
+  error?: AgentErrorType;
+  message?: string;
+  benchmark?: BenchmarkData;
+}> {
   const {
     estimatedDurationMinutes = 8,
     spinnerMessage = 'Customizing your PostHog setup...',
@@ -430,6 +436,9 @@ export async function runAgent(
     `This whole process should take about ${estimatedDurationMinutes} minutes including error checking and fixes.\n\nGrab some coffee!`,
   );
 
+  // Create benchmark tracker before spinner starts so its log output is visible
+  const tracker = options.benchmark ? new BenchmarkTracker(spinner) : null;
+
   spinner.start(spinnerMessage);
 
   const cliPath = getClaudeCodeExecutablePath();
@@ -441,6 +450,8 @@ export async function runAgent(
   const collectedText: string[] = [];
   // Track if we received a successful result (before any cleanup errors)
   let receivedSuccessResult = false;
+  // Last successful result message, kept for benchmark extraction; null until one arrives
+  let resultMessage: SDKMessage | null = null;
 
   // Workaround for SDK bug: stdin closes before canUseTool responses can be sent.
   // The fix is to use an async generator for the prompt that stays open until
@@ -465,7 +476,11 @@ export async function runAgent(
   // Helper to handle successful completion (used in normal path and race condition recovery)
   const completeWithSuccess = (
     suppressedError?: Error,
-  ): { error?: AgentErrorType; message?: string } => {
+  ): {
+    error?: AgentErrorType;
+    message?: string;
+    benchmark?: BenchmarkData;
+  } => {
     const durationMs = Date.now() - startTime;
     const durationSeconds = Math.round(durationMs / 1000);
 
@@ -501,7 +516,9 @@ export async function runAgent(
       duration_seconds: durationSeconds,
     });
     spinner.stop(successMessage);
-    return {};
+    // Omit the key when benchmarking is off so the success result stays `{}` as before
+    const benchmark = tracker?.finalize(resultMessage, durationMs);
+    return benchmark ? { benchmark } : {};
   };
 
   try {
@@ -598,13 +615,15 @@ export async function runAgent(
         collectedText,
         receivedSuccessResult,
       );
+      tracker?.onMessage(message);
 
       // Signal completion when result received
       if (message.type === 'result') {
         // Track successful results before any potential cleanup errors
         // The SDK may emit a second error result during cleanup due to a race condition
         if (message.subtype === 'success' && !message.is_error) {
           receivedSuccessResult = true;
+          resultMessage = message;
         }
         signalDone!();
       }
@@ -776,7 +795,6 @@ function handleSDKMessage(
     }
 
     default:
-      // Log other message types for debugging
       if (options.debug) {
         debug(`Unhandled message type: ${message.type}`);
       }

diff --git a/src/lib/agent-runner.ts b/src/lib/agent-runner.ts
@@ -193,17 +193,19 @@ export async function runAgentWizard(
     options,
   );
 
+  const agentRunConfig = {
+    estimatedDurationMinutes: config.ui.estimatedDurationMinutes,
+    spinnerMessage: SPINNER_MESSAGE,
+    successMessage: config.ui.successMessage,
+    errorMessage: 'Integration failed',
+  };
+
   const agentResult = await runAgent(
     agent,
     integrationPrompt,
     options,
     spinner,
-    {
-      estimatedDurationMinutes: config.ui.estimatedDurationMinutes,
-      spinnerMessage: SPINNER_MESSAGE,
-      successMessage: config.ui.successMessage,
-      errorMessage: 'Integration failed',
-    },
+    agentRunConfig,
   );
 
   // Handle error cases detected in agent output