tnelson · tnelson · Feb 5, 2026 · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025
diff --git a/forge/e2e/helpers/constants.ts b/forge/e2e/helpers/constants.ts
@@ -0,0 +1,13 @@
+/**
+ * Shared constants for the Forge/Sterling end-to-end (Playwright) tests.
+ * Centralizes magic values so a change in result representation or UI timing needs only one update.
+ */
+
+// Text shown for boolean results (Racket-style #t/#f format)
+export const BOOL_TRUE = '#t';
+export const BOOL_FALSE = '#f';
+
+// Common timeout values, all in milliseconds
+export const TIMEOUT_GRAPH_LAYOUT = 2000; // fixed pause after running a command, to let the graph layout finish
+export const TIMEOUT_EVAL_RESULT = 1500; // fixed pause after submitting an expression to the evaluator
+export const TIMEOUT_ELEMENT_VISIBLE = 5000; // upper bound when waiting for an element to become visible
diff --git a/forge/e2e/tests/sterling-evaluator.spec.ts b/forge/e2e/tests/sterling-evaluator.spec.ts
@@ -1,5 +1,136 @@
-import { test, expect } from '@playwright/test';
+import { test, expect, Page, Locator } from '@playwright/test';
 import { startForge, ForgeInstance, selectAndRunCommand } from '../helpers/forge-runner';
+import { BOOL_TRUE, BOOL_FALSE, TIMEOUT_GRAPH_LAYOUT, TIMEOUT_EVAL_RESULT, TIMEOUT_ELEMENT_VISIBLE } from '../helpers/constants';
+
+/**
+ * Page-object helper for driving Sterling's evaluator panel from a Playwright test.
+ *
+ * IMPORTANT: We cannot reliably scope assertions to the evaluator panel because
+ * Sterling's DOM structure may vary, so every check below is page-wide and relies on:
+ *
+ * 1. For expressions: Count the expression text (e.g., "`Node0"), which is assumed unique
+ *    to the evaluator since backticks don't appear in graph labels.
+ *
+ * 2. For boolean results: Count #t/#f, which are assumed to appear only in evaluator results.
+ *
+ * 3. For atom/set results: We can only verify the expression was processed
+ *    (appears in history) and no error occurred. We cannot distinguish result
+ *    atoms from graph atoms. Where counts are used, they are compared before vs. after.
+ */
+class EvaluatorHelper {
+  private page: Page;
+  private input: Locator; // the evaluator's expression box, located by its placeholder text
+
+  constructor(page: Page) {
+    this.page = page;
+    this.input = page.getByPlaceholder('Enter an expression');
+  }
+
+  async waitForVisible(): Promise<void> { // fails unless the input is visible within TIMEOUT_ELEMENT_VISIBLE ms
+    await expect(this.input).toBeVisible({ timeout: TIMEOUT_ELEMENT_VISIBLE });
+  }
+
+  /**
+   * Count the elements anywhere on the page that getByText(text, { exact: true }) matches.
+   */
+  async countOnPage(text: string): Promise<number> {
+    return await this.page.getByText(text, { exact: true }).count();
+  }
+
+  /**
+   * Type `expr` into the input, press Enter, then sleep a fixed TIMEOUT_EVAL_RESULT ms (no polling for a result).
+   */
+  async evaluate(expr: string): Promise<void> {
+    await this.input.fill(expr);
+    await this.input.press('Enter');
+    await this.page.waitForTimeout(TIMEOUT_EVAL_RESULT);
+  }
+
+  /**
+   * Assert that none of a fixed list of error phrases is visible anywhere on the page.
+   * The phrases are meant to look like real error output, so benign text such as
+   * "0 errors" or help text should not match. Only the first match per phrase is checked.
+   */
+  async expectNoError(): Promise<void> {
+    // NOTE: detection is purely text-based; no CSS classes or text colors are inspected.
+    // These phrases are more specific to actual error states than a bare "error" would be.
+    const errorIndicators = [
+      // NOTE(review): getByText with a string is documented as a case-insensitive substring match - confirm that is intended
+      'Error:',
+      'Error evaluating',
+      'evaluation failed',
+      'Parse error',
+      'Syntax error',
+    ];
+
+    for (const errorText of errorIndicators) {
+      const errorVisible = await this.page.getByText(errorText).first().isVisible().catch(() => false); // a failed lookup counts as "not visible"
+      expect(errorVisible, `Unexpected error: found '${errorText}'`).toBe(false);
+    }
+  }
+
+  /**
+   * Evaluate `expr` and assert that its page-wide exact-text count grew, i.e. it was echoed into the history.
+   * If `expectedResult` is given, its count must grow too. Finishes by asserting that no error phrase is visible.
+   *
+   * @param expr - The expression to evaluate (assumed not to appear outside the evaluator)
+   * @param description - Label prefixed to assertion failure messages
+   * @param expectedResult - Optional: expected result text to verify (must be evaluator-unique)
+   */
+  async evaluateAndVerifyInHistory(expr: string, description: string, expectedResult?: string): Promise<void> {
+    const countBefore = await this.countOnPage(expr);
+    const resultCountBefore = expectedResult ? await this.countOnPage(expectedResult) : 0;
+
+    await this.evaluate(expr);
+
+    const countAfter = await this.countOnPage(expr);
+    expect(countAfter, `${description}: expression '${expr}' should appear in history`).toBeGreaterThan(countBefore);
+
+    if (expectedResult) {
+      const resultCountAfter = await this.countOnPage(expectedResult);
+      if (resultCountAfter <= resultCountBefore) {
+        // Failure path only: gather page text so the assertion message carries context
+        const pageText = await this.page.locator('body').innerText();
+        // Keep lines that mention the expression or any 'Node' text; at most 10 are reported
+        const lines = pageText.split('\n');
+        const relevantLines = lines.filter(line => line.includes(expr) || line.includes('Node'));
+        const context = relevantLines.slice(0, 10).join('\n');
+
+        const errorMsg =
+          `${description}: expected result '${expectedResult}' not found.\n` +
+          `Expression '${expr}' was evaluated.\n` +
+          `Expected '${expectedResult}' count to increase from ${resultCountBefore}, but got ${resultCountAfter}.\n` +
+          `Relevant page content:\n${context}\n` +
+          `(Check screenshot for full evaluator output)`;
+
+        expect(resultCountAfter, errorMsg).toBeGreaterThan(resultCountBefore);
+      }
+    }
+
+    await this.expectNoError();
+  }
+
+  /**
+   * Evaluate a boolean formula and assert that both `expr` and `expectedBool` gained an exact-text match on the page.
+   * Relies on #t/#f appearing only in evaluator output (not in the graph); also asserts no error phrase is visible.
+   */
+  async evaluateAndExpectBool(expr: string, expectedBool: string, description: string): Promise<void> {
+    const exprCountBefore = await this.countOnPage(expr);
+    const boolCountBefore = await this.countOnPage(expectedBool);
+
+    await this.evaluate(expr);
+
+    // Verify the expression text gained a match, i.e. it was added to the history
+    const exprCountAfter = await this.countOnPage(expr);
+    expect(exprCountAfter, `${description}: expression should appear in history`).toBeGreaterThan(exprCountBefore);
+
+    // Verify the expected boolean text gained a match (#t/#f are treated as evaluator-specific)
+    const boolCountAfter = await this.countOnPage(expectedBool);
+    expect(boolCountAfter, `${description}: expected '${expectedBool}' result`).toBeGreaterThan(boolCountBefore);
+
+    await this.expectNoError();
+  }
+}
 
 test.describe('Sterling Evaluator', () => {
   let forge: ForgeInstance;
@@ -10,88 +141,81 @@ test.describe('Sterling Evaluator', () => {
     }
   });
 
-  test('can open the evaluator panel', async ({ page }) => {
-    forge = await startForge('e2e/fixtures/simple-graph.frg');
-    await page.goto(forge.sterlingUrl);
-
-    // Must run a command first to have an instance to evaluate
+  /**
+   * Shared setup: go to Sterling, run 'simpleRun', pause for graph layout, open the Evaluator tab; returns a helper whose input is visible.
+   * @param page - Playwright page
+   * @param forgeInstance - The Forge instance (must be started before calling); only its sterlingUrl is read here
+   */
+  async function setupEvaluator(page: Page, forgeInstance: ForgeInstance): Promise<EvaluatorHelper> {
+    await page.goto(forgeInstance.sterlingUrl);
     await selectAndRunCommand(page, 'simpleRun');
+    await page.waitForTimeout(TIMEOUT_GRAPH_LAYOUT);
 
-    // Wait for graph layout to finish
-    await page.waitForTimeout(2000);
-
-    // The Evaluator is a sidebar tab on the right - click it
-    // It's displayed as vertical text "Evaluator" in a tab button
     const evaluatorTab = page.getByText('Evaluator', { exact: true }).first();
-    await expect(evaluatorTab).toBeVisible({ timeout: 5000 });
+    await expect(evaluatorTab).toBeVisible({ timeout: TIMEOUT_ELEMENT_VISIBLE });
     await evaluatorTab.click();
 
-    // After clicking, the EVALUATOR panel should open
-    await expect(page.getByPlaceholder('Enter an expression')).toBeVisible({ timeout: 5000 });
-  });
+    const helper = new EvaluatorHelper(page);
+    await helper.waitForVisible();
+    return helper;
+  }
 
-  test('can evaluate a simple expression', async ({ page }) => {
+  test('can open the evaluator panel', async ({ page }) => {
     forge = await startForge('e2e/fixtures/simple-graph.frg');
-    await page.goto(forge.sterlingUrl);
-
-    // Run a command first
-    await selectAndRunCommand(page, 'simpleRun');
-
-    // Wait for graph layout to finish
-    await page.waitForTimeout(2000);
-
-    // Open evaluator panel
-    const evaluatorTab = page.getByText('Evaluator', { exact: true }).first();
-    await evaluatorTab.click();
-    await expect(page.getByPlaceholder('Enter an expression')).toBeVisible({ timeout: 5000 });
+    await setupEvaluator(page, forge);
+    // No further assertion needed: setupEvaluator fails unless the evaluator input becomes visible
+  });
 
-    // Find input field in evaluator - placeholder is "Enter an expression..."
-    const evaluatorInput = page.getByPlaceholder('Enter an expression');
-    await expect(evaluatorInput).toBeVisible({ timeout: 5000 });
+  test('sig expression evaluates without error', async ({ page }) => {
+    forge = await startForge('e2e/fixtures/simple-graph.frg');
+    const evaluator = await setupEvaluator(page, forge);
 
-    // Type a simple expression - "Node" should return all Node atoms
-    await evaluatorInput.fill('Node');
-    await evaluatorInput.press('Enter');
+    // Evaluate "Node" - only the absence of an error phrase is asserted
+    // (result atoms are indistinguishable from graph atoms)
+    await evaluator.evaluate('Node');
+    await evaluator.expectNoError();
+  });
 
-    // Wait for result
-    await page.waitForTimeout(2000);
+  test('relation expression evaluates without error', async ({ page }) => {
+    forge = await startForge('e2e/fixtures/simple-graph.frg');
+    const evaluator = await setupEvaluator(page, forge);
 
-    // The result should contain Node references - look for Node0, Node1, or Node2
-    await expect(page.getByText(/Node[012]/).first()).toBeVisible({ timeout: 5000 });
+    // Evaluate "edges" - only the absence of an error phrase is asserted
+    // (result tuples are indistinguishable from graph edges)
+    await evaluator.evaluate('edges');
+    await evaluator.expectNoError();
   });
 
-  test('evaluator shows result for edges relation', async ({ page }) => {
+  // --- Atom-specific evaluator tests ---
+  // These checks ensure backtick-quoted atom names (e.g. `Node0) work correctly in the Sterling evaluator.
+  // They share one test, run in sequence on one page, to avoid an expensive Forge/Sterling startup per case.
+
+  test('can evaluate atom expressions and formulas', async ({ page }) => {
     forge = await startForge('e2e/fixtures/simple-graph.frg');
-    await page.goto(forge.sterlingUrl);
+    const evaluator = await setupEvaluator(page, forge);
 
-    // Run a command first
-    await selectAndRunCommand(page, 'simpleRun');
+    // Test 1: Single atom - the expression doubles as the expected-result text, so both counts track the same string
+    await evaluator.evaluateAndVerifyInHistory('`Node0', 'Single atom', '`Node0');
 
-    // Wait for graph layout to finish
-    await page.waitForTimeout(2000);
+    // Test 2: Atom join expression - `Node0.edges (result depends on instance, so no expected result is passed)
+    await evaluator.evaluateAndVerifyInHistory('`Node0.edges', 'Atom join');
 
-    // Open evaluator panel
-    const evaluatorTab = page.getByText('Evaluator', { exact: true }).first();
-    await evaluatorTab.click();
-    await expect(page.getByPlaceholder('Enter an expression')).toBeVisible({ timeout: 5000 });
+    // Test 3: Atom membership formula - `Node0 in Node should be true
+    await evaluator.evaluateAndExpectBool('`Node0 in Node', BOOL_TRUE, 'Atom membership (true)');
 
-    // Count how many result entries exist before evaluation
-    const resultsBefore = await page.locator('text=/Node\\d.*→.*Node\\d/').count();
+    // Test 4: Atom equality formula - `Node0 = `Node0 should be true
+    await evaluator.evaluateAndExpectBool('`Node0 = `Node0', BOOL_TRUE, 'Atom self-equality (true)');
 
-    // Find input field and evaluate the edges relation
-    const evaluatorInput = page.getByPlaceholder('Enter an expression');
-    await evaluatorInput.fill('edges');
-    await evaluatorInput.press('Enter');
+    // Test 5: Atom inequality formula - `Node0 != `Node1 should be true
+    await evaluator.evaluateAndExpectBool('`Node0 != `Node1', BOOL_TRUE, 'Atom inequality (true)');
 
-    // Wait for result
-    await page.waitForTimeout(2000);
+    // Test 6: Atom equality with different atoms - should be FALSE
+    await evaluator.evaluateAndExpectBool('`Node0 = `Node1', BOOL_FALSE, 'Different atoms equality (false)');
 
-    // The evaluator should show tuple results like "Node0 → Node1" for edges
-    // Since simpleRun requires "some edges", there should be at least one tuple
-    const resultsAfter = await page.locator('text=/Node\\d/').count();
+    // Test 7: Atom set union - expression appears in history and '((Node0) (Node1))' appears as the result text
+    await evaluator.evaluateAndVerifyInHistory('`Node0 + `Node1', 'Set union', '((Node0) (Node1))');
 
-    // There should be more Node references after evaluation (the result tuples)
-    // Or at minimum, the evaluator panel should show the edges expression was processed
-    expect(resultsAfter).toBeGreaterThan(0);
+    // Test 8: Atom product - expression appears in history and '((Node0 Node1))' appears as the result text
+    await evaluator.evaluateAndVerifyInHistory('`Node0 -> `Node1', 'Product', '((Node0 Node1))');
   });
 });
diff --git a/forge/lang/ast.rkt b/forge/lang/ast.rkt
@@ -1348,10 +1348,13 @@
       "unknown:?:?"))
 
 (define (raise-forge-error #:msg [msg "error"] #:context [context #f] #:raise? [raise? #t])
-  (if raise? 
+  (if raise? ; NOTE(review): the non-raising branch below prints with no trailing newline - confirm intended
       (raise-user-error (format "[~a] ~a" (pretty-loc context) msg))
       (fprintf (current-error-port) "[~a] ~a" (pretty-loc context) msg)))
 
+(define (raise-forge-warning #:msg [msg "warning"] #:context [context #f]) ; despite the name, this only prints; it never raises
+  (fprintf (current-error-port) "Warning: [~a] ~a~n" (pretty-loc context) msg)) ; one "Warning: [loc] msg" line on (current-error-port)
+
 ; Helper for other locations we might need to generate a nodeinfo struct from a variety
 ; of datatype possibilities.
 (define (build-nodeinfo context)

diff --git a/forge/lang/expander.rkt b/forge/lang/expander.rkt
@@ -20,7 +20,7 @@
                      (only-in pkg/lib pkg-directory)
                      (only-in racket/path file-name-from-path find-relative-path normalize-path)
                      (only-in racket/string string-replace string-join)
-                     (only-in forge/lang/ast raise-forge-error))
+                     (only-in forge/lang/ast raise-forge-error raise-forge-warning))
          syntax/srcloc
          racket/string
          (only-in syntax/modresolve resolve-module-path)
@@ -1246,10 +1246,11 @@
 (define-for-syntax (ensure-target-ref target-pred ex)
   (define tp (syntax-e target-pred))
   (let ([ex-as-datum (syntax->datum ex)])
-    (unless 
+    (unless ; warn when tp does not occur (memq) in the flattened datum of ex
       (memq tp (flatten ex-as-datum))
-      (eprintf  "Warning: ~a ~a:~a Test does not reference ~a.\n" 
-        (syntax-source ex) (syntax-line ex) (syntax-column ex)  tp))))
+      (raise-forge-warning ; warning goes through the shared helper, with ex passed as its location context
+       #:msg (format "Test does not reference ~a." tp)
+       #:context ex))))
 
 
 (define-syntax (NT-TestSuiteDecl stx)

diff --git a/forge/server/forgeserver.rkt b/forge/server/forgeserver.rkt
@@ -442,11 +442,11 @@
                   #:context context)]))
          (define xml (get-xml inst))
 
-         ; is-running? is about the _solver process itself_.
+         ; is-solver-process-alive? is about the _solver process itself_.
          ; is-run-closed? is about whether this specific run has been terminated
          ; Sat? is about whether the solution we have is Sat.
          (define response (make-sterling-data xml datum-id name temporal?
-                                              (and (is-running? the-run) (not (is-run-closed? the-run)) inst)
+                                              (and (is-solver-process-alive? the-run) (not (is-run-closed? the-run)) inst)
                                               old-datum-id))
         (send-to-sterling response #:connection connection)]
        [else