Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
85fd498
cmd-line option: don't run sterling if off
tnelson Feb 27, 2025
b84bf65
cleanup: minor cleanup
tnelson Feb 27, 2025
5d6c27d
add: test case for pred->fun/fun->pred recursive
tnelson Feb 27, 2025
688ea1f
[patch] Handle spaces and quotes in filenames w.r.t. run IDs on backe…
tnelson Mar 25, 2025
8a0e270
Feat int minimization (#303)
tnelson Apr 25, 2025
c6688a2
Merge branch 'main' into dev
tnelson Apr 25, 2025
3a304e9
Avoid identifier clash between expander macros and user-defined sigs,…
tnelson May 28, 2025
80a0085
feat: chain of .ts/.js/.cnd files to Sterling
tnelson Jun 14, 2025
8edd9ad
update Sterling
tnelson Jun 14, 2025
fef4882
fix: harden sterling against multiple vis elements
tnelson Jun 15, 2025
2e9d161
feat: module-style imports for helper libraries, temporal helpers
tnelson Jun 20, 2025
39bedb7
breaking: move seq library to actual library. Require open util/seque…
tnelson Jun 21, 2025
c4d6074
fix: possible id-shadowing bug, add option to run without Sterling
tnelson Oct 13, 2025
f99ec90
fix: join parsing, regression test, note on 2nd issue (#311)
tnelson Nov 12, 2025
3ebbf83
Begin integrating typed Racket into core translation layer (#310)
tnelson Nov 12, 2025
f00a33a
Modify the cores pipeline to get cores info to Sterling (#312)
tnelson Nov 13, 2025
4a1ac94
Refactoring options and imports (#316)
tnelson Dec 13, 2025
d00fda7
Refactor AST, additional static types (#318)
tnelson Jan 16, 2026
d993d29
Update to CnD Sterling v. 2.0.4 (#319)
tnelson Jan 16, 2026
c134d0f
types: small type augmentation feat. Claude
tnelson Jan 17, 2026
114dda9
Jan26 fixes (#320)
tnelson Jan 22, 2026
3178a9e
Update documentation links and installation instructions
tnelson Jan 22, 2026
4d2fa2c
Merge main into dev, feat. Claude
tnelson Jan 26, 2026
2a58aaf
update: Sterling version, fix e2e test
tnelson Jan 26, 2026
9d65ddf
update sterling to v2.1.2
tnelson Jan 31, 2026
f6c99aa
Updating Sterling version (#322)
sidprasad Jan 31, 2026
38552c4
Updating sterling version (#323)
sidprasad Feb 1, 2026
3ab323c
fix: test_keep last was misreporting failures
tnelson Feb 3, 2026
5b5af56
fix: bad run-closing on forge_error tests; improve func name
tnelson Feb 4, 2026
d40e99b
fix: detect fun/pred parameter names that shadow sig/fields
tnelson Feb 4, 2026
c5752b4
tests: add more shadowing error tests
tnelson Feb 4, 2026
18cabb7
Merge main into dev, keeping dev's Sterling
tnelson Feb 4, 2026
bde0fc0
Merge branch 'dev' into fixes_feb26
tnelson Feb 4, 2026
1a147ae
tests: further work on e2e suite, check atoms
tnelson Feb 4, 2026
9afcba6
Sterling version update (#326)
sidprasad Feb 5, 2026
33fcb1c
Merge branch 'fixes_feb26' into dev
tnelson Feb 5, 2026
bfbf96e
fix: shadowing error -> warning, avoid BC
tnelson Feb 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions forge/e2e/helpers/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Shared constants for the Forge/Sterling e2e tests.
 *
 * Every magic value the specs depend on lives here, so a change in how
 * results are rendered (or in how long the UI needs) is a one-line update.
 */

/** Text displayed for a true boolean result (Racket-style `#t`). */
export const BOOL_TRUE = '#t';

/** Text displayed for a false boolean result (Racket-style `#f`). */
export const BOOL_FALSE = '#f';

/** Milliseconds to wait for the graph layout to finish. */
export const TIMEOUT_GRAPH_LAYOUT = 2000;

/** Milliseconds allowed for an element to become visible. */
export const TIMEOUT_ELEMENT_VISIBLE = 5000;

/** Milliseconds to wait for an evaluator result. */
export const TIMEOUT_EVAL_RESULT = 1500;
248 changes: 186 additions & 62 deletions forge/e2e/tests/sterling-evaluator.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,136 @@
import { test, expect } from '@playwright/test';
import { test, expect, Page, Locator } from '@playwright/test';
import { startForge, ForgeInstance, selectAndRunCommand } from '../helpers/forge-runner';
import { BOOL_TRUE, BOOL_FALSE, TIMEOUT_GRAPH_LAYOUT, TIMEOUT_EVAL_RESULT, TIMEOUT_ELEMENT_VISIBLE } from '../helpers/constants';

/**
 * Helper class to interact with Sterling's evaluator panel.
 *
 * IMPORTANT: We cannot reliably scope assertions to the evaluator panel because
 * Sterling's DOM structure may vary. Instead, we use these strategies:
 *
 * 1. For expressions: Count the expression text (e.g., "`Node0") which is unique
 *    to the evaluator since backticks don't appear in graph labels.
 *
 * 2. For boolean results: Count #t/#f which only appear in evaluator results.
 *
 * 3. For atom/set results: We can only verify the expression was processed
 *    (appears in history) and no error occurred. We cannot distinguish result
 *    atoms from graph atoms.
 */
class EvaluatorHelper {
  private readonly page: Page;
  private readonly input: Locator;

  /**
   * @param page - Playwright page showing Sterling; the evaluator input is
   *   located by its "Enter an expression" placeholder.
   */
  constructor(page: Page) {
    this.page = page;
    this.input = page.getByPlaceholder('Enter an expression');
  }

  /** Assert that the evaluator input becomes visible within TIMEOUT_ELEMENT_VISIBLE. */
  async waitForVisible(): Promise<void> {
    await expect(this.input).toBeVisible({ timeout: TIMEOUT_ELEMENT_VISIBLE });
  }

  /**
   * Count elements on the page whose text matches `text` exactly
   * (`exact: true`, i.e. whole-string and case-sensitive).
   *
   * @param text - The text to look for.
   * @returns Number of matching elements anywhere on the page.
   */
  async countOnPage(text: string): Promise<number> {
    return this.page.getByText(text, { exact: true }).count();
  }

  /**
   * Type an expression into the evaluator, submit it with Enter, and then
   * pause for TIMEOUT_EVAL_RESULT ms so the result has time to render.
   *
   * @param expr - The expression to evaluate.
   */
  async evaluate(expr: string): Promise<void> {
    await this.input.fill(expr);
    await this.input.press('Enter');
    // Fixed pause: there is no evaluator-scoped element we can reliably wait on.
    await this.page.waitForTimeout(TIMEOUT_EVAL_RESULT);
  }

  /**
   * Check that no error indicators are visible.
   * Uses specific patterns that indicate actual errors, avoiding false matches
   * on benign text like "0 errors" or help text.
   *
   * Every element matching a pattern is inspected, not just the first one in
   * DOM order, so a hidden earlier match cannot mask a visible error.
   */
  async expectNoError(): Promise<void> {
    // getByText with a plain string is a case-insensitive substring match, so
    // these also catch e.g. "error:". It is the trailing colon / multi-word
    // phrasing (not letter case) that keeps "0 errors" from matching.
    const errorIndicators = [
      'Error:',
      'Error evaluating',
      'evaluation failed',
      'Parse error',
      'Syntax error',
    ];

    for (const errorText of errorIndicators) {
      const matches = await this.page.getByText(errorText).all();
      for (const match of matches) {
        // Best effort: if the visibility probe itself fails, treat the
        // element as not visible rather than failing the test on the probe.
        const errorVisible = await match.isVisible().catch(() => false);
        expect(errorVisible, `Unexpected error: found '${errorText}'`).toBe(false);
      }
    }
  }

  /**
   * Verify that evaluating an expression adds it to the history.
   * The expression text (especially with backticks) is unique to the evaluator.
   *
   * @param expr - The expression to evaluate
   * @param description - Test description for error messages
   * @param expectedResult - Optional: expected result text to verify (must be evaluator-unique)
   */
  async evaluateAndVerifyInHistory(expr: string, description: string, expectedResult?: string): Promise<void> {
    // Snapshot counts first: success is measured as an increase, because the
    // same text may already be on the page from earlier evaluations.
    const countBefore = await this.countOnPage(expr);
    const resultCountBefore = expectedResult ? await this.countOnPage(expectedResult) : 0;

    await this.evaluate(expr);

    const countAfter = await this.countOnPage(expr);
    expect(countAfter, `${description}: expression '${expr}' should appear in history`).toBeGreaterThan(countBefore);

    if (expectedResult) {
      const resultCountAfter = await this.countOnPage(expectedResult);
      if (resultCountAfter <= resultCountBefore) {
        // Failure path only: gather page text so the assertion message shows
        // what the evaluator actually printed.
        const pageText = await this.page.locator('body').innerText();
        // Keep lines mentioning the expression or a Node atom (at most 10).
        const lines = pageText.split('\n');
        const relevantLines = lines.filter(line => line.includes(expr) || line.includes('Node'));
        const context = relevantLines.slice(0, 10).join('\n');

        const errorMsg =
          `${description}: expected result '${expectedResult}' not found.\n` +
          `Expression '${expr}' was evaluated.\n` +
          `Expected '${expectedResult}' count to increase from ${resultCountBefore}, but got ${resultCountAfter}.\n` +
          `Relevant page content:\n${context}\n` +
          `(Check screenshot for full evaluator output)`;

        // Known to fail here; issued through expect so the rich message is reported.
        expect(resultCountAfter, errorMsg).toBeGreaterThan(resultCountBefore);
      }
    }

    await this.expectNoError();
  }

  /**
   * Verify that evaluating a boolean formula produces the expected result.
   * #t and #f are unique to evaluator output (don't appear in graph).
   *
   * @param expr - The formula to evaluate
   * @param expectedBool - Expected result text (e.g. BOOL_TRUE or BOOL_FALSE)
   * @param description - Test description for error messages
   */
  async evaluateAndExpectBool(expr: string, expectedBool: string, description: string): Promise<void> {
    const exprCountBefore = await this.countOnPage(expr);
    const boolCountBefore = await this.countOnPage(expectedBool);

    await this.evaluate(expr);

    // Verify expression was added to history
    const exprCountAfter = await this.countOnPage(expr);
    expect(exprCountAfter, `${description}: expression should appear in history`).toBeGreaterThan(exprCountBefore);

    // Verify correct boolean result appeared (#t/#f are evaluator-specific)
    const boolCountAfter = await this.countOnPage(expectedBool);
    expect(boolCountAfter, `${description}: expected '${expectedBool}' result`).toBeGreaterThan(boolCountBefore);

    await this.expectNoError();
  }
}

test.describe('Sterling Evaluator', () => {
let forge: ForgeInstance;
Expand All @@ -10,88 +141,81 @@ test.describe('Sterling Evaluator', () => {
}
});

test('can open the evaluator panel', async ({ page }) => {
forge = await startForge('e2e/fixtures/simple-graph.frg');
await page.goto(forge.sterlingUrl);

// Must run a command first to have an instance to evaluate
/**
* Helper to set up a test: open Sterling, run a command, open evaluator.
* @param page - Playwright page
* @param forgeInstance - The Forge instance (must be started before calling)
*/
async function setupEvaluator(page: Page, forgeInstance: ForgeInstance): Promise<EvaluatorHelper> {
await page.goto(forgeInstance.sterlingUrl);
await selectAndRunCommand(page, 'simpleRun');
await page.waitForTimeout(TIMEOUT_GRAPH_LAYOUT);

// Wait for graph layout to finish
await page.waitForTimeout(2000);

// The Evaluator is a sidebar tab on the right - click it
// It's displayed as vertical text "Evaluator" in a tab button
const evaluatorTab = page.getByText('Evaluator', { exact: true }).first();
await expect(evaluatorTab).toBeVisible({ timeout: 5000 });
await expect(evaluatorTab).toBeVisible({ timeout: TIMEOUT_ELEMENT_VISIBLE });
await evaluatorTab.click();

// After clicking, the EVALUATOR panel should open
await expect(page.getByPlaceholder('Enter an expression')).toBeVisible({ timeout: 5000 });
});
const helper = new EvaluatorHelper(page);
await helper.waitForVisible();
return helper;
}

test('can evaluate a simple expression', async ({ page }) => {
test('can open the evaluator panel', async ({ page }) => {
forge = await startForge('e2e/fixtures/simple-graph.frg');
await page.goto(forge.sterlingUrl);

// Run a command first
await selectAndRunCommand(page, 'simpleRun');

// Wait for graph layout to finish
await page.waitForTimeout(2000);

// Open evaluator panel
const evaluatorTab = page.getByText('Evaluator', { exact: true }).first();
await evaluatorTab.click();
await expect(page.getByPlaceholder('Enter an expression')).toBeVisible({ timeout: 5000 });
await setupEvaluator(page, forge);
// setupEvaluator already verifies the evaluator input is visible
});

// Find input field in evaluator - placeholder is "Enter an expression..."
const evaluatorInput = page.getByPlaceholder('Enter an expression');
await expect(evaluatorInput).toBeVisible({ timeout: 5000 });
test('sig expression evaluates without error', async ({ page }) => {
forge = await startForge('e2e/fixtures/simple-graph.frg');
const evaluator = await setupEvaluator(page, forge);

// Type a simple expression - "Node" should return all Node atoms
await evaluatorInput.fill('Node');
await evaluatorInput.press('Enter');
// Evaluate "Node" - we can only verify no error occurs
// (result atoms are indistinguishable from graph atoms)
await evaluator.evaluate('Node');
await evaluator.expectNoError();
});

// Wait for result
await page.waitForTimeout(2000);
test('relation expression evaluates without error', async ({ page }) => {
forge = await startForge('e2e/fixtures/simple-graph.frg');
const evaluator = await setupEvaluator(page, forge);

// The result should contain Node references - look for Node0, Node1, or Node2
await expect(page.getByText(/Node[012]/).first()).toBeVisible({ timeout: 5000 });
// Evaluate "edges" - we can only verify no error occurs
// (result tuples are indistinguishable from graph edges)
await evaluator.evaluate('edges');
await evaluator.expectNoError();
});

test('evaluator shows result for edges relation', async ({ page }) => {
// --- Atom-specific evaluator tests ---
// These tests ensure backtick atoms work correctly in the Sterling evaluator.
// Combined into one test to avoid expensive Forge/Sterling startup for each case.

test('can evaluate atom expressions and formulas', async ({ page }) => {
forge = await startForge('e2e/fixtures/simple-graph.frg');
await page.goto(forge.sterlingUrl);
const evaluator = await setupEvaluator(page, forge);

// Run a command first
await selectAndRunCommand(page, 'simpleRun');
// Test 1: Single atom expression - backtick expression appears in history
await evaluator.evaluateAndVerifyInHistory('`Node0', 'Single atom', '`Node0');

// Wait for graph layout to finish
await page.waitForTimeout(2000);
// Test 2: Atom join expression - `Node0.edges (result depends on instance)
await evaluator.evaluateAndVerifyInHistory('`Node0.edges', 'Atom join');

// Open evaluator panel
const evaluatorTab = page.getByText('Evaluator', { exact: true }).first();
await evaluatorTab.click();
await expect(page.getByPlaceholder('Enter an expression')).toBeVisible({ timeout: 5000 });
// Test 3: Atom membership formula - `Node0 in Node should be true
await evaluator.evaluateAndExpectBool('`Node0 in Node', BOOL_TRUE, 'Atom membership (true)');

// Count how many result entries exist before evaluation
const resultsBefore = await page.locator('text=/Node\\d.*→.*Node\\d/').count();
// Test 4: Atom equality formula - `Node0 = `Node0 should be true
await evaluator.evaluateAndExpectBool('`Node0 = `Node0', BOOL_TRUE, 'Atom self-equality (true)');

// Find input field and evaluate the edges relation
const evaluatorInput = page.getByPlaceholder('Enter an expression');
await evaluatorInput.fill('edges');
await evaluatorInput.press('Enter');
// Test 5: Atom inequality formula - `Node0 != `Node1 should be true
await evaluator.evaluateAndExpectBool('`Node0 != `Node1', BOOL_TRUE, 'Atom inequality (true)');

// Wait for result
await page.waitForTimeout(2000);
// Test 6: Atom equality with different atoms - should be FALSE
await evaluator.evaluateAndExpectBool('`Node0 = `Node1', BOOL_FALSE, 'Different atoms equality (false)');

// The evaluator should show tuple results like "Node0 → Node1" for edges
// Since simpleRun requires "some edges", there should be at least one tuple
const resultsAfter = await page.locator('text=/Node\\d/').count();
// Test 7: Atom in set union - backtick expression appears in history
await evaluator.evaluateAndVerifyInHistory('`Node0 + `Node1', 'Set union', '((Node0) (Node1))');

// There should be more Node references after evaluation (the result tuples)
// Or at minimum, the evaluator panel should show the edges expression was processed
expect(resultsAfter).toBeGreaterThan(0);
// Test 8: Atom in product - backtick expression appears in history
await evaluator.evaluateAndVerifyInHistory('`Node0 -> `Node1', 'Product', '((Node0 Node1))');
});
});
5 changes: 4 additions & 1 deletion forge/lang/ast.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -1348,10 +1348,13 @@
"unknown:?:?"))

(define (raise-forge-error #:msg [msg "error"] #:context [context #f] #:raise? [raise? #t])
(if raise?
(if raise?
(raise-user-error (format "[~a] ~a" (pretty-loc context) msg))
(fprintf (current-error-port) "[~a] ~a" (pretty-loc context) msg)))

; Print a warning line to the current error port.
; Output has the form "Warning: [<location>] <msg>" followed by a newline,
; where <location> is the result of pretty-loc on the given context.
; Unlike raise-forge-error, this only prints; it never calls raise-user-error.
;   #:msg     - warning text (defaults to "warning")
;   #:context - value passed to pretty-loc (defaults to #f)
(define (raise-forge-warning #:msg [msg "warning"] #:context [context #f])
  (define where (pretty-loc context))
  (fprintf (current-error-port) "Warning: [~a] ~a~n" where msg))

; Helper for other locations we might need to generate a nodeinfo struct from a variety
; of datatype possibilities.
(define (build-nodeinfo context)
Expand Down
9 changes: 5 additions & 4 deletions forge/lang/expander.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
(only-in pkg/lib pkg-directory)
(only-in racket/path file-name-from-path find-relative-path normalize-path)
(only-in racket/string string-replace string-join)
(only-in forge/lang/ast raise-forge-error))
(only-in forge/lang/ast raise-forge-error raise-forge-warning))
syntax/srcloc
racket/string
(only-in syntax/modresolve resolve-module-path)
Expand Down Expand Up @@ -1246,10 +1246,11 @@
(define-for-syntax (ensure-target-ref target-pred ex)
(define tp (syntax-e target-pred))
(let ([ex-as-datum (syntax->datum ex)])
(unless
(unless
(memq tp (flatten ex-as-datum))
(eprintf "Warning: ~a ~a:~a Test does not reference ~a.\n"
(syntax-source ex) (syntax-line ex) (syntax-column ex) tp))))
(raise-forge-warning
#:msg (format "Test does not reference ~a." tp)
#:context ex))))


(define-syntax (NT-TestSuiteDecl stx)
Expand Down
4 changes: 2 additions & 2 deletions forge/server/forgeserver.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,11 @@
#:context context)]))
(define xml (get-xml inst))

; is-running? is about the _solver process itself_.
; is-solver-process-alive? is about the _solver process itself_.
; is-run-closed? is about whether this specific run has been terminated
; Sat? is about whether the solution we have is Sat.
(define response (make-sterling-data xml datum-id name temporal?
(and (is-running? the-run) (not (is-run-closed? the-run)) inst)
(and (is-solver-process-alive? the-run) (not (is-run-closed? the-run)) inst)
old-datum-id))
(send-to-sterling response #:connection connection)]
[else
Expand Down
Loading
Loading