From 11fcfa21ee86d6c210c02bfdace7b78d0a5aef73 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Apr 2025 21:17:00 -0400 Subject: [PATCH 01/26] add begin to racket-body --- private/syntax/interface.rkt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/private/syntax/interface.rkt b/private/syntax/interface.rkt index 19a4fdc..d80000e 100644 --- a/private/syntax/interface.rkt +++ b/private/syntax/interface.rkt @@ -359,6 +359,9 @@ ((~literal define-syntaxes) (x:racket-macro ...) e:expr) #:binding (export-syntaxes x ... e) + ((~literal begin) b:racket-body ...) + #:binding [(re-export b) ...] + e:racket-expr)) (define-syntax define-dsl-syntax From 16806df80fad003c50d06c444b87761207a60199 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Apr 2025 22:38:53 -0400 Subject: [PATCH 02/26] syntax class expander --- private/test/syntax-class-expander.rkt | 111 +++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 private/test/syntax-class-expander.rkt diff --git a/private/test/syntax-class-expander.rkt b/private/test/syntax-class-expander.rkt new file mode 100644 index 0000000..e62ce65 --- /dev/null +++ b/private/test/syntax-class-expander.rkt @@ -0,0 +1,111 @@ +#lang racket/base + +;; Similar to private/test/simple-bspec.rkt, but attempting to create an expander that +;; uses syntax classes for nonterminals. 
+;; This was created to research a potential solution for +;; https://github.com/michaelballantyne/syntax-spec/issues/62 + +(require + (for-syntax + racket/base + syntax/parse + "../ee-lib/main.rkt" + "../runtime/binding-spec.rkt") + + "../ee-lib/define.rkt") + +#;;; ANF language +(syntax-spec + (binding-class my-var) + (nonterminal a-expr + e:c-expr + (my-let ([x:my-var e:c-expr]) b:a-expr) + #:binding (scope (bind x) b)) + (nonterminal c-expr + e:i-expr + (my-+ a:i-expr b:i-expr)) + (nonterminal i-expr + n:number + x:my-var)) + +(define-literal-forms anf-lits + "mylang forms may only be used in mylang" + (my-let my-+)) + +(begin-for-syntax + (struct my-var-rep ()) + + ;; hash from symbols to binding representations + ;; to simulate the real expander environment + (define pretend-binding-store (make-hash)) + (hash-set! pretend-binding-store 'bogus #f) + + (define-syntax-class a-expr + #:literal-sets (anf-lits) + (pattern e:c-expr + #:attr expanded #'e.expanded) + (pattern (my-let ([x:my-var-bind e:c-expr]) b:a-expr) + #:attr expanded #'(my-let ([x.expanded e.expanded]) b.expanded))) + + (define-syntax-class c-expr + #:literal-sets (anf-lits) + (pattern e:i-expr + #:attr expanded (attribute e.expanded)) + (pattern (my-+ a:i-expr b:i-expr) + #:attr expanded #'(my-+ a.expanded b.expanded))) + + (define-syntax-class i-expr + #:literal-sets (anf-lits) + (pattern n:number + #:attr expanded #'n) + (pattern x:my-var-ref + #:attr expanded #'x.expanded)) + + (define-syntax-class my-var-bind + (pattern x:id + #:fail-when (hash-has-key? pretend-binding-store (syntax->datum #'x)) "duplicate binding" + ;; we need the do because the bind! needs to happen before the body expands. + ;; attrs are eagerly evaluated when a pattern with a syntax class is matched, + ;; so the bind! needs to happen before the body is even matched. + #:do [(hash-set! 
pretend-binding-store (syntax->datum #'x) (my-var-rep))] + #:attr expanded #'x)) + + (define-syntax-class my-var-ref + (pattern x:id + #:fail-unless (hash-has-key? pretend-binding-store (syntax->datum #'x)) "unbound variable" + #:fail-unless (my-var-rep? (hash-ref pretend-binding-store (syntax->datum #'x))) "expected a my-var" + #:attr expanded #'x))) + +(define-syntax (mylang stx) + (syntax-parse stx + [(_ e:a-expr) + #''e.expanded])) + +(require rackunit syntax/macro-testing) + +(define-syntax-rule (check-success e) + (check-equal? (mylang e) + 'e)) +(define-syntax-rule (check-failure e msg) + (check-exn + msg + (lambda () + (convert-compile-time-error (mylang e))))) + +(check-success 1) +(check-success (my-+ 1 2)) +(check-success (my-let ([x 1]) x)) +(check-failure + y + #rx"unbound var") +(check-failure + (my-let ([a 1]) + (my-let ([a 2]) + 3)) + #rx"duplicate binding") +(check-failure + bogus + #rx"expected a my-var") +(check-failure + (my-+ (my-let ([z 1]) z) 2) + #rx"expected number or expected identifier") From 70c90900d1da43fd884ed073fdcef5ffce21e3f3 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Fri, 18 Apr 2025 11:20:48 -0400 Subject: [PATCH 03/26] updates from call --- private/test/syntax-class-expander.rkt | 80 ++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/private/test/syntax-class-expander.rkt b/private/test/syntax-class-expander.rkt index e62ce65..75cccf5 100644 --- a/private/test/syntax-class-expander.rkt +++ b/private/test/syntax-class-expander.rkt @@ -20,39 +20,48 @@ (nonterminal a-expr e:c-expr (my-let ([x:my-var e:c-expr]) b:a-expr) + #:binding (scope (bind x) b) + ;; body will expand before binding x + (where b:a-expr + ([x:my-var e:c-expr])) #:binding (scope (bind x) b)) (nonterminal c-expr e:i-expr - (my-+ a:i-expr b:i-expr)) + (my-+ a:i-expr b:i-expr) + ;; application + (a:i-expr b:i-expr c:i-expr)) (nonterminal i-expr n:number x:my-var)) (define-literal-forms anf-lits "mylang forms may only be 
used in mylang" - (my-let my-+)) + (my-let my-+ where)) (begin-for-syntax (struct my-var-rep ()) ;; hash from symbols to binding representations ;; to simulate the real expander environment - (define pretend-binding-store (make-hash)) - (hash-set! pretend-binding-store 'bogus #f) + (define pretend-binding-store (make-hash (list (cons 'bogus #f)))) (define-syntax-class a-expr #:literal-sets (anf-lits) (pattern e:c-expr #:attr expanded #'e.expanded) (pattern (my-let ([x:my-var-bind e:c-expr]) b:a-expr) - #:attr expanded #'(my-let ([x.expanded e.expanded]) b.expanded))) + #:attr expanded #'(my-let ([x.expanded e.expanded]) b.expanded)) + (pattern (where b:a-expr ([x:my-var-bind e:c-expr])) + #:attr expanded #'(where b.expanded ([x.expanded e.expanded])))) (define-syntax-class c-expr #:literal-sets (anf-lits) (pattern e:i-expr #:attr expanded (attribute e.expanded)) (pattern (my-+ a:i-expr b:i-expr) - #:attr expanded #'(my-+ a.expanded b.expanded))) + #:attr expanded #'(my-+ a.expanded b.expanded)) + (pattern (a:i-expr b:i-expr c:i-expr) + #:attr expanded #'(#%app a.expanded b.expanded c:i-expr))) (define-syntax-class i-expr #:literal-sets (anf-lits) @@ -77,6 +86,7 @@ #:attr expanded #'x))) (define-syntax (mylang stx) + (set! pretend-binding-store (make-hash (list (cons 'bogus #f)))) (syntax-parse stx [(_ e:a-expr) #''e.expanded])) @@ -109,3 +119,61 @@ (check-failure (my-+ (my-let ([z 1]) z) 2) #rx"expected number or expected identifier") +;; problems +;; this fails because the reference parses/expands before the binding +#;(check-success + (where x + ([x 1]))) +;; this fails because my-+ gets bound after parsing/expanding the body, +;; so my-+ gets resolved to the literal. +;; (there would be no actual binding anyway in this toy example, but there would be in ss) +#;(check-equal? 
(mylang (where (my-+ 1 2) + ([my-+ 3]))) + ;; should not treat my-+ as a literal in the where body + '(where (#%app my-+ 1 2) + ([my-+ 3]))) + +#| +examples that broke the original eager design: +(where x + ([x 1])) +this broke bc the reference parsed/expanded before the binding. syntax-parse parses (and thus expands bc of attr eagerness) +left to right. +(where (my-+ 1 2) + ([my-+ 3])) +this broke for a similar reason. This shows that even "structural" parsing needs bindings to detect literal shadowing. + +problems: +- if you do everything eager (and do binding class resolution in parsing), then where breaks because the body +gets parsed before the binding happens +- if you treat all ids as ids and ignore binding classes during parsing, then the my-+ shadow thing fails +because it will resolve to the literal instead of the shadowed thing +- the solution is to do binding-spec-driven expansion order, which requires promises and binding stuff has to happen in the post of the production. +but then, you'll still get the my-+ shadow problem because parsing needs to be driven by binding classes +to distinguish between shadowed literals and actual literals. + +constraints: +- to resolve literals vs references to bindings that shadow literals, you need to bind as you parse +- to do that, you need to delay parsing. parsing and expansion order must be driven by the binding spec. +- to avoid backtracking over a binding, you need to commit when you bind + +you can delay parsing with #:with + +current desired semantics: +full backtracking (even over binding classes), except you commit when you bind a variable. +TODO does cut in a post commit the way you need it to? 
+ +example: + +(nonterminal my-expr + n:number + x:a-var + x:b-var + (let ([x:a-var e:my-expr]) b:my-expr) + #:binding (scope (bind x) b) + (let ([x:b-var e:my-expr]) b:my-expr) + #:binding (scope (bind x) b)) + +identifiers don't commit to a-var (which I'm pretty sure is the current syntax-spec behavior) +but (let ([x 1]) x) commits to the a-var let production +|# From 471af0dbbcf6c1091d8377e2eb035860fa338402 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Sat, 19 Apr 2025 20:39:22 -0400 Subject: [PATCH 04/26] get where and literal shadowing working --- private/test/syntax-class-expander.rkt | 51 ++++++++++++++++++-------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/private/test/syntax-class-expander.rkt b/private/test/syntax-class-expander.rkt index 75cccf5..e4f955f 100644 --- a/private/test/syntax-class-expander.rkt +++ b/private/test/syntax-class-expander.rkt @@ -49,19 +49,39 @@ #:literal-sets (anf-lits) (pattern e:c-expr #:attr expanded #'e.expanded) - (pattern (my-let ([x:my-var-bind e:c-expr]) b:a-expr) - #:attr expanded #'(my-let ([x.expanded e.expanded]) b.expanded)) - (pattern (where b:a-expr ([x:my-var-bind e:c-expr])) - #:attr expanded #'(where b.expanded ([x.expanded e.expanded])))) + (pattern (my-let ([x:id ~! e:expr]) b:expr) + #:with x^:my-var-bind #'x + #:with e^:c-expr #'e + #:with b^:a-expr #'b + #:attr expanded #'(my-let ([x^.expanded e^.expanded]) b^.expanded)) + (pattern (where b:expr ([x:id ~! 
e:expr])) + ;; we expand x and e BEFORE e because the binding spec is + ;; [(scope (bind x) b) e] + ;; expansion order is driven by the binding spec + #:with x^:my-var-bind #'x + #:with b^:a-expr #'b + #:with e^:c-expr #'e + #:attr expanded #'(where b^.expanded ([x^.expanded e^.expanded])))) (define-syntax-class c-expr #:literal-sets (anf-lits) (pattern e:i-expr #:attr expanded (attribute e.expanded)) - (pattern (my-+ a:i-expr b:i-expr) - #:attr expanded #'(my-+ a.expanded b.expanded)) - (pattern (a:i-expr b:i-expr c:i-expr) - #:attr expanded #'(#%app a.expanded b.expanded c:i-expr))) + ;; my-+ should be first, but due to the fake binding store, + ;; to get the my-+ shadowing test passing, this needed to be first. + ;; in a real implementation with the real binding store, the my-+ pattern would + ;; fail because the identifier wouldn't be referencing the literal, right?. + ;; you'd need to be painting scopes on syntax as you go, but that could happen in + ;; the pattern actions I guess. + (pattern (a:expr b:expr c:expr) + #:with a^:i-expr #'a + #:with b^:i-expr #'b + #:with c^:i-expr #'c + #:attr expanded #'(#%app a^.expanded b^.expanded c^.expanded)) + (pattern (my-+ a:expr b:expr) + #:with a^:i-expr #'a + #:with b^:i-expr #'b + #:attr expanded #'(my-+ a^.expanded b^.expanded))) (define-syntax-class i-expr #:literal-sets (anf-lits) @@ -118,16 +138,15 @@ #rx"expected a my-var") (check-failure (my-+ (my-let ([z 1]) z) 2) - #rx"expected number or expected identifier") -;; problems -;; this fails because the reference parses/expands before the binding -#;(check-success + #rx"expected a-expr") +;; where. +;; this test fails if you expand left-to-right +(check-success (where x ([x 1]))) -;; this fails because my-+ gets bound after parsing/expanding the body, -;; so my-+ gets resolved to the literal. -;; (there would be no actual binding anyway in this toy example, but there would be in ss) -#;(check-equal? (mylang (where (my-+ 1 2) +;; shadow my-+ in where. 
+;; this test fails if you expand left-to-right +(check-equal? (mylang (where (my-+ 1 2) ([my-+ 3]))) ;; should not treat my-+ as a literal in the where body '(where (#%app my-+ 1 2) From 0e459599d0560d087b1151e506c5f878adb4169e Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Fri, 2 May 2025 20:25:26 -0400 Subject: [PATCH 05/26] commit on forms, error on backtrack over binding --- private/test/syntax-class-expander.rkt | 64 +++++++++++++++++++++----- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/private/test/syntax-class-expander.rkt b/private/test/syntax-class-expander.rkt index e4f955f..6d1ec7a 100644 --- a/private/test/syntax-class-expander.rkt +++ b/private/test/syntax-class-expander.rkt @@ -46,16 +46,20 @@ (define pretend-binding-store (make-hash (list (cons 'bogus #f)))) (define-syntax-class a-expr + #:no-delimit-cut #:literal-sets (anf-lits) (pattern e:c-expr #:attr expanded #'e.expanded) - (pattern (my-let ([x:id ~! e:expr]) b:expr) + (pattern (my-let ~! ([x:id e:expr]) b:expr) + ;; we expand x and b BEFORE e because the binding spec is + ;; [(scope (bind x) b) e] + ;; expansion order is driven by the binding spec #:with x^:my-var-bind #'x - #:with e^:c-expr #'e #:with b^:a-expr #'b + #:with e^:c-expr #'e #:attr expanded #'(my-let ([x^.expanded e^.expanded]) b^.expanded)) - (pattern (where b:expr ([x:id ~! e:expr])) - ;; we expand x and e BEFORE e because the binding spec is + (pattern (where ~! 
b:expr ([x:id e:expr])) + ;; we expand x and b BEFORE e because the binding spec is ;; [(scope (bind x) b) e] ;; expansion order is driven by the binding spec #:with x^:my-var-bind #'x @@ -64,6 +68,7 @@ #:attr expanded #'(where b^.expanded ([x^.expanded e^.expanded])))) (define-syntax-class c-expr + #:no-delimit-cut #:literal-sets (anf-lits) (pattern e:i-expr #:attr expanded (attribute e.expanded)) @@ -78,12 +83,13 @@ #:with b^:i-expr #'b #:with c^:i-expr #'c #:attr expanded #'(#%app a^.expanded b^.expanded c^.expanded)) - (pattern (my-+ a:expr b:expr) + (pattern (my-+ ~! a:expr b:expr) #:with a^:i-expr #'a #:with b^:i-expr #'b #:attr expanded #'(my-+ a^.expanded b^.expanded))) (define-syntax-class i-expr + #:no-delimit-cut #:literal-sets (anf-lits) (pattern n:number #:attr expanded #'n) @@ -91,15 +97,16 @@ #:attr expanded #'x.expanded)) (define-syntax-class my-var-bind - (pattern x:id - #:fail-when (hash-has-key? pretend-binding-store (syntax->datum #'x)) "duplicate binding" - ;; we need the do because the bind! needs to happen before the body expands. - ;; attrs are eagerly evaluated when a pattern with a syntax class is matched, - ;; so the bind! needs to happen before the body is even matched. - #:do [(hash-set! pretend-binding-store (syntax->datum #'x) (my-var-rep))] + #:no-delimit-cut + (pattern (~and x:id + (~do (when (hash-has-key? pretend-binding-store (syntax->datum #'x)) + (raise-syntax-error 'my-lang "duplicate binding" #'x)) + (hash-set! pretend-binding-store (syntax->datum #'x) (my-var-rep))) + (~undo (error "backtracked over a binding. flaw in language itself"))) #:attr expanded #'x)) (define-syntax-class my-var-ref + #:no-delimit-cut (pattern x:id #:fail-unless (hash-has-key? pretend-binding-store (syntax->datum #'x)) "unbound variable" #:fail-unless (my-var-rep? 
(hash-ref pretend-binding-store (syntax->datum #'x))) "expected a my-var" @@ -138,7 +145,7 @@ #rx"expected a my-var") (check-failure (my-+ (my-let ([z 1]) z) 2) - #rx"expected a-expr") + #rx"expected number or expected identifier") ;; where. ;; this test fails if you expand left-to-right (check-success @@ -151,6 +158,15 @@ ;; should not treat my-+ as a literal in the where body '(where (#%app my-+ 1 2) ([my-+ 3]))) +;; can't backtrack over a binding +(check-failure + (my-let ([x y]) x) + ;; an unbound var error would be better here + #rx"backtracked over a binding") +;; can't backtrack over a binding +(check-failure + (my-let ([x (my-+ y y)]) x) + #rx"backtracked over a binding") #| examples that broke the original eager design: @@ -181,6 +197,7 @@ you can delay parsing with #:with current desired semantics: full backtracking (even over binding classes), except you commit when you bind a variable. TODO does cut in a post commit the way you need it to? +No, I don't think so. I ended up needing to put ~! in the production structure pattern example: @@ -196,3 +213,26 @@ example: identifiers don't commit to a-var (which I'm pretty sure is the current syntax-spec behavior) but (let ([x 1]) x) commits to the a-var let production |# + +#| +After solving where and literal shadowing: + +The current semantics are full backtracking, with the exception that +binding sites cause commitment. + +limitation: moving sub-parsing into #:with messes up failure progress measurement. +many parse attempt paths end up with the same LATE progress so error messages suck. + +constraints: +- we need #:post or equivalent to control order of sub-expansion and therefore sub-parsing +- when most sub-parsing happens in posts, we get failure progress ties (lots of things are just LATE), which cause vague error messages. +- within a pattern, it looks like each post does not contribute additional progress beyond a single LATE. Not sure about this though. 
+ + + + + +new design: +- form groups commit on literals +- trying to backtrack over bindings is an error +|# From af79e77f2b3ad542a795619fae0ae70dc166bf01 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Fri, 16 May 2025 17:54:31 -0400 Subject: [PATCH 06/26] subst --- main.rkt | 2 + private/ee-lib/flip-intro-scope.rkt | 4 +- private/runtime/binding-operations.rkt | 48 ++++++++--- scribblings/reference/compiling.scrbl | 13 +++ tests/binding-operations.rkt | 115 ++++++++++++++++++++++++- 5 files changed, 168 insertions(+), 14 deletions(-) diff --git a/main.rkt b/main.rkt index 8b837d0..2d64ead 100644 --- a/main.rkt +++ b/main.rkt @@ -53,6 +53,8 @@ free-identifiers binding-identifiers alpha-equivalent? + identifier=? + subst get-racket-referenced-identifiers)) (require "private/syntax/interface.rkt" diff --git a/private/ee-lib/flip-intro-scope.rkt b/private/ee-lib/flip-intro-scope.rkt index f946ad3..a0985e3 100644 --- a/private/ee-lib/flip-intro-scope.rkt +++ b/private/ee-lib/flip-intro-scope.rkt @@ -16,4 +16,6 @@ (define/who (flip-intro-scope stx) (check who syntax? stx) - ((make-intro-scope-introducer) stx 'flip)) + (if (syntax-transforming?) + ((make-intro-scope-introducer) stx 'flip) + stx)) diff --git a/private/runtime/binding-operations.rkt b/private/runtime/binding-operations.rkt index ba36eed..3f55179 100644 --- a/private/runtime/binding-operations.rkt +++ b/private/runtime/binding-operations.rkt @@ -2,7 +2,9 @@ (provide free-identifiers binding-identifiers + identifier=? alpha-equivalent? + subst get-racket-referenced-identifiers (rename-out [identifier=? compiled-identifier=?])) @@ -99,25 +101,22 @@ ; Syntax, Syntax [#:allow-host? Boolean] -> Boolean ; Are the two expressions alpha-equivalent? (define (alpha-equivalent? stx-a stx-b #:allow-host? [allow-host? #f]) - (define bound-reference=? (alpha-equivalent?/bindings stx-a stx-b allow-host?)) + (define bound-reference=? (syntaxes->bound-reference=? stx-a stx-b allow-host?)) (and bound-reference=? 
(alpha-equivalent?/references stx-a stx-b bound-reference=? allow-host?))) ; Syntax Syntax Boolean -> (or/c #f (Identifier Identifier -> Boolean)) -; check that the bindings of both expressions can be alpha-equivalent. -; returns bound-reference=?, or #f if the binding check fails. -(define (alpha-equivalent?/bindings stx-a stx-b allow-host?) +; if the two terms have corresponding binders, build bound-reference=? +; If they have different numbers of binders, return #f +; bound-reference=? answers "do these two references refer to corresponding binders?" +(define (syntaxes->bound-reference=? stx-a stx-b allow-host?) (define table-a (make-free-id-table)) (define table-b (make-free-id-table)) + ;; associate both binders with the same gensym (define (bind! identifier-a identifier-b) (define x (gensym)) - (free-id-table-set! table-a identifier-a x) - (free-id-table-set! table-b identifier-b x)) - (define (bound-reference=? identifier-a identifier-b) - (and (dict-has-key? table-a identifier-a) - (dict-has-key? table-b identifier-b) - (eq? (free-id-table-ref table-a identifier-a) - (free-id-table-ref table-b identifier-b)))) + (free-id-table-set! table-a (compiled-from identifier-a) x) + (free-id-table-set! table-b (compiled-from identifier-b) x)) (define binders-a (binding-identifiers stx-a #:allow-host? allow-host?)) (define binders-b (binding-identifiers stx-b #:allow-host? allow-host?)) ; must traverse binders before references @@ -127,7 +126,16 @@ [binder-b binders-b]) (bind! binder-a binder-b)) (and (= (length binders-a) (length binders-b)) - bound-reference=?)) + (substitutions->bound-reference=? table-a table-b))) + +;; FreeIdTable FreeIdTable -> (Identifier Identifier -> Boolean) +;; Do these two references refer to corresponding binders? +;; table-a and table-b should map corresponding binders to the same, unique value +(define ((substitutions->bound-reference=? table-a table-b) identifier-a identifier-b) + (and (dict-has-key? 
table-a (compiled-from identifier-a)) + (dict-has-key? table-b (compiled-from identifier-b)) + (eq? (free-id-table-ref table-a (compiled-from identifier-a)) + (free-id-table-ref table-b (compiled-from identifier-b))))) ; Syntax Syntax (Identifier Identifier -> Boolean) Boolean -> Boolean ; check that the references are alpha-equivalent. @@ -159,6 +167,22 @@ [(a b) (equal? (syntax->datum #'a) (syntax->datum #'b))]))) +;; Syntax Syntax Syntax -> Syntax +;; Replace all occurrences of target (by alpha equivalence) with replacement in stx. +;; Leaves host expressions unchanged. +(define (subst stx target replacement) + (let loop ([stx stx]) + (if (if (compiled-binder? target) + (and (compiled-reference? stx) (identifier=? stx target)) + (alpha-equivalent? stx target)) + replacement + (syntax-parse stx + ;; ignore host expressions + [((~literal #%host-expression) . _) this-syntax] + [(a . b) + (quasisyntax/loc this-syntax (#,(loop #'a) . #,(loop #'b)))] + [_ stx])))) + (define current-referenced-vars (make-parameter #f)) ; get the racket vars referenced in e of the provided binding classes diff --git a/scribblings/reference/compiling.scrbl b/scribblings/reference/compiling.scrbl index 9cc0af2..fc403ee 100644 --- a/scribblings/reference/compiling.scrbl +++ b/scribblings/reference/compiling.scrbl @@ -232,6 +232,19 @@ Returns @racket[#t] if the two DSL expressions are alpha-equivalent, @racket[#f] Analysis of @tech{host expressions} is currently not supported. When given syntax that contains a host expression, the operation raises an error if @racket[allow-host?] is @racket[#f], or ignores that portion is syntax if @racket[allow-host?] is @racket[#t]. +@defproc[(subst [stx syntax?] [target syntax?] [replacement syntax?]) syntax?] + +Substitutes occurences of (expressions @racket[alpha-equivalent?] to) @racket[target] with @racket[replacement] in @racket[stx]. + +All arguments must be the result of DSL expansion, not just plain racket expressions. 
+ +In the case that @racket[target] is an identifier from a binding position, references will be replaced by @racket[replacement]. + +Host expressions are left unchanged. + +NOTE: In order to avoid hygiene issues, it may be necessary to re-expand using @racket[nonterminal-expander] after substitution. +@;TODO example where you need to re-expand + @defform[(get-racket-referenced-identifiers [binding-class-id ...] expr)] Returns an immutable symbol set containing identifiers of the specified binding classes that were referenced in racket (host) expressions in @racket[expr]. diff --git a/tests/binding-operations.rkt b/tests/binding-operations.rkt index 6561cb9..969e37c 100644 --- a/tests/binding-operations.rkt +++ b/tests/binding-operations.rkt @@ -1,11 +1,15 @@ #lang racket/base -(require "../testing.rkt") +(require "../testing.rkt" + ;; for runtime syntax-interpreter testing, we want this at phase 0 + "../private/runtime/binding-operations.rkt") (syntax-spec (binding-class var) + (extension-class macro #:binding-space dsl) (nonterminal expr #:binding-space dsl + #:allow-extension macro n:number v:var (+ e1:expr e2:expr) @@ -13,6 +17,7 @@ (lambda (x:var) e:expr) #:binding (scope (bind x) e) + (e1:expr e2:expr) (letrec ([x:var e:expr] ...) body:expr) #:binding (scope (bind x) ... e ... (scope body))) @@ -123,6 +128,24 @@ (lambda (a) x) (lambda (x) x))) +(check-false + (expr/alpha-equivalent? + (lambda (x) (lambda (x) x)) + (lambda (x) (lambda (y) x)))) + +;; alpha equivalence should respect hygiene +(define-dsl-syntax m macro + (syntax-parser + [(m y:id) #'(lambda (x) y)])) +;; fails bc the binding equivalence uses a free id table even though there are no bindings on expanded syntax, +;; so it ends up not being hygienic +(check-false + (expr/alpha-equivalent? + ;; (lambda (x1) (lambda (x2) x1)) because macro introduction scope + (lambda (x) (m x)) + ;; (lambda (x1) (lambda (x2) x2)) + (lambda (x) (lambda (x) x)))) + (check-true (expr/alpha-equivalent? 
(letrec ([f g] @@ -150,3 +173,93 @@ (expr/alpha-equivalent?/ignore-host (+ x (host PI)) (+ x (host PI)))) + +(syntax-spec + (host-interface/expression + (subst-expr e:expr target:expr replacement:expr) + #`'#,(subst #'e #'target #'replacement))) + +;; substitute whole expression +(check-equal? (subst-expr (lambda (x) x) (lambda (y) y) 1) + 1) +;; substitute sub-expression +(check-equal? (subst-expr (letrec ([x (lambda (x) x)]) x) + (lambda (y) y) + 1) + '(letrec ([x 1]) x)) +;; substitute sub-expression multiple times +(check-equal? (subst-expr (letrec ([x (lambda (x) x)]) (lambda (z) z)) + (lambda (y) y) + 1) + '(letrec ([x 1]) 1)) + +(check-equal? (subst-expr (lambda (x) (+ 1 1)) (+ 1 1) (+ 2 2)) + '(lambda (x) (+ 2 2))) + +;; substitution of a free variable +(syntax-spec + (host-interface/expression (beta/subst app:expr) + (syntax-parse #'app + [(((~datum lambda) (x) body) arg) + #`'#,(subst #'body #'x #'arg)]))) + +(check-equal? + (beta/subst ((lambda (x) (+ x x)) + 1)) + '(+ 1 1)) + +(check-equal? + (beta/subst ((lambda (x) (lambda (y) (+ x y))) + 1)) + '(lambda (y) (+ 1 y))) + +;;; tests using binding operations at runtime, with a syntax-interpreter style + +(syntax-spec + (binding-class rt-var #:binding-space rt) + (extension-class rt-macro #:binding-space rt) + (nonterminal rt-expr + #:allow-extension rt-macro + #:binding-space rt + x:rt-var + (lambda (x:rt-var) e:rt-expr) + #:binding (scope (bind x) e) + (f:rt-expr x:rt-expr)) + (host-interface/expression + (expand-rt e:rt-expr) + #'#'e)) + +(define-dsl-syntax let rt-macro + (syntax-rules () + [(let ([x rhs]) body) + ((lambda (x) body) + rhs)])) + +(check-true (alpha-equivalent? (expand-rt (lambda (x) x)) + (expand-rt (lambda (x) x)))) +(check-true (alpha-equivalent? (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y)))) +(check-true (alpha-equivalent? (expand-rt (lambda (x) (lambda (y) x))) + (expand-rt (lambda (x) (lambda (y) x))))) +(check-false (alpha-equivalent? 
(expand-rt (lambda (x) (lambda (y) x))) + (expand-rt (lambda (x) (lambda (y) y))))) +(check-true (alpha-equivalent? (expand-rt (let ([x (lambda (y) y)]) (x x))) + (expand-rt (let ([x (lambda (x) x)]) (x x))))) + +(check-equal? (syntax->datum (subst (expand-rt (lambda (x) x)) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y)))) + '(lambda (y) y)) +(check-equal? (syntax->datum (subst (expand-rt (lambda (_) (lambda (x) x))) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y)))) + '(lambda (_) (lambda (y) y))) +(check-true (alpha-equivalent? (subst (expand-rt (lambda (x) x)) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y))) + (expand-rt (lambda (y) y)))) +(check-true (alpha-equivalent? (subst (expand-rt (lambda (_) (lambda (x) x))) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y))) + (expand-rt (lambda (_) (lambda (y) y))))) + From 4e17ee171ffffe6e60ecdbb6f1295d7fa3335ad4 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 22 May 2025 17:06:49 -0400 Subject: [PATCH 07/26] syntax interpreter NBE --- main.rkt | 1 - tests/dsls/syntax-interpreter.rkt | 207 ++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 tests/dsls/syntax-interpreter.rkt diff --git a/main.rkt b/main.rkt index 2d64ead..0967f69 100644 --- a/main.rkt +++ b/main.rkt @@ -53,7 +53,6 @@ free-identifiers binding-identifiers alpha-equivalent? - identifier=? subst get-racket-referenced-identifiers)) diff --git a/tests/dsls/syntax-interpreter.rkt b/tests/dsls/syntax-interpreter.rkt new file mode 100644 index 0000000..eaf8619 --- /dev/null +++ b/tests/dsls/syntax-interpreter.rkt @@ -0,0 +1,207 @@ +#lang racket + +;; A lambda calculus interpreter that operates on expanded syntax, +;; using syntax-spec. 
+;; Features normalization by evaluation +;; +;; Benefits: +;; - grammar and binding checking +;; - hygienic macros, syntactic sugar +;; - interpreter/static checks only have to worry about core forms +;; - static checks have access to binding information (this example language has no static checks) +;; - easy to report runtime error source location since we evaluate syntax +;; Drawbacks: +;; - not sure if substitution would work bc of scopes on expanded syntax. might be fine +;; - interpreter helpers need expanded syntax, which might make it hard to unit test them + +(module+ test (require rackunit)) +(require "../../testing.rkt" + syntax/parse + racket/syntax + (for-syntax syntax/parse) + racket/syntax-srcloc + (for-template syntax-spec-dev) + syntax/macro-testing) + +(syntax-spec + (binding-class lc-var #:binding-space lc) + (extension-class lc-macro #:binding-space lc) + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (~> (e1 e2) + ;; this is necessary to preserve source location, properties, etc. + (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr)) + + (host-interface/expression + (lc-expand e:lc-expr) + #'#'e)) + +(define-dsl-syntax let lc-macro + (syntax-rules () + [(let ([x rhs]) body) + ((lambda (x) body) rhs)])) + +(define-syntax-rule (lc e) + (lc-eval (lc-expand e) empty-env)) + +;;; runtime + +;; An Env is a (ImmutableBoundIdTable Value) +(define empty-env (immutable-symbol-table)) +;; Env Identifier -> Value +(define (env-lookup env x) + (if (symbol-table-has-key? env x) + (symbol-table-ref env x) + ;; neutral + x)) +;; Env Identifier Value -> Void +(define (env-extend env x v) + (symbol-table-set env x v)) +;; this seems weird. not sure if this will work +;; from michael: +;; will work fine locally, will get weird if you have host expressions. 
+ +;; A Value is one of +;; a Number +;; a Value -> Value +;; a NeutralExpr + +;; A NeutralExpr is one of +;; Identifier +;; (+ NeutralExpr Value) +;; (+ Value NeutralExpr) +;; (NeutralExpr Value) + +(define-syntax-rule (normalize e) + (lc-uneval (lc e))) + +;; Syntax Env -> Value +(define (lc-eval stx env) + (syntax-parse stx + #:datum-literals (+ lambda #%app) + [n:number + (syntax->datum #'n)] + [(+ e1 e2) + (define v1 (lc-eval #'e1 env)) + (define v2 (lc-eval #'e2 env)) + (cond + [(or (syntax? v1) (syntax? v2)) + ;; neutral + #`(+ #,v1 #,v2)] + [else + (unless (number? v1) + (lc-error this-syntax "+ expects number")) + (unless (number? v2) + (lc-error this-syntax "+ expects number")) + (+ v1 v2)])] + [x:id + (env-lookup env #'x)] + [(lambda (x:id) e:expr) + (lambda (v) (lc-eval #'e (env-extend env #'x v)))] + [(#%app e1 e2) + (match (lc-eval #'e1 env) + [(? procedure? f) + (f (lc-eval #'e2 env))] + [(? syntax? f) + #`(#,f #,(lc-eval #'e2 env))] + [_ + (lc-error this-syntax "applied non-function")])])) + +;; Value -> Syntax +(define (lc-uneval v) + (define count 0) + (define (fresh) + (begin0 (format-id #f "_.~a" count) + (set! count (add1 count)))) + (let loop ([v v]) + (match v + [(? number?) + (datum->syntax #f v)] + [(? procedure?) + (define x (fresh)) + #`(lambda (#,x) #,(loop (v x)))] + [(? syntax?) + (syntax-parse v + [((~datum +) a b) + #`(+ #,(loop (attribute a)) #,(loop (attribute b)))] + [(a b) + #`(#,(loop (attribute a)) #,(loop (attribute b)))] + [x:id #'x] + [_ + ; 3D syntax of a value that got syntax'ed bc of quasiquote + (loop (syntax->datum v))])]))) + +;; Syntax String -> Void +;; raise (runtime) error with source location reported +(define (lc-error stx msg) + (define loc (syntax-srcloc stx)) + (if loc + (raise-user-error (format "~a: ~a" (srcloc->string loc) msg)) + (raise-user-error 'lc msg))) + +(module+ test + (define-syntax-rule (teval e) (check-equal? (lc e) e)) + (define-syntax-rule (tnormalize e e^) (check-equal? 
(syntax->datum (normalize e)) 'e^)) + (define-syntax-rule (t-runtime-error msg e) + (check-exn + msg + (lambda () + (lc e)))) + (define-syntax-rule (t-expand-error msg e) + (check-exn + msg + (lambda () + (convert-compile-time-error (lc e))))) + (teval 1) + (teval (+ 1 1)) + (teval ((lambda (x) x) 1)) + (teval (let ([x 1]) (+ x x))) + (test-case "hygiene" + ;; basic shadow + (teval (let ([x 1]) + (let ([x 2]) + x))) + ;; macro "shadows", should not actually shadow + (define-dsl-syntax m lc-macro + (syntax-rules () + [(m e) + (let ([x 2]) e)])) + (check-equal? (lc (let ([x 1]) (m x))) + 1) + ;; macro ref to macro binding not shadowable from use site + (define-dsl-syntax m2 lc-macro + (syntax-rules () + [(m2 ([x rhs])) + (let ([y 1]) + (let ([x rhs]) + y))])) + (check-equal? (lc (m2 ([y 2]))) + 1)) + ;; errors + (t-expand-error + #rx"not bound" + x) + (t-expand-error + ;; actual decent grammatical error message + #rx"lambda: unexpected term" + (lambda (x y) x)) + (t-runtime-error + ;; source location for runtime error + #px".*\\.rkt:\\d*:\\d*: applied non-function" + (1 2)) + (tnormalize 1 1) + (tnormalize (lambda (x) x) + (lambda (_.0) _.0)) + (tnormalize (lambda (x) (+ (+ 1 1) x)) + (lambda (_.0) (+ 2 _.0))) + (tnormalize (lambda (x) (+ x (+ 1 1))) + (lambda (_.0) (+ _.0 2))) + (tnormalize (lambda (x) (x (lambda (y) y))) + (lambda (_.0) (_.0 (lambda (_.1) _.1))))) From 7ac54a8dfda5426073cc4b52c34210f7a2a105e1 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Wed, 18 Jun 2025 20:22:52 -0400 Subject: [PATCH 08/26] update docs for get-racket-referenced-identifiers --- scribblings/reference/compiling.scrbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scribblings/reference/compiling.scrbl b/scribblings/reference/compiling.scrbl index fc403ee..6846c3a 100644 --- a/scribblings/reference/compiling.scrbl +++ b/scribblings/reference/compiling.scrbl @@ -247,7 +247,7 @@ NOTE: In order to avoid hygiene issues, it may be necessary to re-expand using @ 
@defform[(get-racket-referenced-identifiers [binding-class-id ...] expr)] -Returns an immutable symbol set containing identifiers of the specified binding classes that were referenced in racket (host) expressions in @racket[expr]. +Returns an immutable symbol set containing identifiers of the specified binding classes that were referenced in racket (host) expressions in @racket[expr]. If @racket[expr] is not a host expression, an exception is raised. @section{Expansion} From d251f9186e4eb48aa55b39b4f6ca1f0c3fa2ac27 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Wed, 18 Jun 2025 20:23:13 -0400 Subject: [PATCH 09/26] syntax interpreter tutorial --- scribblings/tutorial/main.scrbl | 1 + .../syntax-interpreter-tutorial.scrbl | 271 ++++++++++++++++++ tests/dsls/syntax-interpreter.rkt | 17 +- 3 files changed, 283 insertions(+), 6 deletions(-) create mode 100644 scribblings/tutorial/syntax-interpreter-tutorial.scrbl diff --git a/scribblings/tutorial/main.scrbl b/scribblings/tutorial/main.scrbl index 8f55b26..11714b4 100644 --- a/scribblings/tutorial/main.scrbl +++ b/scribblings/tutorial/main.scrbl @@ -11,3 +11,4 @@ The tutorial is broken down into illustrative examples: @include-section["basic-tutorial.scrbl"] @include-section["stlc-tutorial.scrbl"] @include-section["multipass-tutorial.scrbl"] +@include-section["syntax-interpreter-tutorial.scrbl"] diff --git a/scribblings/tutorial/syntax-interpreter-tutorial.scrbl b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl new file mode 100644 index 0000000..da80f6c --- /dev/null +++ b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl @@ -0,0 +1,271 @@ +#lang scribble/manual + +@(require (for-label racket racket/block racket/class racket/match racket/list syntax/parse "../../main.rkt") + scribble/example + racket/sandbox) +@(define eval (make-base-eval '(require racket))) +@(define-syntax-rule (repl body ...) 
(examples #:eval eval #:label #f body ...)) + +@title[#:tag "syntax-interpreter"]{Advanced Tutorial: An Interpreter} + +This guide demonstrates how to use syntax-spec to create an interpreter, as well as the benefits of this approach. + +Typically, syntax-spec is used to create a compiler, usually to Racket. However, it's possible to use it to create an interpreter as well. In such an interpreter, syntax-spec will enforce the grammar, check binding, macro-expand the source program, etc. and the interpreter takes in the expanded, core syntax and evaluates it to a value. As an example, let's create an interpreter for lambda calculus. + +@section{Expander} + +Here is the syntax-spec: + +@repl[ +#:hidden #t +(module grammar racket + (provide (all-defined-out) (for-syntax (all-defined-out)) (for-space lc (all-defined-out))) + (require "main.rkt" (for-syntax syntax/parse)) + (syntax-spec + (binding-class lc-var #:binding-space lc) + (extension-class lc-macro #:binding-space lc) + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (rkt e:expr) + (~> (e1 e2) + ;; this is necessary to preserve source location, properties, etc. 
+ (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr)) + + (host-interface/expression + (lc-expand e:lc-expr) + #'#'e))) +(require 'grammar "main.rkt" (for-syntax syntax/parse)) +] + +@racketmod[racket +(require syntax-spec (for-syntax syntax/parse)) +(syntax-spec + (binding-class lc-var #:binding-space lc) + (extension-class lc-macro #:binding-space lc) + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (~> (e1 e2) + (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr)) + + (host-interface/expression + (lc-expand e:lc-expr) + #'#'e)) +] + +We have numbers, binary addition, variables, lambdas, and applications. The interesting bit is the host interface @racket[lc-expand]. The result is the expanded syntax itself! For example: + +@repl[ +(lc-expand ((lambda (x) x) 1)) +] + +The host-interface could also just invoke the interpreter directly on the syntax: + +@racketblock[ +(host-interface/expression + (lc e:lc-expr) + #'(lc-eval #'e empty-env)) +] + +This is what you'd normally do, but we haven't implemented @racket[lc-eval] yet so we'll stick with @racket[lc-expand]. + +Our language is also macro-extensible, so the result of @racket[lc-expand] expands macros away: + +@repl[ +(eval:no-prompt (require (for-syntax syntax/parse))) +(eval:no-prompt +(define-dsl-syntax let lc-macro + (syntax-parser + [(_ ([x:id e:expr]) body:expr) + #'((lambda (x) body) e)]))) +(lc-expand (let ([x 1]) (+ x x))) +] + +@section{Evaluator} + +We can use the output of @racket[lc-expand] as the input of an interpreter. But first, let's define some helpers. + +We will be building a strict, environment-based interpreter for this language, so we need to define what our environment will look like. 
+ +The environment needs to map variables to values, and we have the result of syntax-spec expansion, so we can use symbol tables! This means our environment will map identifiers to values and will respect hygiene. syntax-spec gives us this benefit of hygienic environments for free, which is important. If we used a hash from symbols to values, which would contain no binding/hygiene information, an example like this would break: + +@racketblock[ +(define-dsl-syntax m lc-macro + (syntax-parser + [(_ e) #'(let ([tmp 2]) e)])) +(lc (let ([tmp 1]) (m tmp))) +] + +The macro-introduced @racket[tmp] would shadow the surface syntax @racket[tmp] and we'd get @racket[2] instead of @racket[1]. + +@repl[ +#:hidden #t +(require (for-template "main.rkt")) +] + +@repl[ +(eval:alts (eval:no-prompt (require (for-template syntax-spec))) (void)) +;; An Env is a (ImmutableBoundIdTable Value) +(eval:no-prompt (define empty-env (immutable-symbol-table))) +;; Env Identifier -> Value +(eval:no-prompt +(define (env-lookup env x) + (if (symbol-table-has-key? env x) + (symbol-table-ref env x) + x))) +;; Env Identifier Value -> Env +(eval:no-prompt +(define (env-extend env x v) + (symbol-table-set env x v))) +] + +One more thing we'll need is the ability to raise errors. Luckily, since we're operating on syntax, we can report the source location of an error. 
+ +@repl[ +(eval:no-prompt (require racket/syntax-srcloc)) +(eval:no-prompt +(define (lc-error stx msg) + (define loc (syntax-srcloc stx)) + (if loc + (raise-user-error (format "~a: ~a" (srcloc->string loc) msg)) + (raise-user-error 'lc msg)))) +(eval:error (lc-error #'x "something went wrong")) +] + +Alright, now let's define our evaluator: + +@repl[ +(eval:no-prompt (require syntax/parse)) +(eval:no-prompt +(define-syntax-rule (lc e) + (lc-eval (lc-expand e) empty-env))) +(eval:no-prompt +(define (lc-eval stx env) + (syntax-parse stx + #:datum-literals (+ lambda #%app) + [n:number + (syntax->datum #'n)] + [(+ e1 e2) + (define v1 (lc-eval #'e1 env)) + (unless (number? v1) + (lc-error this-syntax "+ expects number")) + (define v2 (lc-eval #'e2 env)) + (unless (number? v2) + (lc-error this-syntax "+ expects number")) + (+ v1 v2)] + [x:id + (env-lookup env #'x)] + [(lambda (x:id) e:expr) + (lambda (v) (lc-eval #'e (env-extend env #'x v)))] + [(#%app e1 e2) + (match (lc-eval #'e1 env) + [(? procedure? f) + (f (lc-eval #'e2 env))] + [_ + (lc-error this-syntax "applied non-function")])]))) +(lc 1) +(lc (+ 1 1)) +(lc (let ([x 1]) (+ x x))) +(lc ((lambda (x) (+ x 1)) 3)) +(eval:error (lc (1 2))) +(define-dsl-syntax m lc-macro + (syntax-parser + [(_ e) #'(let ([tmp 2]) e)])) +(lc (let ([tmp 1]) (m tmp))) +] + +Pretty cool! + +To recap, we are using syntax-spec as a frontend for our interpreter, which operates on expanded syntax and uses symbol tables as an environment. 
+ +Here are some of the benefits of writing an interpreter in this style: + +@itemlist[ +@item{We operate on syntax, which means we easily get source locations in errors and we can use syntax/parse to perform case analysis on expressions} +@item{Our interpreter can assume that the program is grammatically valid and well-bound since we are operating on the result of expansion from syntax-spec} +@item{We can use symbol tables for environments, which are hygienic} +@item{Our language is macro-extensible and our interpreter only has to operate on core forms} +] + +@section{Supporting Racket Subexpressions} + +We can add limited support for Racket subexpressions to our language: + +@racketblock[ +(syntax-spec + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (rkt e:expr) + (~> (e1 e2) + ;; this is necessary to preserve source location, properties, etc. + (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr))) +] + +We added @racket[(rkt e:expr)] to the productions. Usually, for a Racket expression position like this, we'd use the @racket[racket-expr] nonterminal instead of the @racket[expr] syntax class, but since we're evaluating syntax, we want the unmodified syntax of the Racket expression with no @racket[#%host-expression] wrapper. Evaluation is simple: + +@repl[ +(eval:no-prompt +(define (lc-eval stx env) + (syntax-parse stx + #:datum-literals (+ lambda #%app rkt) + [n:number + (syntax->datum #'n)] + [(+ e1 e2) + (define v1 (lc-eval #'e1 env)) + (unless (number? v1) + (lc-error this-syntax "+ expects number")) + (define v2 (lc-eval #'e2 env)) + (unless (number? v2) + (lc-error this-syntax "+ expects number")) + (+ v1 v2)] + [x:id + (env-lookup env #'x)] + [(lambda (x:id) e:expr) + (lambda (v) (lc-eval #'e (env-extend env #'x v)))] + [(#%app e1 e2) + (match (lc-eval #'e1 env) + [(? procedure? 
f) + (f (lc-eval #'e2 env))] + [_ + (lc-error this-syntax "applied non-function")])] + [(rkt e) + (eval #'e)]))) +(lc (rkt (* 4 2))) +] + +We just add a case that calls @racket[eval] on the Racket expression! However, there are some limitations with this method. In particular, we have access to top-level names like @racket[*], but not local variables defined outside of the Racket subexpression, because @racket[eval] is evaluating against the global namespace and not capturing local variable definitions. + +@repl[ +(define top-level-x 2) +(lc (rkt top-level-x)) +(eval:error + (let ([local-x 3]) + (lc (rkt local-x)))) +] + +Similarly, we cannot reference @racket[lc-var]s: + +@repl[ +(eval:error + (lc (let ([lc-x 4]) (rkt lc-x)))) +] diff --git a/tests/dsls/syntax-interpreter.rkt b/tests/dsls/syntax-interpreter.rkt index eaf8619..996985f 100644 --- a/tests/dsls/syntax-interpreter.rkt +++ b/tests/dsls/syntax-interpreter.rkt @@ -34,6 +34,7 @@ x:lc-var (lambda (x:lc-var) e:lc-expr) #:binding (scope (bind x) e) + (rkt e:expr) (~> (e1 e2) ;; this is necessary to preserve source location, properties, etc. (syntax/loc this-syntax (#%app e1 e2))) @@ -64,9 +65,6 @@ ;; Env Identifier Value -> Void (define (env-extend env x v) (symbol-table-set env x v)) -;; this seems weird. not sure if this will work -;; from michael: -;; will work fine locally, will get weird if you have host expressions. ;; A Value is one of ;; a Number @@ -85,7 +83,7 @@ ;; Syntax Env -> Value (define (lc-eval stx env) (syntax-parse stx - #:datum-literals (+ lambda #%app) + #:datum-literals (+ lambda #%app rkt) [n:number (syntax->datum #'n)] [(+ e1 e2) @@ -112,7 +110,9 @@ [(? syntax? 
f) #`(#,f #,(lc-eval #'e2 env))] [_ - (lc-error this-syntax "applied non-function")])])) + (lc-error this-syntax "applied non-function")])] + [(rkt e) + (eval #'e)])) ;; Value -> Syntax (define (lc-uneval v) @@ -146,6 +146,8 @@ (raise-user-error (format "~a: ~a" (srcloc->string loc) msg)) (raise-user-error 'lc msg))) +(define top-level-var 4) + (module+ test (define-syntax-rule (teval e) (check-equal? (lc e) e)) (define-syntax-rule (tnormalize e e^) (check-equal? (syntax->datum (normalize e)) 'e^)) @@ -199,9 +201,12 @@ (tnormalize 1 1) (tnormalize (lambda (x) x) (lambda (_.0) _.0)) + ;; evaluates in lambda bodies (tnormalize (lambda (x) (+ (+ 1 1) x)) (lambda (_.0) (+ 2 _.0))) (tnormalize (lambda (x) (+ x (+ 1 1))) (lambda (_.0) (+ _.0 2))) (tnormalize (lambda (x) (x (lambda (y) y))) - (lambda (_.0) (_.0 (lambda (_.1) _.1))))) + (lambda (_.0) (_.0 (lambda (_.1) _.1)))) + (check-equal? (lc (rkt (* 2 2))) 4) + (check-equal? (lc (rkt top-level-var)) 4)) From 02cc446349cfec9a300ad597ea8503ebe505bc9d Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Tue, 8 Jul 2025 22:53:34 -0400 Subject: [PATCH 10/26] some work with michael and cleaning --- main.rkt | 3 +++ private/runtime/binding-operations.rkt | 4 +-- tests/dsls/multipass.rkt | 37 +++++++++++++------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/main.rkt b/main.rkt index 0967f69..53328db 100644 --- a/main.rkt +++ b/main.rkt @@ -7,6 +7,8 @@ ... 
...+ + #%host-expression + mutable-reference-compiler immutable-reference-compiler @@ -60,6 +62,7 @@ "private/runtime/compile.rkt" (for-syntax syntax/parse (except-in "private/ee-lib/main.rkt" racket-var) + "private/runtime/compile.rkt" "private/ee-lib/persistent-id-table.rkt" "private/ee-lib/binding.rkt" "private/runtime/binding-operations.rkt" diff --git a/private/runtime/binding-operations.rkt b/private/runtime/binding-operations.rkt index 3f55179..be2b6c6 100644 --- a/private/runtime/binding-operations.rkt +++ b/private/runtime/binding-operations.rkt @@ -198,9 +198,7 @@ 'expression '()) - (sequence->list (in-symbol-set (for/fold ([references (immutable-symbol-set)]) - ([x (in-symbol-set (current-referenced-vars))]) - (symbol-set-add references x)))))) + (sequence->list (in-symbol-set (current-referenced-vars))))) (define recording-reference-compiler (make-variable-like-reference-compiler diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index 8bf831e..e23ffe2 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -4,7 +4,7 @@ ;; arithmetic + let -> ANF -> prune unused variables -> racket (require "../../testing.rkt" - (for-syntax racket/list rackunit (only-in "../../private/ee-lib/main.rkt" define/hygienic))) + (for-syntax racket/pretty racket/list rackunit (only-in "../../private/ee-lib/main.rkt" define/hygienic))) (syntax-spec (binding-class var #:reference-compiler immutable-reference-compiler) @@ -100,16 +100,15 @@ ; anf-expr -> anf-expr (define/hygienic (prune-unused-variables e) #:expression - (define var-used? (get-used-vars e)) - (remove-unused-vars e var-used?)) + (define used-vars (get-used-vars e)) + (remove-unused-vars e used-vars)) - ; anf-expr -> (Identifier -> Bool) + ; anf-expr -> SymbolSet ; non-hygienic because it's just an analysis pass (define (get-used-vars e) - (define-local-symbol-table used-vars) + (define used-vars (local-symbol-set)) (define (mark-as-used! x) - (symbol-table-set! 
used-vars x #t)) - (define (var-used? x) (symbol-table-ref used-vars x #f)) + (symbol-set-add! used-vars x)) ; Go bottom-up, seeing references before their binders. ; The invariant is that we only traverse expressions that need ; to be evaluated. @@ -124,25 +123,25 @@ (syntax-parse e [((~literal let) ([x e]) body) (mark-used-variables! #'body) - (when (var-used? #'x) + (when (symbol-set-member? used-vars #'x) (mark-used-variables! #'e))] [(op a b) (mark-used-variables! #'a) (mark-used-variables! #'b)] [x:id (mark-as-used! #'x)] - ; don't descent into racket expressions. - ; this means we'll miss references like (rkt (eval-expr x)). - ; TODO use free-variables once it supports host-expressions + [((~datum rkt) e) + (for ([x (get-racket-referenced-identifiers [var] #'e)]) + (mark-as-used! x))] [_ (void)])) - var-used?) + used-vars) - ; anf-expr (Identifier -> Boolean) -> anf-expr - (define (remove-unused-vars e var-used?) + ; anf-expr SymbolSet -> anf-expr + (define (remove-unused-vars e used-vars) (let loop ([e e]) (syntax-parse e [((~and let (~literal let)) ([x e]) body) - (if (var-used? #'x) + (if (symbol-set-member? used-vars #'x) ; no need to recur on e since it's not a let #`(let ([x e]) #,(loop #'body)) @@ -215,6 +214,7 @@ (+ x (rkt x)))) 2) +#;; this breaks because of get-racket-referenced-identifiers (test-equal? "use outer dsl var in dsl in rkt" (eval-expr (let ([x 1]) @@ -231,9 +231,10 @@ (let ([unused (rkt (error "bad"))]) 1)) 1) -#;; since we don't descend into racket exprs, it thinks it's unused, so it removes it and we get an unbound reference -(check-equal? +#;(check-equal? 
(eval-expr (let ([used-only-in-rkt 1]) (let ([x (rkt used-only-in-rkt)]) - x)))) + x))) + 1) + From 0cfff7ff00a5b2cacfe4c91d6c144da2c0e96a1e Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Tue, 8 Jul 2025 22:54:04 -0400 Subject: [PATCH 11/26] initial draft of multipass tutorial --- scribblings/tutorial/multipass-tutorial.scrbl | 349 +++++++++++++++++- 1 file changed, 346 insertions(+), 3 deletions(-) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index f3ee38c..52e3ef8 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -2,8 +2,351 @@ #lang scribble/manual @(require (for-label racket racket/block racket/class racket/match racket/list syntax/parse "../../main.rkt") - scribble/example) + scribble/example + racket/sandbox) +@(define eval (make-base-eval '(require racket (for-syntax racket)))) +@(define-syntax-rule (repl body ...) (examples #:eval eval #:label #f body ...)) -@title[#:tag "multipass"]{Advanced Tutorial: A Compiler with Multiple Passes} +@title[#:tag "multipass"]{Advanced Tutorial: A Compiler with Transformative Passes} -This is a stub +Many DSLs are implemented in several passes. Some passes may just be static checks, and others may actually transform the program, often to a restricted subset of the surface language. When using syntax-spec, some special care needs to be taken with transformative passes. To demonstrate how such a DSL can be implemented, we will implement a language with an @hyperlink["https://en.wikipedia.org/wiki/A-normal_form"]{A-normal form} transformation and an unused variable pruning optimization. 
+ +@section[#:tag "multipass-expander"]{Expander} + +Here is the syntax-spec of our language: + +@repl[ +#:hidden #t +(module grammar racket + (provide (all-defined-out) (for-syntax (all-defined-out))) + (require "main.rkt" (for-syntax racket syntax/parse)) + (syntax-spec + (binding-class var #:reference-compiler immutable-reference-compiler) + (nonterminal expr + n:number + x:var + ; need to use ~literal because you can't re-use let in the other non-terminals + ((~literal let) ([x:var e:expr]) body:expr) + #:binding (scope (bind x) body) + ((~literal +) a:expr b:expr) + ((~literal *) a:expr b:expr) + ((~literal /) a:expr b:expr) + (rkt e:racket-expr)) + (nonterminal anf-expr + ((~literal let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding (scope (bind x) body) + e:rhs-expr) + (nonterminal rhs-expr + ((~literal +) a:immediate-expr b:immediate-expr) + ((~literal *) a:immediate-expr b:immediate-expr) + ((~literal /) a:immediate-expr b:immediate-expr) + ((~literal rkt) e:racket-expr) + e:immediate-expr) + (nonterminal immediate-expr + x:var + n:number) + + (host-interface/expression + (eval-expr e:expr) + #'(compile-expr e))) +(begin-for-syntax + (define (to-anf e) + (define bindings-rev '()) + (define (bind! x e) (set! bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs e bind!)) + (wrap-lets e^ (reverse bindings-rev))) + + (define (to-rhs e bind!) + (syntax-parse e + [((~literal let) ([x e]) body) + (bind! #'x (to-rhs #'e bind!)) + (to-rhs #'body bind!)] + [(op a b) + (define/syntax-parse a^ (to-immediate #'a bind!)) + (define/syntax-parse b^ (to-immediate #'b bind!)) + #'(op a^ b^)] + [_ this-syntax])) + + (define (to-immediate e bind!) + (syntax-parse e + [(_ . _) + (define/syntax-parse (tmp) (generate-temporaries '(tmp))) + (bind! 
#'tmp (to-rhs this-syntax bind!)) + #'tmp] + [_ this-syntax])) + + (define (wrap-lets e bindings) + (foldr (lambda (binding e) + (define/syntax-parse x (first binding)) + (define/syntax-parse rhs (second binding)) + (define/syntax-parse body e) + #'(let ([x rhs]) body)) + e + bindings))) + +(begin-for-syntax + (define (prune-unused-variables e) + (define used-vars (get-used-vars e)) + (remove-unused-vars e used-vars)) + + (define (get-used-vars e) + (define used-vars (local-symbol-set)) + (define (mark-as-used! x) + (symbol-set-add! used-vars x)) + (let mark-used-variables! ([e e]) + (syntax-parse e + [((~literal let) ([x e]) body) + (mark-used-variables! #'body) + (when (symbol-set-member? used-vars #'x) + (mark-used-variables! #'e))] + [(op a b) + (mark-used-variables! #'a) + (mark-used-variables! #'b)] + [x:id + (mark-as-used! #'x)] + [_ (void)])) + used-vars) + + (define (remove-unused-vars e used-vars) + (let loop ([e e]) + (syntax-parse e + [((~and let (~literal let)) ([x e]) body) + (define/syntax-parse body^ (loop #'body)) + (if (symbol-set-member? used-vars #'x) + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax])))) + +(define-syntax compile-anf + (syntax-parser + [(_ ((~literal let) ([x e]) body)) + #'(let ([x (compile-anf e)]) (compile-anf body))] + [(_ (op a b)) #'(op a b)] + [(_ ((~literal rkt) e)) + #'(let ([x e]) + (if (number? x) + x + (error 'rkt "expected a number, got ~a" x)))] + [(_ e) #'e])) + +(begin-for-syntax + (define local-expand-anf (nonterminal-expander anf-expr))) + +(define-syntax compile-expr + (syntax-parser + [(_ e) + (define e/anf (local-expand-anf (to-anf #'e) #:should-rename? #t)) + (define e/pruned (prune-unused-variables e/anf)) + (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? 
#t)) + #'(compile-anf e/pruned^)])) + ) +(require 'grammar "main.rkt" (for-syntax syntax/parse)) +] + +@racketmod[ +racket +(require syntax-spec (for-syntax syntax/parse)) +(syntax-spec + (binding-class var #:reference-compiler immutable-reference-compiler) + (nonterminal expr + n:number + x:var + ((~literal let) ([x:var e:expr]) body:expr) + #:binding (scope (bind x) body) + ((~literal +) a:expr b:expr) + ((~literal *) a:expr b:expr) + ((~literal /) a:expr b:expr) + (rkt e:racket-expr)) + (nonterminal anf-expr + ((~literal let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding (scope (bind x) body) + e:rhs-expr) + (nonterminal rhs-expr + ((~literal +) a:immediate-expr b:immediate-expr) + ((~literal *) a:immediate-expr b:immediate-expr) + ((~literal /) a:immediate-expr b:immediate-expr) + ((~literal rkt) e:racket-expr) + e:immediate-expr) + (nonterminal immediate-expr + x:var + n:number) + + (host-interface/expression + (eval-expr e:expr) + #'(compile-expr e))) +] + +Our language supports arithmetic, local variables, and Racket subexpressions. + +We have the following nonterminals: + +@itemlist[ +@item{@racket[expr]: The surface syntax of a program} +@item{@racket[anf-expr]: An expression in A-normal form. Users will not be writing these expressions; the compiler will transform @racket[expr]s the user writes into @racket[anf-expr]s.} +@item{@racket[rhs-expr]: An expression which is allowed to be on the right-hand side of a binding pair in an expression when it is in A-normal form. Conceptually, these expressions take at most one "step" of reduction to evaluate. In other words, no nested expressions (except for @racket[rkt] expressions).} +@item{@racket[immediate-expr]: Atomic expressions that can immediately be evaluated.} +] + +A-normal form makes the evaluation order of the program completely unambiguous and simplifies compilation to a language like assembly. Now, let's transform our surface syntax to it! 
+ +@section{A-normal Form Transformation} + +The core idea of transforming to A-normal form is extracting nested sub-expressions into temporary variables. For example: + +@racketblock[ +(+ (+ 1 2) (+ 3 4)) +~> +(let ([tmp1 (+ 1 2)] + [tmp2 (+ 3 4)]) + (+ tmp1 tmp2)) +] + +To follow our grammar for an @racket[anf-expr], the arguments to functions like @racket[+] must be immediate expressions, like variable references or numbers. Our source program did not obey this rule, so we had to create temporary variables for subexpressions and replace each subexpression with a reference to its temporary variable. + +Now let's automate this process: + +@racketblock[ +(begin-for-syntax + (define (to-anf e) + (define bindings-rev '()) + (define (bind! x e) (set! bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs e bind!)) + (wrap-lets e^ (reverse bindings-rev))) + + (define (to-rhs e bind!) + (syntax-parse e + [((~literal let) ([x e]) body) + (bind! #'x (to-rhs #'e bind!)) + (to-rhs #'body bind!)] + [(op a b) + (define/syntax-parse a^ (to-immediate #'a bind!)) + (define/syntax-parse b^ (to-immediate #'b bind!)) + #'(op a^ b^)] + [_ this-syntax])) + + (define (to-immediate e bind!) + (syntax-parse e + [(_ . _) + (define/syntax-parse (tmp) (generate-temporaries '(tmp))) + (bind! #'tmp (to-rhs this-syntax bind!)) + #'tmp] + [_ this-syntax])) + + (define (wrap-lets e bindings) + (foldr (lambda (binding e) + (define/syntax-parse x (first binding)) + (define/syntax-parse rhs (second binding)) + (define/syntax-parse body e) + #'(let ([x rhs]) body)) + e + bindings))) +] + +Our transformation goes through the expression, recording the temporary variable bindings we need to lift out around the final @racket[rhs-expr] that will be the body of the innermost let at the end of the transformation. 
Converting to an @racket[rhs-expr] or an @racket[immediate-expr] has the side effect of recording a binding pair to be lifted, and the result of replacing complex subexpressions with temporary variable references is returned from each helper. + +We are using compile-time functions, rather than macros, for these compiler passes. There are a few reasons for this: + +@itemlist[ +@item{We are passing around non-syntax data like symbol sets.} +@item{Our recursive helpers rely on side effects in a way that requires eager, bottom-up evaluation, which macros aren't good for.} +@item{We want to be working with DSL syntax between every pass, not macro use syntax which will eventually expand to DSL code.} +] + +@section{Pruning unused variables} + +Using syntax-spec's symbol tables and binding operations, we can add an optimizing pass that removes unused variables. + +@racketblock[ +(begin-for-syntax + (define (prune-unused-variables e) + (define used-vars (get-used-vars e)) + (remove-unused-vars e used-vars)) + + (define (get-used-vars e) + (define used-vars (local-symbol-set)) + (define (mark-as-used! x) + (symbol-set-add! used-vars x)) + (let mark-used-variables! ([e e]) + (syntax-parse e + [((~literal let) ([x e]) body) + (mark-used-variables! #'body) + (when (symbol-set-member? used-vars #'x) + (mark-used-variables! #'e))] + [(op a b) + (mark-used-variables! #'a) + (mark-used-variables! #'b)] + [x:id + (mark-as-used! #'x)] + [_ (void)])) + used-vars) + + (define (remove-unused-vars e used-vars) + (let loop ([e e]) + (syntax-parse e + [((~and let (~literal let)) ([x e]) body) + (define/syntax-parse body^ (loop #'body)) + (if (symbol-set-member? used-vars #'x) + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax])))) +] + +@;TODO don't ignore racket subexpression references. Requires fixing a bug though. +First, we figure out which variables are referenced, using a bottom-up traversal with an invariant that we only check expressions which will need to be evaluated. 
For now, we ignore references in Racket subexpressions. + +Then, with that knowledge, we just remove the bindings of the unused variables. Other than Racket subexpressions, our computations are pure, so this is mostly a sound optimization. + +This optimization is slightly simplified by having already transformed the program to A-normal form. We can see this in @racket[remove-unused-vars]: We don't need to recur on the right-hand-side of a let-binding because we know there are no variable bindings to be removed from that expression. However, due to the nature of expansion and binding structure, some special care needs to be taken in sequencing multiple transformative compiler passes. + +@section{Putting it all Together} + +Since our A-normal form transformation adds new bindings, we need to re-expand the result so syntax-spec can compute and check binding information for use in later passes/compilation: + +@racketblock[ +(begin-for-syntax + (define local-expand-anf (nonterminal-expander anf-expr))) + +(define-syntax compile-expr + (syntax-parser + [(_ e) + (define e/anf (local-expand-anf (to-anf #'e) #:should-rename? #t)) + (define e/pruned (prune-unused-variables e/anf)) + (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) + #'(compile-anf e/pruned^)])) +] + +We perform this re-expansion using @racket[nonterminal-expander]. This function expects DSL syntax of a specified nonterminal (here, @racket[anf-expr]) and expands macros in the DSL code, checks binding structure, etc. It's kind of like @racket[local-expand] but for a particular nonterminal. This is what happens in a host interface that produces the expanded, core syntax that your compiler works with. + +The expansion after pruning is technically unnecessary for this example since we are only removing bindings in that pass, but it is good to always make sure your compiler is receiving freshly expanding syntax. 
This extra expansion also makes sure your optimization produces valid syntax. In general, even if your compiler just has a single transformative pass before compilation, you should expand the result of the pass. + +Finally, we must implement compilation of A-normal form expressions to Racket, which is straightforward: + +@racketblock[ +(define-syntax compile-anf + (syntax-parser + [(_ ((~literal let) ([x e]) body)) + #'(let ([x (compile-anf e)]) (compile-anf body))] + [(_ (op a b)) #'(op a b)] + [(_ ((~literal rkt) e)) + #'(let ([x e]) + (if (number? x) + x + (error 'rkt "expected a number, got ~a" x)))] + [(_ e) #'e])) +] + +@repl[ +(eval-expr 1) +(eval-expr (let ([x 1]) (let ([y 2]) x))) +(eval-expr (let ([unused (rkt (displayln "hello!"))]) 42)) +] + +To summarize the key points: + +@itemlist[ +@item{We used compile-time functions for compiler passes, rather than macros.} +@item{We can have multiple passes in a compiler simply by sequencing compile-time functions that operate on expanded DSL expressions.} +@item{Since we have transformative passes in our compiler, we must re-expand resulting syntax using @racket[nonterminal-expander] after each transformation.} +] From 8ea7f518036d33edd1c9585bfc6fb19f89621ac6 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Tue, 8 Jul 2025 22:55:24 -0400 Subject: [PATCH 12/26] edits with michael --- .../tutorial/syntax-interpreter-tutorial.scrbl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scribblings/tutorial/syntax-interpreter-tutorial.scrbl b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl index da80f6c..aec8500 100644 --- a/scribblings/tutorial/syntax-interpreter-tutorial.scrbl +++ b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl @@ -6,11 +6,11 @@ @(define eval (make-base-eval '(require racket))) @(define-syntax-rule (repl body ...) 
(examples #:eval eval #:label #f body ...)) -@title[#:tag "syntax-interpreter"]{Advanced Tutorial: An Interpreter} +@title[#:tag "syntax-interpreter"]{Advanced Tutorial: An Interpreted Language} -This guide demonstrates how to use syntax-spec to create an interpreter, as well as the benefits of this approach. +This guide demonstrates how to use syntax-spec to create an interpreted language, as well as the benefits of this approach. -Typically, syntax-spec is used to create a compiler, usually to Racket. However, it's possible to use it to create an interpreter as well. In such an interpreter, syntax-spec will enforce the grammar, check binding, macro-expand the source program, etc. and the interpreter takes in the expanded, core syntax and evaluates it to a value. As an example, let's create an interpreter for lambda calculus. +Typically, syntax-spec is used to create languages that compile to Racket. However, it's possible to use it to create an interpreted language as well. In such an implementation, syntax-spec will enforce the grammar, check binding, macro-expand the source program, etc. and pass off the expanded, core syntax to an interpreter that evaluates it to a value. As an example, let's create an interpreted implementation of the lambda calculus. @section{Expander} @@ -34,7 +34,6 @@ Here is the syntax-spec: #:binding (scope (bind x) e) (rkt e:expr) (~> (e1 e2) - ;; this is necessary to preserve source location, properties, etc. 
(syntax/loc this-syntax (#%app e1 e2))) (#%app e1:lc-expr e2:lc-expr)) @@ -44,7 +43,8 @@ Here is the syntax-spec: (require 'grammar "main.rkt" (for-syntax syntax/parse)) ] -@racketmod[racket +@racketmod[ +racket (require syntax-spec (for-syntax syntax/parse)) (syntax-spec (binding-class lc-var #:binding-space lc) @@ -94,7 +94,7 @@ Our language is also macro-extensible, so the result of @racket[lc-expand] expan (lc-expand (let ([x 1]) (+ x x))) ] -@section{Evaluator} +@section{Interpreter} We can use the output of @racket[lc-expand] as the input of an interpreter. But first, let's define some helpers. @@ -145,7 +145,7 @@ One more thing we'll need is the ability to raise errors. Luckily, since we're o (eval:error (lc-error #'x "something went wrong")) ] -Alright, now let's define our evaluator: +Alright, now let's define our interpreter: @repl[ (eval:no-prompt (require syntax/parse)) @@ -221,7 +221,7 @@ We can add limited support for Racket subexpressions to our language: (#%app e1:lc-expr e2:lc-expr))) ] -We added @racket[(rkt e:expr)] to the productions. Usually, for a Racket expression position like this, we'd use the @racket[racket-expr] nonterminal instead of the @racket[expr] syntax class, but since we're evaluating syntax, we want the unmodified syntax of the Racket expression with no @racket[#%host-expression] wrapper. Evaluation is simple: +We added @racket[(rkt e:expr)] to the productions. Usually for racket expressions, we use @racket[racket-expr], which wraps the expression with @racket[#%host-expression]. This does some work behind the scenes to make sure we can refer to DSL bindings in the Racket expression. But for this syntax interpreter, that won't work, so we'll just use @racket[expr] to avoid wrapping the expression in a @racket[#%host-expression]. 
Evaluation is simple: @repl[ (eval:no-prompt From d82ba37fa32ccd98dbe4545f81ac11400d1501f3 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 10 Jul 2025 23:14:38 -0400 Subject: [PATCH 13/26] mention same number of expansions for identifiers --- scribblings/tutorial/multipass-tutorial.scrbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index 52e3ef8..d9afeb3 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -321,6 +321,8 @@ We perform this re-expansion using @racket[nonterminal-expander]. This function The expansion after pruning is technically unnecessary for this example since we are only removing bindings in that pass, but it is good to always make sure your compiler is receiving freshly expanding syntax. This extra expansion also makes sure your optimization produces valid syntax. In general, even if your compiler just has a single transformative pass before compilation, you should expand the result of the pass. +An additional caveat is that identifiers need to undergo the same number of expansions for things to work properly. The easiest way to do this is to expand only the entire dsl expression at once, rather than expanding subexpressions individually. 
+ Finally, we must implement compilation of A-normal form expressions to Racket, which is straightforward: @racketblock[ From 1dfa7a77f085390124464471d48aa84e15b8b3ab Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 10 Jul 2025 23:14:53 -0400 Subject: [PATCH 14/26] document #%host-expression --- scribblings/reference/compiling.scrbl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scribblings/reference/compiling.scrbl b/scribblings/reference/compiling.scrbl index 6846c3a..7603d46 100644 --- a/scribblings/reference/compiling.scrbl +++ b/scribblings/reference/compiling.scrbl @@ -64,6 +64,9 @@ A variable-like reference compiler that allows references as well as mutations v References expand to their @tech{compiled identifier}. +@defform[(#%host-expression rkt-expr)] + +Racket subexpressions are wrapped with @racket[#%host-expression] during DSL expansion, which delays the expansion of the Racket subexpression until after compilation, allowing context like syntax parameters to be established by the compiler, which can be used by reference compilers. @section{Compiled identifiers vs surface syntax} From 8deb310c7221969bbafd55c98ac477881a19e1e7 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 20:47:12 -0400 Subject: [PATCH 15/26] edits with michael --- scribblings/tutorial/multipass-tutorial.scrbl | 67 +++++++++++++------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index d9afeb3..946a828 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -1,4 +1,3 @@ - #lang scribble/manual @(require (for-label racket racket/block racket/class racket/match racket/list syntax/parse "../../main.rkt") @@ -9,7 +8,7 @@ @title[#:tag "multipass"]{Advanced Tutorial: A Compiler with Transformative Passes} -Many DSLs are implemented in several passes. 
Some passes may just be static checks, and others may actually transform the program, often to a restricted subset of the surface language. When using syntax-spec, some special care needs to be taken with transformative passes. To demonstrate how such a DSL can be implemented, we will implement a language with an @hyperlink["https://en.wikipedia.org/wiki/A-normal_form"]{A-normal form} transformation and an unused variable pruning optimization. +Many DSLs need a compiler that transforms syntax in several passes. Some passes may just be static checks, and others may actually transform the program, often to a restricted subset of the surface language. When using syntax-spec, some special care needs to be taken with transformative passes. To demonstrate how such a DSL can be implemented, we will create a language with an @hyperlink["https://en.wikipedia.org/wiki/A-normal_form"]{A-normal form} transformation and an unused variable pruning optimization. @section[#:tag "multipass-expander"]{Expander} @@ -197,9 +196,9 @@ The core idea of transforming to A-normal form is extracting nested sub-expressi @racketblock[ (+ (+ 1 2) (+ 3 4)) ~> -(let ([tmp1 (+ 1 2)] - [tmp2 (+ 3 4)]) - (+ tmp1 tmp2)) +(let ([tmp1 (+ 1 2)]) + (let ([tmp2 (+ 3 4)]) + (+ tmp1 tmp2))) ] To follow our grammar for an @racket[anf-expr], the arguments to functions like @racket[+] must be immediate expressions, like variable references or numbers. Our source program did not obey this rule, so we had to create temporary variables for subexpressions and replace each subexpression with a reference to its temporary variable. 
@@ -239,24 +238,54 @@ Now let's automate this process: (define/syntax-parse rhs (second binding)) (define/syntax-parse body e) #'(let ([x rhs]) body)) - e - bindings))) + e + bindings))) ] -Our transformation goes through the expression, recording the temporary variable bindings we need to lift out around the final @racket[rhs-expr] that will be the body of the innermost let at the end of the transformation. Converting to an @racket[rhs-expr] or an @racket[immediate-expr] has the side effect of recording a binding pair to be lifted, and the result of replacing complex subexpressions with temporary variable references is returned from each helper. +Our transformation goes through the expression, recording the temporary variable bindings to lift. The final @racket[rhs-expr] returned by @racket[to-rhs] will be the body of the innermost @racket[let] at the end of the transformation. Converting to an @racket[rhs-expr] or an @racket[immediate-expr] has the side effect of recording a binding pair to be lifted, and the result of replacing complex subexpressions with temporary variable references is returned from each helper. -We are using compile-time functions, rather than macros, for these compiler passes. There are a few reasons for this: +Notice that the code generation pass is implemented as a macro, while the intermediate passes are implemented as compile-time functions. Using a Racket macro for the code generator is convenient because it provides hygiene for any temporary names we introduce. 
For the intermediate passes we must use compile-time functions rather than macros, for three reasons: @itemlist[ -@item{We are passing around non-syntax data like symbol sets.} -@item{Our recursive helpers rely on side effects in a way that requires eager, bottom-up evaluation, which macros aren't good for.} -@item{We want to be working with DSL syntax between every pass, not macro use syntax which will eventually expand to DSL code.} +@item{ +The intermediate passes do not generate Racket syntax that can be further expanded by the Racket macro expander. Instead, they generate code in our DSL's intermediate representation. +} +@item{ +Compiler passes may need additional arguments and return values, which may not be syntax objects. This is possible with a compile-time function, but not with a macro. For example, our A-normal form transformation receives the @racket[bind!] procedure as an argument. +} +@item{ +Compiler passes may use side effects, and rely on a particular order of evaluation. For our A-normal form pass, we want to create @racket[let]-bindings for the innermost subexpressions first. We accomplish this via the way we order calls to the @racket[bind!] procedure. +} ] @section{Pruning unused variables} Using syntax-spec's symbol tables and binding operations, we can add an optimizing pass that removes unused variables. +For example: + +@racketblock[ +(let ([x (+ 2 2)]) + (let ([y (+ 3 3)]) + x)) +~> +(let ([x (+ 2 2)]) + x) +] + +Since @racket[y] is not referenced, we can just remove its definition from the program. Note that this optimization only makes sense when the right-hand-side of a definition is free of side-effects. For example, pruning @racket[y] in this example would change the behavior of the program: + +@racketblock[ +(let ([x (+ 2 2)]) + (let ([y (rkt (begin (displayln "hello!") (+ 3 3)))]) + x)) +~> +(let ([x (+ 2 2)]) + x) +] + +Without pruning, this would print something, but with pruning, it would not. 
Our optimization shouldn't change the behavior of the program. This DSL is designed with the requirement that @racket[rkt] forms only have pure computations inside, but this cannot easily be checked. As such, we will assume Racket subexpressions are free of side effects, and our optimization will only be sound for side-effect-free Racket subexpressions. + @racketblock[ (begin-for-syntax (define (prune-unused-variables e) @@ -284,7 +313,7 @@ Using syntax-spec's symbol tables and binding operations, we can add an optimizi (define (remove-unused-vars e used-vars) (let loop ([e e]) (syntax-parse e - [((~and let (~literal let)) ([x e]) body) + [((~literal let) ([x e]) body) (define/syntax-parse body^ (loop #'body)) (if (symbol-set-member? used-vars #'x) #'(let ([x e]) @@ -294,15 +323,15 @@ Using syntax-spec's symbol tables and binding operations, we can add an optimizi ] @;TODO don't ignore racket subexpression references. Requires fixing a bug though. -First, we figure out which variables are referenced, using a bottom-up traversal with an invariant that we only check expressions which will need to be evaluated. For now, we ignore references in Racket subexpressions. +First, we figure out which variables are referenced, using a bottom-up traversal. We only consider variables in the right-hand side of a @racket[let] to be used if we have determined that the variable bound by the @racket[let] is used in its body. For now, we ignore references in Racket subexpressions. -Then, with that knowledge, we just remove the bindings of the unused variables. Other than Racket subexpressions, our computations are pure, so this is mostly a sound optimization. +Then, with that knowledge, we reconstruct the program, only including bindings for used variables. -This optimization is slightly simplified by having already transformed the program to A-normal form. 
We can see this in @racket[remove-unused-vars]: We don't need to recur on the right-hand-side of a let-binding because we know there are no variable bindings to be removed from that expression. However, due to the nature of expansion and binding structure, some special care needs to be taken in sequencing multiple transformative compiler passes. +This optimization is slightly simplified by having already transformed the program to A-normal form. We can see this in @racket[remove-unused-vars]: We don't need to recur on the right-hand-side of a let-binding because we know there are no variable bindings to be removed from that expression. @section{Putting it all Together} -Since our A-normal form transformation adds new bindings, we need to re-expand the result so syntax-spec can compute and check binding information for use in later passes/compilation: +Due to the nature of expansion and binding structure, some special care needs to be taken in sequencing multiple transformative compiler passes. Since our A-normal form transformation adds new bindings, we need to re-expand the result so syntax-spec can compute and check binding information for use in later passes/compilation: @racketblock[ (begin-for-syntax @@ -317,11 +346,11 @@ Since our A-normal form transformation adds new bindings, we need to re-expand t #'(compile-anf e/pruned^)])) ] -We perform this re-expansion using @racket[nonterminal-expander]. This function expects DSL syntax of a specified nonterminal (here, @racket[anf-expr]) and expands macros in the DSL code, checks binding structure, etc. It's kind of like @racket[local-expand] but for a particular nonterminal. This is what happens in a host interface that produces the expanded, core syntax that your compiler works with. +We perform this re-expansion using @racket[nonterminal-expander]. This function expects DSL syntax of a specified nonterminal (here, @racket[anf-expr]) and expands macros in the DSL code, checks binding structure, etc. 
It's kind of like @racket[local-expand] but for a particular nonterminal. This is what happens in a host interface that produces the expanded, core syntax that your compiler works with. We use @racket[#:should-rename? #t] to ensure that we re-compile and rename identifiers in this expansion. The expansion after pruning is technically unnecessary for this example since we are only removing bindings in that pass, but it is good to always make sure your compiler is receiving freshly expanding syntax. This extra expansion also makes sure your optimization produces valid syntax. In general, even if your compiler just has a single transformative pass before compilation, you should expand the result of the pass. -An additional caveat is that identifiers need to undergo the same number of expansions for things to work properly. The easiest way to do this is to expand only the entire dsl expression at once, rather than expanding subexpressions individually. +An additional caveat is that identifiers need to undergo the same number of expansions for things to work properly. The easiest way to do this is to expand only the entire DSL expression at once, rather than expanding subexpressions individually. Finally, we must implement compilation of A-normal form expressions to Racket, which is straightforward: From 77d47969f2fd87acce10fd6640ea733b98304159 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 20:51:13 -0400 Subject: [PATCH 16/26] no quasiquote --- tests/dsls/multipass.rkt | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index e23ffe2..34682e7 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -51,8 +51,8 @@ (define e/pruned (prune-unused-variables e/anf)) ; this last local-expand-anf might be unnecessary for this compiler, but i'll leave it in ; since most compilers would need it. 
- (define e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) - #`(compile-anf #,e/pruned^)])) + (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) + #'(compile-anf e/pruned^)])) (begin-for-syntax ; expr -> anf-expr @@ -77,8 +77,9 @@ (bind! #'x (to-rhs #'e bind!)) (to-rhs #'body bind!)] [(op a b) - #`(op #,(to-immediate #'a bind!) - #,(to-immediate #'b bind!))] + (define/syntax-parse a^ (to-immediate #'a bind!)) + (define/syntax-parse b^ (to-immediate #'b bind!)) + #'(op a^ b^)] [_ this-syntax])) ; expr (Identifier rhs-expr -> Void) -> immediate-expr @@ -92,7 +93,11 @@ ; rhs-expr (listof (list Identifier rhs-expr) ) (define (wrap-lets e bindings) - (foldr (lambda (binding e) #`(let ([#,(first binding) #,(second binding)]) #,e)) + (foldr (lambda (binding e) + (define/syntax-parse x (first binding)) + (define/syntax-parse rhs (second binding)) + (define/syntax-parse body e) + #'(let ([x rhs]) body)) e bindings))) @@ -141,11 +146,12 @@ (let loop ([e e]) (syntax-parse e [((~and let (~literal let)) ([x e]) body) + (define/syntax-parse body^ (loop #'body)) (if (symbol-set-member? 
used-vars #'x) ; no need to recur on e since it's not a let - #`(let ([x e]) - #,(loop #'body)) - (loop #'body))] + #'(let ([x e]) + body^) + #'body^)] [_ this-syntax])))) (define-syntax compile-anf From 4e48eb5b67c76e63b01c044b2e5b1fa3624cc1e3 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 20:55:51 -0400 Subject: [PATCH 17/26] binding space instead of racket literals --- tests/dsls/multipass.rkt | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index 34682e7..006ac19 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -7,28 +7,32 @@ (for-syntax racket/pretty racket/list rackunit (only-in "../../private/ee-lib/main.rkt" define/hygienic))) (syntax-spec - (binding-class var #:reference-compiler immutable-reference-compiler) + (binding-class var + #:reference-compiler immutable-reference-compiler) (nonterminal expr + #:binding-space anf n:number x:var - ; need to use ~literal because you can't re-use let in the other non-terminals - ((~literal let) ([x:var e:expr]) body:expr) + (let ([x:var e:expr]) body:expr) #:binding (scope (bind x) body) - ((~literal +) a:expr b:expr) - ((~literal *) a:expr b:expr) - ((~literal /) a:expr b:expr) + (+ a:expr b:expr) + (* a:expr b:expr) + (/ a:expr b:expr) (rkt e:racket-expr)) (nonterminal anf-expr - ((~literal let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding-space anf + ((~datum let) ([x:var e:rhs-expr]) body:anf-expr) #:binding (scope (bind x) body) e:rhs-expr) (nonterminal rhs-expr - ((~literal +) a:immediate-expr b:immediate-expr) - ((~literal *) a:immediate-expr b:immediate-expr) - ((~literal /) a:immediate-expr b:immediate-expr) - ((~literal rkt) e:racket-expr) + #:binding-space anf + ((~datum +) a:immediate-expr b:immediate-expr) + ((~datum *) a:immediate-expr b:immediate-expr) + ((~datum /) a:immediate-expr b:immediate-expr) + ((~datum rkt) e:racket-expr) e:immediate-expr) 
(nonterminal immediate-expr + #:binding-space anf x:var n:number) @@ -73,7 +77,7 @@ ; in other compilers, helpers may need to be hygienic too. (define (to-rhs e bind!) (syntax-parse e - [((~literal let) ([x e]) body) + [((~datum let) ([x e]) body) (bind! #'x (to-rhs #'e bind!)) (to-rhs #'body bind!)] [(op a b) @@ -126,7 +130,7 @@ ; so we don't traverse its rhs since it isn't needed. (let mark-used-variables! ([e e]) (syntax-parse e - [((~literal let) ([x e]) body) + [((~datum let) ([x e]) body) (mark-used-variables! #'body) (when (symbol-set-member? used-vars #'x) (mark-used-variables! #'e))] @@ -145,7 +149,7 @@ (define (remove-unused-vars e used-vars) (let loop ([e e]) (syntax-parse e - [((~and let (~literal let)) ([x e]) body) + [((~and let (~datum let)) ([x e]) body) (define/syntax-parse body^ (loop #'body)) (if (symbol-set-member? used-vars #'x) ; no need to recur on e since it's not a let @@ -156,10 +160,10 @@ (define-syntax compile-anf (syntax-parser - [(_ ((~literal let) ([x e]) body)) + [(_ ((~datum let) ([x e]) body)) #'(let ([x (compile-anf e)]) (compile-anf body))] [(_ (op a b)) #'(op a b)] - [(_ ((~literal rkt) e)) + [(_ ((~datum rkt) e)) #'(let ([x e]) (if (number? x) x From 67614ac2f6af506f5c765bcaf41a9ae17a709888 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 20:57:08 -0400 Subject: [PATCH 18/26] bind! ~> lift-binding! --- tests/dsls/multipass.rkt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index 006ac19..0994a47 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -68,30 +68,30 @@ (define bindings-rev '()) ; Identifier rhs-expr -> Void ; ends up producing a let-binding of x to e in the result - (define (bind! x e) (set! bindings-rev (cons (list x e) bindings-rev))) - (define e^ (to-rhs e bind!)) + (define (lift-binding! x e) (set! 
bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) ; expr (Identifier rhs-expr -> Void) -> rhs-expr ; this doesn't need to be hygienic, only the whole pass. ; in other compilers, helpers may need to be hygienic too. - (define (to-rhs e bind!) + (define (to-rhs e lift-binding!) (syntax-parse e [((~datum let) ([x e]) body) - (bind! #'x (to-rhs #'e bind!)) - (to-rhs #'body bind!)] + (lift-binding! #'x (to-rhs #'e lift-binding!)) + (to-rhs #'body lift-binding!)] [(op a b) - (define/syntax-parse a^ (to-immediate #'a bind!)) - (define/syntax-parse b^ (to-immediate #'b bind!)) + (define/syntax-parse a^ (to-immediate #'a lift-binding!)) + (define/syntax-parse b^ (to-immediate #'b lift-binding!)) #'(op a^ b^)] [_ this-syntax])) ; expr (Identifier rhs-expr -> Void) -> immediate-expr - (define (to-immediate e bind!) + (define (to-immediate e lift-binding!) (syntax-parse e [(_ . _) (define/syntax-parse (tmp) (generate-temporaries '(tmp))) - (bind! #'tmp (to-rhs this-syntax bind!)) + (lift-binding! #'tmp (to-rhs this-syntax lift-binding!)) #'tmp] [_ this-syntax])) From 82a22decde585ae5a6f296dd25d3d4ebbe950afc Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 20:59:04 -0400 Subject: [PATCH 19/26] to-rhs! 
--- tests/dsls/multipass.rkt | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index 0994a47..20d5345 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -4,7 +4,7 @@ ;; arithmetic + let -> ANF -> prune unused variables -> racket (require "../../testing.rkt" - (for-syntax racket/pretty racket/list rackunit (only-in "../../private/ee-lib/main.rkt" define/hygienic))) + (for-syntax racket/match racket/pretty racket/list rackunit (only-in "../../private/ee-lib/main.rkt" define/hygienic))) (syntax-spec (binding-class var @@ -69,41 +69,46 @@ ; Identifier rhs-expr -> Void ; ends up producing a let-binding of x to e in the result (define (lift-binding! x e) (set! bindings-rev (cons (list x e) bindings-rev))) - (define e^ (to-rhs e lift-binding!)) + (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) ; expr (Identifier rhs-expr -> Void) -> rhs-expr ; this doesn't need to be hygienic, only the whole pass. ; in other compilers, helpers may need to be hygienic too. - (define (to-rhs e lift-binding!) + (define (to-rhs! e lift-binding!) (syntax-parse e [((~datum let) ([x e]) body) - (lift-binding! #'x (to-rhs #'e lift-binding!)) - (to-rhs #'body lift-binding!)] + (define e^ (to-rhs! #'e lift-binding!)) + (lift-binding! #'x e^) + (to-rhs! #'body lift-binding!)] [(op a b) - (define/syntax-parse a^ (to-immediate #'a lift-binding!)) - (define/syntax-parse b^ (to-immediate #'b lift-binding!)) + (define/syntax-parse a^ (to-immediate! #'a lift-binding!)) + (define/syntax-parse b^ (to-immediate! #'b lift-binding!)) #'(op a^ b^)] - [_ this-syntax])) + [(~or ((~datum rkt) _) + x:id + n:number) + this-syntax])) ; expr (Identifier rhs-expr -> Void) -> immediate-expr - (define (to-immediate e lift-binding!) + (define (to-immediate! e lift-binding!) (syntax-parse e [(_ . 
_) (define/syntax-parse (tmp) (generate-temporaries '(tmp))) - (lift-binding! #'tmp (to-rhs this-syntax lift-binding!)) + (define e^ (to-rhs! this-syntax lift-binding!)) + (lift-binding! #'tmp e^) #'tmp] [_ this-syntax])) ; rhs-expr (listof (list Identifier rhs-expr) ) (define (wrap-lets e bindings) - (foldr (lambda (binding e) - (define/syntax-parse x (first binding)) - (define/syntax-parse rhs (second binding)) - (define/syntax-parse body e) - #'(let ([x rhs]) body)) - e - bindings))) + (match bindings + [(cons binding bindings) + (with-syntax ([x (first binding)] + [rhs (second binding)] + [body (wrap-lets e bindings)]) + #'(let ([x rhs]) body))] + ['() e]))) (begin-for-syntax ; anf-expr -> anf-expr From 3d88cd7ac7c1352da3de178869d243a652dee0b2 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 21:15:23 -0400 Subject: [PATCH 20/26] rest of michael's fixes --- tests/dsls/multipass.rkt | 68 +++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index 20d5345..24a1b3a 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -4,7 +4,7 @@ ;; arithmetic + let -> ANF -> prune unused variables -> racket (require "../../testing.rkt" - (for-syntax racket/match racket/pretty racket/list rackunit (only-in "../../private/ee-lib/main.rkt" define/hygienic))) + (for-syntax racket/match racket/syntax racket/list rackunit)) (syntax-spec (binding-class var @@ -60,9 +60,7 @@ (begin-for-syntax ; expr -> anf-expr - ; this doesn't really need to be hygienic, but in general, compiler passes often will. - (define/hygienic (to-anf e) - #:expression + (define (to-anf e) ; list of (list Identifier rhs-expr) ; most recent, and thus innermost, binding first (define bindings-rev '()) @@ -93,12 +91,12 @@ ; expr (Identifier rhs-expr -> Void) -> immediate-expr (define (to-immediate! e lift-binding!) (syntax-parse e - [(_ . 
_) - (define/syntax-parse (tmp) (generate-temporaries '(tmp))) + [(~or x:id n:number) this-syntax] + [_ + (define/syntax-parse tmp (generate-temporary 'tmp)) (define e^ (to-rhs! this-syntax lift-binding!)) (lift-binding! #'tmp e^) - #'tmp] - [_ this-syntax])) + #'tmp])) ; rhs-expr (listof (list Identifier rhs-expr) ) (define (wrap-lets e bindings) @@ -112,17 +110,13 @@ (begin-for-syntax ; anf-expr -> anf-expr - (define/hygienic (prune-unused-variables e) - #:expression + (define (prune-unused-variables e) (define used-vars (get-used-vars e)) (remove-unused-vars e used-vars)) ; anf-expr -> SymbolTable ; non-hygienic because it's just an analysis pass (define (get-used-vars e) - (define used-vars (local-symbol-set)) - (define (mark-as-used! x) - (symbol-set-add! used-vars x)) ; Go bottom-up, seeing references before their binders. ; The invariant is that we only traverse expressions that need ; to be evaluated. @@ -133,35 +127,31 @@ ; we need its rhs' referenced variables too, so recur on the rhs. ; If we see a binder that isn't marked as used, it was never referenced, ; so we don't traverse its rhs since it isn't needed. - (let mark-used-variables! ([e e]) - (syntax-parse e - [((~datum let) ([x e]) body) - (mark-used-variables! #'body) - (when (symbol-set-member? used-vars #'x) - (mark-used-variables! #'e))] - [(op a b) - (mark-used-variables! #'a) - (mark-used-variables! #'b)] - [x:id - (mark-as-used! #'x)] - [((~datum rkt) e) - (for ([x (get-racket-referenced-identifiers [var] #'e)]) - (mark-as-used! x))] - [_ (void)])) - used-vars) + (syntax-parse e + [((~datum let) ([x e]) body) + (define body-vars (get-used-vars #'body)) + (if (symbol-set-member? 
body-vars #'x) + (symbol-set-union body-vars (get-used-vars #'e)) + body-vars)] + [(op a b) + (symbol-set-union (get-used-vars #'a) (get-used-vars #'b))] + [x:id + (immutable-symbol-set #'x)] + [((~datum rkt) e) + (apply immutable-symbol-set (get-racket-referenced-identifiers [var] #'e))] + [n:number (immutable-symbol-set)])) ; anf-expr SymbolTable -> anf-expr (define (remove-unused-vars e used-vars) - (let loop ([e e]) - (syntax-parse e - [((~and let (~datum let)) ([x e]) body) - (define/syntax-parse body^ (loop #'body)) - (if (symbol-set-member? used-vars #'x) - ; no need to recur on e since it's not a let - #'(let ([x e]) - body^) - #'body^)] - [_ this-syntax])))) + (syntax-parse e + [((~and let (~datum let)) ([x e]) body) + (define/syntax-parse body^ (remove-unused-vars #'body used-vars)) + (if (symbol-set-member? used-vars #'x) + ; no need to recur on e since it's not a let + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax]))) (define-syntax compile-anf (syntax-parser From 98324f084cb0ccfbc80bb2626ca04ccd601e9743 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 21:22:50 -0400 Subject: [PATCH 21/26] copy new stuff into hidden true code --- scribblings/tutorial/multipass-tutorial.scrbl | 225 +++++++++++------- 1 file changed, 133 insertions(+), 92 deletions(-) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index 946a828..a620640 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -17,129 +17,170 @@ Here is the syntax-spec of our language: @repl[ #:hidden #t (module grammar racket - (provide (all-defined-out) (for-syntax (all-defined-out))) - (require "main.rkt" (for-syntax racket syntax/parse)) - (syntax-spec - (binding-class var #:reference-compiler immutable-reference-compiler) - (nonterminal expr - n:number - x:var - ; need to use ~literal because you can't re-use let in the other non-terminals - ((~literal let) 
([x:var e:expr]) body:expr) - #:binding (scope (bind x) body) - ((~literal +) a:expr b:expr) - ((~literal *) a:expr b:expr) - ((~literal /) a:expr b:expr) - (rkt e:racket-expr)) - (nonterminal anf-expr - ((~literal let) ([x:var e:rhs-expr]) body:anf-expr) - #:binding (scope (bind x) body) - e:rhs-expr) - (nonterminal rhs-expr - ((~literal +) a:immediate-expr b:immediate-expr) - ((~literal *) a:immediate-expr b:immediate-expr) - ((~literal /) a:immediate-expr b:immediate-expr) - ((~literal rkt) e:racket-expr) - e:immediate-expr) - (nonterminal immediate-expr - x:var - n:number) - - (host-interface/expression - (eval-expr e:expr) - #'(compile-expr e))) +(provide (all-defined-out) (for-space anf (all-defined-out)) (for-syntax (all-defined-out) (for-space anf (all-defined-out)))) + (require "main.rkt" (for-syntax syntax/parse racket)) + (require (for-syntax racket/match racket/syntax racket/list)) +(syntax-spec + (binding-class var + #:reference-compiler immutable-reference-compiler) + (nonterminal expr + #:binding-space anf + n:number + x:var + (let ([x:var e:expr]) body:expr) + #:binding (scope (bind x) body) + (+ a:expr b:expr) + (* a:expr b:expr) + (/ a:expr b:expr) + (rkt e:racket-expr)) + (nonterminal anf-expr + #:binding-space anf + ((~datum let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding (scope (bind x) body) + e:rhs-expr) + (nonterminal rhs-expr + #:binding-space anf + ((~datum +) a:immediate-expr b:immediate-expr) + ((~datum *) a:immediate-expr b:immediate-expr) + ((~datum /) a:immediate-expr b:immediate-expr) + ((~datum rkt) e:racket-expr) + e:immediate-expr) + (nonterminal immediate-expr + #:binding-space anf + x:var + n:number) + + (host-interface/expression + (eval-expr e:expr) + #'(compile-expr e))) + +(begin-for-syntax + (define local-expand-anf (nonterminal-expander anf-expr))) + +(define-syntax compile-expr + (syntax-parser + [(_ e) + ; I chose to use compile-time functions instead of macros because there is a lot + ; of non-syntax data to pass 
around. But we still get hygiene with define/hygienic. + + ; need to expand to make sure everything is properly bound + ; for the analysis pass, which uses symbol tables. + (define e/anf (local-expand-anf (to-anf #'e) #:should-rename? #t)) + (define e/pruned (prune-unused-variables e/anf)) + ; this last local-expand-anf might be unnecessary for this compiler, but i'll leave it in + ; since most compilers would need it. + (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) + #'(compile-anf e/pruned^)])) + (begin-for-syntax + ; expr -> anf-expr (define (to-anf e) + ; list of (list Identifier rhs-expr) + ; most recent, and thus innermost, binding first (define bindings-rev '()) - (define (bind! x e) (set! bindings-rev (cons (list x e) bindings-rev))) - (define e^ (to-rhs e bind!)) + ; Identifier rhs-expr -> Void + ; ends up producing a let-binding of x to e in the result + (define (lift-binding! x e) (set! bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) - (define (to-rhs e bind!) + ; expr (Identifier rhs-expr -> Void) -> rhs-expr + ; this doesn't need to be hygienic, only the whole pass. + ; in other compilers, helpers may need to be hygienic too. + (define (to-rhs! e lift-binding!) (syntax-parse e - [((~literal let) ([x e]) body) - (bind! #'x (to-rhs #'e bind!)) - (to-rhs #'body bind!)] + [((~datum let) ([x e]) body) + (define e^ (to-rhs! #'e lift-binding!)) + (lift-binding! #'x e^) + (to-rhs! #'body lift-binding!)] [(op a b) - (define/syntax-parse a^ (to-immediate #'a bind!)) - (define/syntax-parse b^ (to-immediate #'b bind!)) + (define/syntax-parse a^ (to-immediate! #'a lift-binding!)) + (define/syntax-parse b^ (to-immediate! #'b lift-binding!)) #'(op a^ b^)] - [_ this-syntax])) + [(~or ((~datum rkt) _) + x:id + n:number) + this-syntax])) - (define (to-immediate e bind!) + ; expr (Identifier rhs-expr -> Void) -> immediate-expr + (define (to-immediate! 
e lift-binding!) (syntax-parse e - [(_ . _) - (define/syntax-parse (tmp) (generate-temporaries '(tmp))) - (bind! #'tmp (to-rhs this-syntax bind!)) - #'tmp] - [_ this-syntax])) - + [(~or x:id n:number) this-syntax] + [_ + (define/syntax-parse tmp (generate-temporary 'tmp)) + (define e^ (to-rhs! this-syntax lift-binding!)) + (lift-binding! #'tmp e^) + #'tmp])) + + ; rhs-expr (listof (list Identifier rhs-expr) ) (define (wrap-lets e bindings) - (foldr (lambda (binding e) - (define/syntax-parse x (first binding)) - (define/syntax-parse rhs (second binding)) - (define/syntax-parse body e) - #'(let ([x rhs]) body)) - e - bindings))) + (match bindings + [(cons binding bindings) + (with-syntax ([x (first binding)] + [rhs (second binding)] + [body (wrap-lets e bindings)]) + #'(let ([x rhs]) body))] + ['() e]))) (begin-for-syntax + ; anf-expr -> anf-expr (define (prune-unused-variables e) (define used-vars (get-used-vars e)) (remove-unused-vars e used-vars)) + ; anf-expr -> SymbolTable + ; non-hygienic because it's just an analysis pass (define (get-used-vars e) - (define used-vars (local-symbol-set)) - (define (mark-as-used! x) - (symbol-set-add! used-vars x)) - (let mark-used-variables! ([e e]) - (syntax-parse e - [((~literal let) ([x e]) body) - (mark-used-variables! #'body) - (when (symbol-set-member? used-vars #'x) - (mark-used-variables! #'e))] - [(op a b) - (mark-used-variables! #'a) - (mark-used-variables! #'b)] - [x:id - (mark-as-used! #'x)] - [_ (void)])) - used-vars) - + ; Go bottom-up, seeing references before their binders. + ; The invariant is that we only traverse expressions that need + ; to be evaluated. + ; The innermost expression is needed, so we traverse it. From there, + ; we only traverse expressions that are (transitively) needed. + ; If we see a reference, mark it as used. + ; If we see a binder that is marked as used, + ; we need its rhs' referenced variables too, so recur on the rhs. 
+ ; If we see a binder that isn't marked as used, it was never referenced, + ; so we don't traverse its rhs since it isn't needed. + (syntax-parse e + [((~datum let) ([x e]) body) + (define body-vars (get-used-vars #'body)) + (if (symbol-set-member? body-vars #'x) + (symbol-set-union body-vars (get-used-vars #'e)) + body-vars)] + [(op a b) + (symbol-set-union (get-used-vars #'a) (get-used-vars #'b))] + [x:id + (immutable-symbol-set #'x)] + [((~datum rkt) e) + (apply immutable-symbol-set (get-racket-referenced-identifiers [var] #'e))] + [n:number (immutable-symbol-set)])) + + ; anf-expr SymbolTable -> anf-expr (define (remove-unused-vars e used-vars) - (let loop ([e e]) - (syntax-parse e - [((~and let (~literal let)) ([x e]) body) - (define/syntax-parse body^ (loop #'body)) - (if (symbol-set-member? used-vars #'x) - #'(let ([x e]) - body^) - #'body^)] - [_ this-syntax])))) + (syntax-parse e + [((~and let (~datum let)) ([x e]) body) + (define/syntax-parse body^ (remove-unused-vars #'body used-vars)) + (if (symbol-set-member? used-vars #'x) + ; no need to recur on e since it's not a let + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax]))) (define-syntax compile-anf (syntax-parser - [(_ ((~literal let) ([x e]) body)) + [(_ ((~datum let) ([x e]) body)) #'(let ([x (compile-anf e)]) (compile-anf body))] [(_ (op a b)) #'(op a b)] - [(_ ((~literal rkt) e)) + [(_ ((~datum rkt) e)) #'(let ([x e]) (if (number? x) x (error 'rkt "expected a number, got ~a" x)))] [(_ e) #'e])) -(begin-for-syntax - (define local-expand-anf (nonterminal-expander anf-expr))) -(define-syntax compile-expr - (syntax-parser - [(_ e) - (define e/anf (local-expand-anf (to-anf #'e) #:should-rename? #t)) - (define e/pruned (prune-unused-variables e/anf)) - (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? 
#t)) - #'(compile-anf e/pruned^)])) - ) +) (require 'grammar "main.rkt" (for-syntax syntax/parse)) ] From 4241f47f6d0cd211eccd7496a2c22d7dff23a18f Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 21:33:54 -0400 Subject: [PATCH 22/26] update tutorial with code changes --- scribblings/tutorial/multipass-tutorial.scrbl | 126 +++++++++--------- 1 file changed, 65 insertions(+), 61 deletions(-) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index a620640..516755c 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -186,29 +186,34 @@ Here is the syntax-spec of our language: @racketmod[ racket -(require syntax-spec (for-syntax syntax/parse)) +(require syntax-spec (for-syntax syntax/parse racket/syntax racket/match racket/list)) (syntax-spec - (binding-class var #:reference-compiler immutable-reference-compiler) + (binding-class var + #:reference-compiler immutable-reference-compiler) (nonterminal expr + #:binding-space anf n:number x:var - ((~literal let) ([x:var e:expr]) body:expr) + (let ([x:var e:expr]) body:expr) #:binding (scope (bind x) body) - ((~literal +) a:expr b:expr) - ((~literal *) a:expr b:expr) - ((~literal /) a:expr b:expr) + (+ a:expr b:expr) + (* a:expr b:expr) + (/ a:expr b:expr) (rkt e:racket-expr)) (nonterminal anf-expr - ((~literal let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding-space anf + ((~datum let) ([x:var e:rhs-expr]) body:anf-expr) #:binding (scope (bind x) body) e:rhs-expr) (nonterminal rhs-expr - ((~literal +) a:immediate-expr b:immediate-expr) - ((~literal *) a:immediate-expr b:immediate-expr) - ((~literal /) a:immediate-expr b:immediate-expr) - ((~literal rkt) e:racket-expr) + #:binding-space anf + ((~datum +) a:immediate-expr b:immediate-expr) + ((~datum *) a:immediate-expr b:immediate-expr) + ((~datum /) a:immediate-expr b:immediate-expr) + ((~datum rkt) e:racket-expr) e:immediate-expr) 
(nonterminal immediate-expr + #:binding-space anf x:var n:number) @@ -250,37 +255,42 @@ Now let's automate this process: (begin-for-syntax (define (to-anf e) (define bindings-rev '()) - (define (bind! x e) (set! bindings-rev (cons (list x e) bindings-rev))) - (define e^ (to-rhs e bind!)) + (define (lift-binding! x e) (set! bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) - (define (to-rhs e bind!) + (define (to-rhs! e lift-binding!) (syntax-parse e - [((~literal let) ([x e]) body) - (bind! #'x (to-rhs #'e bind!)) - (to-rhs #'body bind!)] + [((~datum let) ([x e]) body) + (define e^ (to-rhs! #'e lift-binding!)) + (lift-binding! #'x e^) + (to-rhs! #'body lift-binding!)] [(op a b) - (define/syntax-parse a^ (to-immediate #'a bind!)) - (define/syntax-parse b^ (to-immediate #'b bind!)) + (define/syntax-parse a^ (to-immediate! #'a lift-binding!)) + (define/syntax-parse b^ (to-immediate! #'b lift-binding!)) #'(op a^ b^)] - [_ this-syntax])) + [(~or ((~datum rkt) _) + x:id + n:number) + this-syntax])) - (define (to-immediate e bind!) + (define (to-immediate! e lift-binding!) (syntax-parse e - [(_ . _) - (define/syntax-parse (tmp) (generate-temporaries '(tmp))) - (bind! #'tmp (to-rhs this-syntax bind!)) - #'tmp] - [_ this-syntax])) + [(~or x:id n:number) this-syntax] + [_ + (define/syntax-parse tmp (generate-temporary 'tmp)) + (define e^ (to-rhs! this-syntax lift-binding!)) + (lift-binding! 
#'tmp e^) + #'tmp])) (define (wrap-lets e bindings) - (foldr (lambda (binding e) - (define/syntax-parse x (first binding)) - (define/syntax-parse rhs (second binding)) - (define/syntax-parse body e) - #'(let ([x rhs]) body)) - e - bindings))) + (match bindings + [(cons binding bindings) + (with-syntax ([x (first binding)] + [rhs (second binding)] + [body (wrap-lets e bindings)]) + #'(let ([x rhs]) body))] + ['() e]))) ] Our transformation goes through the expression, recording the temporary variable bindings to lift. The final @racket[rhs-expr] returned by @racket[to-rhs] will be the body of the innermost @racket[let] at the end of the transformation. Converting to an @racket[rhs-expr] or an @racket[immediate-expr] has the side effect of recording a binding pair to be lifted, and the result of replacing complex subexpressions with temporary variable references is returned from each helper. @@ -334,33 +344,27 @@ Without pruning, this would print something, but with pruning, it would not. Our (remove-unused-vars e used-vars)) (define (get-used-vars e) - (define used-vars (local-symbol-set)) - (define (mark-as-used! x) - (symbol-set-add! used-vars x)) - (let mark-used-variables! ([e e]) - (syntax-parse e - [((~literal let) ([x e]) body) - (mark-used-variables! #'body) - (when (symbol-set-member? used-vars #'x) - (mark-used-variables! #'e))] - [(op a b) - (mark-used-variables! #'a) - (mark-used-variables! #'b)] - [x:id - (mark-as-used! #'x)] - [_ (void)])) - used-vars) + (syntax-parse e + [((~datum let) ([x e]) body) + (define body-vars (get-used-vars #'body)) + (if (symbol-set-member? 
body-vars #'x) + (symbol-set-union body-vars (get-used-vars #'e)) + body-vars)] + [(op a b) + (symbol-set-union (get-used-vars #'a) (get-used-vars #'b))] + [x:id + (immutable-symbol-set #'x)] + [(~or ((~datum rkt) _) n:number) (immutable-symbol-set)])) (define (remove-unused-vars e used-vars) - (let loop ([e e]) - (syntax-parse e - [((~literal let) ([x e]) body) - (define/syntax-parse body^ (loop #'body)) - (if (symbol-set-member? used-vars #'x) - #'(let ([x e]) - body^) - #'body^)] - [_ this-syntax])))) + (syntax-parse e + [((~and let (~datum let)) ([x e]) body) + (define/syntax-parse body^ (remove-unused-vars #'body used-vars)) + (if (symbol-set-member? used-vars #'x) + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax]))) ] @;TODO don't ignore racket subexpression references. Requires fixing a bug though. @@ -398,10 +402,10 @@ Finally, we must implement compilation of A-normal form expressions to Racket, w @racketblock[ (define-syntax compile-anf (syntax-parser - [(_ ((~literal let) ([x e]) body)) + [(_ ((~datum let) ([x e]) body)) #'(let ([x (compile-anf e)]) (compile-anf body))] [(_ (op a b)) #'(op a b)] - [(_ ((~literal rkt) e)) + [(_ ((~datum rkt) e)) #'(let ([x e]) (if (number? 
x) x @@ -412,7 +416,7 @@ Finally, we must implement compilation of A-normal form expressions to Racket, w @repl[ (eval-expr 1) (eval-expr (let ([x 1]) (let ([y 2]) x))) -(eval-expr (let ([unused (rkt (displayln "hello!"))]) 42)) +(eval-expr (let ([unused (rkt (displayln "can anyone hear me?"))]) 42)) ] To summarize the key points: From 8431dac2f8376f114640d8751206ff2016570743 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 17 Jul 2025 21:43:10 -0400 Subject: [PATCH 23/26] comments on functions --- scribblings/tutorial/multipass-tutorial.scrbl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index 516755c..60797fe 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -253,12 +253,18 @@ Now let's automate this process: @racketblock[ (begin-for-syntax + (code:comment2 "expr -> anf-expr") + (code:comment2 "convert an expression to A-normal form") (define (to-anf e) (define bindings-rev '()) + (code:comment2 "Identifier rhs-expr -> Void") + (code:comment2 "record a variable binding pair") (define (lift-binding! x e) (set! bindings-rev (cons (list x e) bindings-rev))) (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) + (code:comment2 "expr (Identifier rhs-expr -> Void) -> rhs-expr") + (code:comment2 "convert an expr to an rhs-expr, potentially recording bindings") (define (to-rhs! e lift-binding!) (syntax-parse e [((~datum let) ([x e]) body) @@ -274,6 +280,8 @@ Now let's automate this process: n:number) this-syntax])) + (code:comment2 "expr (Identifier rhs-expr -> Void) -> immediate-expr") + (code:comment2 "convert an expr to an immediate-expr, potentially recording bindings") (define (to-immediate! e lift-binding!) (syntax-parse e [(~or x:id n:number) this-syntax] @@ -283,6 +291,8 @@ Now let's automate this process: (lift-binding! 
#'tmp e^) #'tmp])) + (code:comment2 "rhs-expr (Listof (List Identifier rhs-expr)) -> anf-expr") + (code:comment2 "wrap the innermost expression with `let`s for the bindings that were recorded") (define (wrap-lets e bindings) (match bindings [(cons binding bindings) @@ -339,10 +349,14 @@ Without pruning, this would print something, but with pruning, it would not. Our @racketblock[ (begin-for-syntax + (code:comment2 "anf-expr -> anf-expr") + (code:comment2 "reconstruct the expression, excluding definitions of unused variables") (define (prune-unused-variables e) (define used-vars (get-used-vars e)) (remove-unused-vars e used-vars)) + (code:comment2 "anf-expr -> ImmutableSymbolSet") + (code:comment2 "compute the set of used variables") (define (get-used-vars e) (syntax-parse e [((~datum let) ([x e]) body) @@ -356,6 +370,8 @@ Without pruning, this would print something, but with pruning, it would not. Our (immutable-symbol-set #'x)] [(~or ((~datum rkt) _) n:number) (immutable-symbol-set)])) + (code:comment2 "anf-expr ImmutableSymbolSet -> anf-expr") + (code:comment2 "reconstruct the expression, excluding definitions of specified unused variables") (define (remove-unused-vars e used-vars) (syntax-parse e [((~and let (~datum let)) ([x e]) body) From 81094cc72cdd7016da2876edd0cc51bf44f8f2a7 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Fri, 18 Jul 2025 10:16:49 -0400 Subject: [PATCH 24/26] some edits with michael --- scribblings/tutorial/multipass-tutorial.scrbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index 60797fe..ec17c54 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -368,7 +368,7 @@ Without pruning, this would print something, but with pruning, it would not. 
Our (symbol-set-union (get-used-vars #'a) (get-used-vars #'b))] [x:id (immutable-symbol-set #'x)] - [(~or ((~datum rkt) _) n:number) (immutable-symbol-set)])) + [(~or ((~datum rkt) _) n:number) (immutable-symbol-set)])) (code:comment2 "anf-expr ImmutableSymbolSet -> anf-expr") (code:comment2 "reconstruct the expression, excluding definitions of specified unused variables") From c07b3b8052f68bc7627239a9f4bb5eb6294f4b6a Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 24 Jul 2025 20:21:00 -0400 Subject: [PATCH 25/26] expr ~> full-expr --- scribblings/tutorial/multipass-tutorial.scrbl | 42 +++++++++---------- tests/dsls/multipass.rkt | 18 ++++---- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index ec17c54..7163f0e 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -23,15 +23,15 @@ Here is the syntax-spec of our language: (syntax-spec (binding-class var #:reference-compiler immutable-reference-compiler) - (nonterminal expr + (nonterminal full-expr #:binding-space anf n:number x:var - (let ([x:var e:expr]) body:expr) + (let ([x:var e:full-expr]) body:full-expr) #:binding (scope (bind x) body) - (+ a:expr b:expr) - (* a:expr b:expr) - (/ a:expr b:expr) + (+ a:full-expr b:full-expr) + (* a:full-expr b:full-expr) + (/ a:full-expr b:full-expr) (rkt e:racket-expr)) (nonterminal anf-expr #:binding-space anf @@ -51,7 +51,7 @@ Here is the syntax-spec of our language: n:number) (host-interface/expression - (eval-expr e:expr) + (eval-expr e:full-expr) #'(compile-expr e))) (begin-for-syntax @@ -73,7 +73,7 @@ Here is the syntax-spec of our language: #'(compile-anf e/pruned^)])) (begin-for-syntax - ; expr -> anf-expr + ; full-expr -> anf-expr (define (to-anf e) ; list of (list Identifier rhs-expr) ; most recent, and thus innermost, binding first @@ -84,7 +84,7 @@ Here is the syntax-spec of our 
language: (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) - ; expr (Identifier rhs-expr -> Void) -> rhs-expr + ; full-expr (Identifier rhs-expr -> Void) -> rhs-expr ; this doesn't need to be hygienic, only the whole pass. ; in other compilers, helpers may need to be hygienic too. (define (to-rhs! e lift-binding!) @@ -102,7 +102,7 @@ Here is the syntax-spec of our language: n:number) this-syntax])) - ; expr (Identifier rhs-expr -> Void) -> immediate-expr + ; full-expr (Identifier rhs-expr -> Void) -> immediate-expr (define (to-immediate! e lift-binding!) (syntax-parse e [(~or x:id n:number) this-syntax] @@ -190,15 +190,15 @@ racket (syntax-spec (binding-class var #:reference-compiler immutable-reference-compiler) - (nonterminal expr + (nonterminal full-expr #:binding-space anf n:number x:var - (let ([x:var e:expr]) body:expr) + (let ([x:var e:full-expr]) body:full-expr) #:binding (scope (bind x) body) - (+ a:expr b:expr) - (* a:expr b:expr) - (/ a:expr b:expr) + (+ a:full-expr b:full-expr) + (* a:full-expr b:full-expr) + (/ a:full-expr b:full-expr) (rkt e:racket-expr)) (nonterminal anf-expr #:binding-space anf @@ -218,7 +218,7 @@ racket n:number) (host-interface/expression - (eval-expr e:expr) + (eval-expr e:full-expr) #'(compile-expr e))) ] @@ -227,8 +227,8 @@ Our language supports arithmetic, local variables, and Racket subexpressions. We have the following nonterminals: @itemlist[ -@item{@racket[expr]: The surface syntax of a program} -@item{@racket[anf-expr]: An expression in A-normal form. Users will not be writing these expressions; the compiler will transform @racket[expr]s the user writes into @racket[anf-expr]s.} +@item{@racket[full-expr]: The surface syntax of a program} +@item{@racket[anf-expr]: An expression in A-normal form. 
Users will not be writing these expressions; the compiler will transform @racket[full-expr]s the user writes into @racket[anf-expr]s.} @item{@racket[rhs-expr]: An expression which is allowed to be on the right-hand side of a binding pair in an expression when it is in A-normal form. Conceptually, these expressions take at most one "step" of reduction to evaluate. In other words, no nested expressions (except for @racket[rkt] expressions).} @item{@racket[immediate-expr]: Atomic expressions that can immediately be evaluated.} ] @@ -253,7 +253,7 @@ Now let's automate this process: @racketblock[ (begin-for-syntax - (code:comment2 "expr -> anf-expr") + (code:comment2 "full-expr -> anf-expr") (code:comment2 "convert an expression to A-normal form") (define (to-anf e) (define bindings-rev '()) @@ -263,7 +263,7 @@ Now let's automate this process: (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) - (code:comment2 "expr (Identifier rhs-expr -> Void) -> rhs-expr") + (code:comment2 "full-expr (Identifier rhs-expr -> Void) -> rhs-expr") (code:comment2 "convert an expr to an rhs-expr, potentially recording bindings") (define (to-rhs! e lift-binding!) (syntax-parse e @@ -280,8 +280,8 @@ Now let's automate this process: n:number) this-syntax])) - (code:comment2 "expr (Identifier rhs-expr -> Void) -> immediate-expr") - (code:comment2 "convert an expr to an immediate-expr, potentially recording bindings") + (code:comment2 "full-expr (Identifier rhs-expr -> Void) -> immediate-expr") + (code:comment2 "convert a full-expr to an immediate-expr, potentially recording bindings") (define (to-immediate! e lift-binding!) 
(syntax-parse e [(~or x:id n:number) this-syntax] diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index 24a1b3a..9fc8fb6 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -9,15 +9,15 @@ (syntax-spec (binding-class var #:reference-compiler immutable-reference-compiler) - (nonterminal expr + (nonterminal full-expr #:binding-space anf n:number x:var - (let ([x:var e:expr]) body:expr) + (let ([x:var e:full-expr]) body:full-expr) #:binding (scope (bind x) body) - (+ a:expr b:expr) - (* a:expr b:expr) - (/ a:expr b:expr) + (+ a:full-expr b:full-expr) + (* a:full-expr b:full-expr) + (/ a:full-expr b:full-expr) (rkt e:racket-expr)) (nonterminal anf-expr #:binding-space anf @@ -37,7 +37,7 @@ n:number) (host-interface/expression - (eval-expr e:expr) + (eval-expr e:full-expr) #'(compile-expr e))) (begin-for-syntax @@ -59,7 +59,7 @@ #'(compile-anf e/pruned^)])) (begin-for-syntax - ; expr -> anf-expr + ; full-expr -> anf-expr (define (to-anf e) ; list of (list Identifier rhs-expr) ; most recent, and thus innermost, binding first @@ -70,7 +70,7 @@ (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) - ; expr (Identifier rhs-expr -> Void) -> rhs-expr + ; full-expr (Identifier rhs-expr -> Void) -> rhs-expr ; this doesn't need to be hygienic, only the whole pass. ; in other compilers, helpers may need to be hygienic too. (define (to-rhs! e lift-binding!) @@ -88,7 +88,7 @@ n:number) this-syntax])) - ; expr (Identifier rhs-expr -> Void) -> immediate-expr + ; full-expr (Identifier rhs-expr -> Void) -> immediate-expr (define (to-immediate! e lift-binding!) 
(syntax-parse e [(~or x:id n:number) this-syntax] From 6ae15ea921c5b9ead9194541af818c48580d3629 Mon Sep 17 00:00:00 2001 From: Mike Delmonaco Date: Thu, 24 Jul 2025 20:36:18 -0400 Subject: [PATCH 26/26] omit scribblings from tests --- info.rkt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/info.rkt b/info.rkt index 1543619..fee46e4 100644 --- a/info.rkt +++ b/info.rkt @@ -11,4 +11,4 @@ (define build-deps '("racket-doc" "scribble-lib" "drracket" "typed-racket-lib")) (define scribblings '(("scribblings/main.scrbl" (multi-page) (experimental) "syntax-spec-dev"))) (define compile-omit-paths '("design" "demos")) -(define test-omit-paths '("design" "demos")) +(define test-omit-paths '("scribblings" "design" "demos"))