diff --git a/info.rkt b/info.rkt index 1543619..fee46e4 100644 --- a/info.rkt +++ b/info.rkt @@ -11,4 +11,4 @@ (define build-deps '("racket-doc" "scribble-lib" "drracket" "typed-racket-lib")) (define scribblings '(("scribblings/main.scrbl" (multi-page) (experimental) "syntax-spec-dev"))) (define compile-omit-paths '("design" "demos")) -(define test-omit-paths '("design" "demos")) +(define test-omit-paths '("scribblings" "design" "demos")) diff --git a/main.rkt b/main.rkt index 8b837d0..53328db 100644 --- a/main.rkt +++ b/main.rkt @@ -7,6 +7,8 @@ ... ...+ + #%host-expression + mutable-reference-compiler immutable-reference-compiler @@ -53,12 +55,14 @@ free-identifiers binding-identifiers alpha-equivalent? + subst get-racket-referenced-identifiers)) (require "private/syntax/interface.rkt" "private/runtime/compile.rkt" (for-syntax syntax/parse (except-in "private/ee-lib/main.rkt" racket-var) + "private/runtime/compile.rkt" "private/ee-lib/persistent-id-table.rkt" "private/ee-lib/binding.rkt" "private/runtime/binding-operations.rkt" diff --git a/private/ee-lib/flip-intro-scope.rkt b/private/ee-lib/flip-intro-scope.rkt index f946ad3..a0985e3 100644 --- a/private/ee-lib/flip-intro-scope.rkt +++ b/private/ee-lib/flip-intro-scope.rkt @@ -16,4 +16,6 @@ (define/who (flip-intro-scope stx) (check who syntax? stx) - ((make-intro-scope-introducer) stx 'flip)) + (if (syntax-transforming?) + ((make-intro-scope-introducer) stx 'flip) + stx)) diff --git a/private/runtime/binding-operations.rkt b/private/runtime/binding-operations.rkt index ba36eed..be2b6c6 100644 --- a/private/runtime/binding-operations.rkt +++ b/private/runtime/binding-operations.rkt @@ -2,7 +2,9 @@ (provide free-identifiers binding-identifiers + identifier=? alpha-equivalent? + subst get-racket-referenced-identifiers (rename-out [identifier=? compiled-identifier=?])) @@ -99,25 +101,22 @@ ; Syntax, Syntax [#:allow-host? Boolean] -> Boolean ; Are the two expressions alpha-equivalent? (define (alpha-equivalent? stx-a stx-b #:allow-host? [allow-host? #f]) - (define bound-reference=? (alpha-equivalent?/bindings stx-a stx-b allow-host?)) + (define bound-reference=? (syntaxes->bound-reference=? stx-a stx-b allow-host?)) (and bound-reference=? (alpha-equivalent?/references stx-a stx-b bound-reference=? allow-host?))) ; Syntax Syntax Boolean -> (or/c #f (Identifier Identifier -> Boolean)) -; check that the bindings of both expressions can be alpha-equivalent. -; returns bound-reference=?, or #f if the binding check fails. -(define (alpha-equivalent?/bindings stx-a stx-b allow-host?) +; if the two terms have corresponding binders, build bound-reference=? +; If they have different numbers of binders, return #f +; bound-reference=? answers "do these two references refer to corresponding binders?" +(define (syntaxes->bound-reference=? stx-a stx-b allow-host?) (define table-a (make-free-id-table)) (define table-b (make-free-id-table)) + ;; associate both binders with the same gensym (define (bind! identifier-a identifier-b) (define x (gensym)) - (free-id-table-set! table-a identifier-a x) - (free-id-table-set! table-b identifier-b x)) - (define (bound-reference=? identifier-a identifier-b) - (and (dict-has-key? table-a identifier-a) - (dict-has-key? table-b identifier-b) - (eq? (free-id-table-ref table-a identifier-a) - (free-id-table-ref table-b identifier-b)))) + (free-id-table-set! table-a (compiled-from identifier-a) x) + (free-id-table-set! table-b (compiled-from identifier-b) x)) (define binders-a (binding-identifiers stx-a #:allow-host? allow-host?)) (define binders-b (binding-identifiers stx-b #:allow-host? allow-host?)) ; must traverse binders before references @@ -127,7 +126,16 @@ [binder-b binders-b]) (bind! binder-a binder-b)) (and (= (length binders-a) (length binders-b)) - bound-reference=?)) + (substitutions->bound-reference=? table-a table-b))) + +;; FreeIdTable FreeIdTable -> (Identifier Identifier -> Boolean) +;; Do these two references refer to corresponding binders? +;; table-a and table-b should map corresponding binders to the same, unique value +(define ((substitutions->bound-reference=? table-a table-b) identifier-a identifier-b) + (and (dict-has-key? table-a (compiled-from identifier-a)) + (dict-has-key? table-b (compiled-from identifier-b)) + (eq? (free-id-table-ref table-a (compiled-from identifier-a)) + (free-id-table-ref table-b (compiled-from identifier-b))))) ; Syntax Syntax (Identifier Identifier -> Boolean) Boolean -> Boolean ; check that the references are alpha-equivalent. @@ -159,6 +167,22 @@ [(a b) (equal? (syntax->datum #'a) (syntax->datum #'b))]))) +;; Syntax Syntax Syntax -> Syntax +;; Replace all occurrences of target (by alpha equivalence) with replacement in stx. +;; Leaves host expressions unchanged. +(define (subst stx target replacement) + (let loop ([stx stx]) + (if (if (compiled-binder? target) + (and (compiled-reference? stx) (identifier=? stx target)) + (alpha-equivalent? stx target)) + replacement + (syntax-parse stx + ;; ignore host expressions + [((~literal #%host-expression) . _) this-syntax] + [(a . b) + (quasisyntax/loc this-syntax (#,(loop #'a) . #,(loop #'b)))] + [_ stx])))) + (define current-referenced-vars (make-parameter #f)) ; get the racket vars referenced in e of the provided binding classes @@ -174,9 +198,7 @@ 'expression '()) - (sequence->list (in-symbol-set (for/fold ([references (immutable-symbol-set)]) - ([x (in-symbol-set (current-referenced-vars))]) - (symbol-set-add references x)))))) + (sequence->list (in-symbol-set (current-referenced-vars))))) (define recording-reference-compiler (make-variable-like-reference-compiler diff --git a/private/syntax/interface.rkt b/private/syntax/interface.rkt index 19a4fdc..d80000e 100644 --- a/private/syntax/interface.rkt +++ b/private/syntax/interface.rkt @@ -359,6 +359,9 @@ ((~literal define-syntaxes) (x:racket-macro ...) e:expr) #:binding (export-syntaxes x ... e) + ((~literal begin) b:racket-body ...) + #:binding [(re-export b) ...] + e:racket-expr)) (define-syntax define-dsl-syntax diff --git a/private/test/syntax-class-expander.rkt b/private/test/syntax-class-expander.rkt new file mode 100644 index 0000000..6d1ec7a --- /dev/null +++ b/private/test/syntax-class-expander.rkt @@ -0,0 +1,238 @@ +#lang racket/base + +;; Similar to private/test/simple-bsepc.rkt, but attempting to create an expander that +;; uses syntax classes for nonterminals. +;; This was created to research a potential solution for +;; https://github.com/michaelballantyne/syntax-spec/issues/62 + +(require + (for-syntax + racket/base + syntax/parse + "../ee-lib/main.rkt" + "../runtime/binding-spec.rkt") + + "../ee-lib/define.rkt") + +#;;; ANF language +(syntax-spec + (binding-class my-var) + (nonterminal a-expr + e:c-expr + (my-let ([x:my-var e:c-expr]) b:a-expr) + #:binding (scope (bind x) b) + ;; body will expand before binding x + (where b:a-expr + ([x:my-var e:c-expr])) + #:binding (scope (bind x) b)) + (nonterminal c-expr + e:i-expr + (my-+ a:i-expr b:i-expr) + ;; application + (a:i-expr b:i-expr c:i-expr)) + (nonterminal i-expr + n:number + x:my-var)) + +(define-literal-forms anf-lits + "mylang forms may only be used in mylang" + (my-let my-+ where)) + +(begin-for-syntax + (struct my-var-rep ()) + + ;; hash from symbols to binding representations + ;; to simulate the real expander environment + (define pretend-binding-store (make-hash (list (cons 'bogus #f)))) + + (define-syntax-class a-expr + #:no-delimit-cut + #:literal-sets (anf-lits) + (pattern e:c-expr + #:attr expanded #'e.expanded) + (pattern (my-let ~! ([x:id e:expr]) b:expr) + ;; we expand x and b BEFORE e because the binding spec is + ;; [(scope (bind x) b) e] + ;; expansion order is driven by the binding spec + #:with x^:my-var-bind #'x + #:with b^:a-expr #'b + #:with e^:c-expr #'e + #:attr expanded #'(my-let ([x^.expanded e^.expanded]) b^.expanded)) + (pattern (where ~! b:expr ([x:id e:expr])) + ;; we expand x and b BEFORE e because the binding spec is + ;; [(scope (bind x) b) e] + ;; expansion order is driven by the binding spec + #:with x^:my-var-bind #'x + #:with b^:a-expr #'b + #:with e^:c-expr #'e + #:attr expanded #'(where b^.expanded ([x^.expanded e^.expanded])))) + + (define-syntax-class c-expr + #:no-delimit-cut + #:literal-sets (anf-lits) + (pattern e:i-expr + #:attr expanded (attribute e.expanded)) + ;; my-+ should be first, but due to the fake binding store, + ;; to get the my-+ shadowing test passing, this needed to be first. + ;; in a real implementation with the real binding store, the my-+ pattern would + ;; fail because the identifier wouldn't be referencing the literal, right?. + ;; you'd need to be painting scopes on syntax as you go, but that could happen in + ;; the pattern actions I guess. + (pattern (a:expr b:expr c:expr) + #:with a^:i-expr #'a + #:with b^:i-expr #'b + #:with c^:i-expr #'c + #:attr expanded #'(#%app a^.expanded b^.expanded c^.expanded)) + (pattern (my-+ ~! a:expr b:expr) + #:with a^:i-expr #'a + #:with b^:i-expr #'b + #:attr expanded #'(my-+ a^.expanded b^.expanded))) + + (define-syntax-class i-expr + #:no-delimit-cut + #:literal-sets (anf-lits) + (pattern n:number + #:attr expanded #'n) + (pattern x:my-var-ref + #:attr expanded #'x.expanded)) + + (define-syntax-class my-var-bind + #:no-delimit-cut + (pattern (~and x:id + (~do (when (hash-has-key? pretend-binding-store (syntax->datum #'x)) + (raise-syntax-error 'my-lang "duplicate binding" #'x)) + (hash-set! pretend-binding-store (syntax->datum #'x) (my-var-rep))) + (~undo (error "backtracked over a binding. flaw in language itself"))) + #:attr expanded #'x)) + + (define-syntax-class my-var-ref + #:no-delimit-cut + (pattern x:id + #:fail-unless (hash-has-key? pretend-binding-store (syntax->datum #'x)) "unbound variable" + #:fail-unless (my-var-rep? (hash-ref pretend-binding-store (syntax->datum #'x))) "expected a my-var" + #:attr expanded #'x))) + +(define-syntax (mylang stx) + (set! pretend-binding-store (make-hash (list (cons 'bogus #f)))) + (syntax-parse stx + [(_ e:a-expr) + #''e.expanded])) + +(require rackunit syntax/macro-testing) + +(define-syntax-rule (check-success e) + (check-equal? (mylang e) + 'e)) +(define-syntax-rule (check-failure e msg) + (check-exn + msg + (lambda () + (convert-compile-time-error (mylang e))))) + +(check-success 1) +(check-success (my-+ 1 2)) +(check-success (my-let ([x 1]) x)) +(check-failure + y + #rx"unbound var") +(check-failure + (my-let ([a 1]) + (my-let ([a 2]) + 3)) + #rx"duplicate binding") +(check-failure + bogus + #rx"expected a my-var") +(check-failure + (my-+ (my-let ([z 1]) z) 2) + #rx"expected number or expected identifier") +;; where. +;; this test fails if you expand left-to-right +(check-success + (where x + ([x 1]))) +;; shadow my-+ in where. +;; this test fails if you expand left-to-right +(check-equal? (mylang (where (my-+ 1 2) + ([my-+ 3]))) + ;; should not treat my-+ as a literal in the where body + '(where (#%app my-+ 1 2) + ([my-+ 3]))) +;; can't backtrack over a binding +(check-failure + (my-let ([x y]) x) + ;; an unbound var error would be better here + #rx"backtracked over a binding") +;; can't backtrack over a binding +(check-failure + (my-let ([x (my-+ y y)]) x) + #rx"backtracked over a binding") + +#| +examples that broke the original eager design: +(where x + ([x 1])) +this broke bc the reference parsed/expanded before the binding. syntax-parse parses (and thus expands bc of attr eagerness) +left to right. +(where (my-+ 1 2) + ([my-+ 3])) +this broke for a similar reason. This shows that even "structural" parsing needs bindings to detect literal shadowing. + +problems: +- if you do everything eager (and do binding class resolution in parsing), then where breaks because the body +gets parsed before the binding happens +- if you treat all ids as ids and ignore binding classes during parsing, then the my-+ shadow thing fails +because it will resolve to the literal instead of the shadowed thing +- the solution is to do binding-spec-driven expansion order, which requires promises and binding stuff has to happen in the post of the production. +but then, you'll still get the my-+ shadow problem because parsing needs to be driven by binding classes +to distinguish between shadowed literals and actual literals. + +constraints: +- to resolve literals vs references to bindings that shadow literals, you need to bind as you parse +- to do that, you need to delay parsing. parsing and expansion order must be driven by the binding spec. +- to avoid backtracking over a binding, you need to commit when you bind + +you can delay parsing with #:with + +current desired semantics: +full backtracking (even over binding classes), except you commit when you bind a variable. +TODO does cut in a post commit the way you need it to? +No, I don't think so. I ended up needing to put ~! in the production structure pattern + +example: + +(nonterminal my-expr + n:number + x:a-var + x:b-var + (let ([x:a-var e:my-expr]) b:my-expr) + #:binding (scope (bind x) b) + (let ([x:b-var e:my-expr]) b:my-expr) + #:binding (scope (bind x) b)) + +identifiers don't commit to a-var (which I'm pretty sure is the current syntax-spec behavior) +but (let ([x 1]) x) commits to the a-var let production +|# + +#| +After solving where and literal shadowing: + +The current semantics are full backtracking, with the exception that +binding sites cause commitment. + +limitation: moving sub-parsing into #:with messes up failure progress measurement. +many parse attempt paths end up with the same LATE progress so error messages suck. + +constraints: +- we need #:post or equivalent to control order of sub-expansion and therefore sub-parsing +- when most sub-parsing happens in posts, we get failure progress ties (lots of things are just LATE), which cause vague error messages. +- within a pattern, it looks like each post does not contribute additional progress beyond a single LATE. Not sure about this though. + + + + + +new design: +- form groups commit on literals +- trying to backtrack over bindings is an error +|# diff --git a/scribblings/reference/compiling.scrbl b/scribblings/reference/compiling.scrbl index 9cc0af2..7603d46 100644 --- a/scribblings/reference/compiling.scrbl +++ b/scribblings/reference/compiling.scrbl @@ -64,6 +64,9 @@ A variable-like reference compiler that allows references as well as mutations v References expand to their @tech{compiled identifier}. +@defform[(#%host-expression rkt-expr)] + +Racket subexpressions are wrapped with @racket[#%host-expression] during DSL expansion, which delays the expansion of the Racket subexpression until after compilation, allowing context like syntax parameters to be established by the compiler, which can be used by reference compilers. @section{Compiled identifiers vs surface syntax} @@ -232,9 +235,22 @@ Returns @racket[#t] if the two DSL expressions are alpha-equivalent, @racket[#f] Analysis of @tech{host expressions} is currently not supported. When given syntax that contains a host expression, the operation raises an error if @racket[allow-host?] is @racket[#f], or ignores that portion is syntax if @racket[allow-host?] is @racket[#t]. +@defproc[(subst [stx syntax?] [target syntax?] [replacement syntax?]) syntax?] + +Substitutes occurences of (expressions @racket[alpha-equivalent?] to) @racket[target] with @racket[replacement] in @racket[stx]. + +All arguments must be the result of DSL expansion, not just plain racket expressions. + +In the case that @racket[target] is an identifier from a binding position, references will be replaced by @racket[replacement]. + +Host expressions are left unchanged. + +NOTE: In order to avoid hygiene issues, it may be necessary to re-expand using @racket[nonterminal-expander] after substitution. +@;TODO example where you need to re-expand + @defform[(get-racket-referenced-identifiers [binding-class-id ...] expr)] -Returns an immutable symbol set containing identifiers of the specified binding classes that were referenced in racket (host) expressions in @racket[expr]. +Returns an immutable symbol set containing identifiers of the specified binding classes that were referenced in racket (host) expressions in @racket[expr]. If @racket[expr] is not a host expression, an exception is raised. @section{Expansion} diff --git a/scribblings/tutorial/main.scrbl b/scribblings/tutorial/main.scrbl index 8f55b26..11714b4 100644 --- a/scribblings/tutorial/main.scrbl +++ b/scribblings/tutorial/main.scrbl @@ -11,3 +11,4 @@ The tutorial is broken down into illustrative examples: @include-section["basic-tutorial.scrbl"] @include-section["stlc-tutorial.scrbl"] @include-section["multipass-tutorial.scrbl"] +@include-section["syntax-interpreter-tutorial.scrbl"] diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index f3ee38c..7163f0e 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -1,9 +1,444 @@ - #lang scribble/manual @(require (for-label racket racket/block racket/class racket/match racket/list syntax/parse "../../main.rkt") - scribble/example) + scribble/example + racket/sandbox) +@(define eval (make-base-eval '(require racket (for-syntax racket)))) +@(define-syntax-rule (repl body ...) (examples #:eval eval #:label #f body ...)) + +@title[#:tag "multipass"]{Advanced Tutorial: A Compiler with Transformative Passes} + +Many DSLs need a compiler that transforms syntax in several passes. Some passes may just be static checks, and others may actually transform the program, often to a restricted subset of the surface language. When using syntax-spec, some special care needs to be taken with transformative passes. To demonstrate how such a DSL can be implemented, we will create a language with an @hyperlink["https://en.wikipedia.org/wiki/A-normal_form"]{A-normal form} transformation and an unused variable pruning optimization. + +@section[#:tag "multipass-expander"]{Expander} + +Here is the syntax-spec of our language: + +@repl[ +#:hidden #t +(module grammar racket +(provide (all-defined-out) (for-space anf (all-defined-out)) (for-syntax (all-defined-out) (for-space anf (all-defined-out)))) + (require "main.rkt" (for-syntax syntax/parse racket)) + (require (for-syntax racket/match racket/syntax racket/list)) +(syntax-spec + (binding-class var + #:reference-compiler immutable-reference-compiler) + (nonterminal full-expr + #:binding-space anf + n:number + x:var + (let ([x:var e:full-expr]) body:full-expr) + #:binding (scope (bind x) body) + (+ a:full-expr b:full-expr) + (* a:full-expr b:full-expr) + (/ a:full-expr b:full-expr) + (rkt e:racket-expr)) + (nonterminal anf-expr + #:binding-space anf + ((~datum let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding (scope (bind x) body) + e:rhs-expr) + (nonterminal rhs-expr + #:binding-space anf + ((~datum +) a:immediate-expr b:immediate-expr) + ((~datum *) a:immediate-expr b:immediate-expr) + ((~datum /) a:immediate-expr b:immediate-expr) + ((~datum rkt) e:racket-expr) + e:immediate-expr) + (nonterminal immediate-expr + #:binding-space anf + x:var + n:number) + + (host-interface/expression + (eval-expr e:full-expr) + #'(compile-expr e))) + +(begin-for-syntax + (define local-expand-anf (nonterminal-expander anf-expr))) + +(define-syntax compile-expr + (syntax-parser + [(_ e) + ; I chose to use compile-time functions instead of macros because there is a lot + ; of non-syntax data to pass around. But we still get hygiene with define/hygienic. + + ; need to expand to make sure everything is properly bound + ; for the analysis pass, which uses symbol tables. + (define e/anf (local-expand-anf (to-anf #'e) #:should-rename? #t)) + (define e/pruned (prune-unused-variables e/anf)) + ; this last local-expand-anf might be unnecessary for this compiler, but i'll leave it in + ; since most compilers would need it. + (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) + #'(compile-anf e/pruned^)])) + +(begin-for-syntax + ; full-expr -> anf-expr + (define (to-anf e) + ; list of (list Identifier rhs-expr) + ; most recent, and thus innermost, binding first + (define bindings-rev '()) + ; Identifier rhs-expr -> Void + ; ends up producing a let-binding of x to e in the result + (define (lift-binding! x e) (set! bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs! e lift-binding!)) + (wrap-lets e^ (reverse bindings-rev))) + + ; full-expr (Identifier rhs-expr -> Void) -> rhs-expr + ; this doesn't need to be hygienic, only the whole pass. + ; in other compilers, helpers may need to be hygienic too. + (define (to-rhs! e lift-binding!) + (syntax-parse e + [((~datum let) ([x e]) body) + (define e^ (to-rhs! #'e lift-binding!)) + (lift-binding! #'x e^) + (to-rhs! #'body lift-binding!)] + [(op a b) + (define/syntax-parse a^ (to-immediate! #'a lift-binding!)) + (define/syntax-parse b^ (to-immediate! #'b lift-binding!)) + #'(op a^ b^)] + [(~or ((~datum rkt) _) + x:id + n:number) + this-syntax])) + + ; full-expr (Identifier rhs-expr -> Void) -> immediate-expr + (define (to-immediate! e lift-binding!) + (syntax-parse e + [(~or x:id n:number) this-syntax] + [_ + (define/syntax-parse tmp (generate-temporary 'tmp)) + (define e^ (to-rhs! this-syntax lift-binding!)) + (lift-binding! #'tmp e^) + #'tmp])) + + ; rhs-expr (listof (list Identifier rhs-expr) ) + (define (wrap-lets e bindings) + (match bindings + [(cons binding bindings) + (with-syntax ([x (first binding)] + [rhs (second binding)] + [body (wrap-lets e bindings)]) + #'(let ([x rhs]) body))] + ['() e]))) + +(begin-for-syntax + ; anf-expr -> anf-expr + (define (prune-unused-variables e) + (define used-vars (get-used-vars e)) + (remove-unused-vars e used-vars)) + + ; anf-expr -> SymbolTable + ; non-hygienic because it's just an analysis pass + (define (get-used-vars e) + ; Go bottom-up, seeing references before their binders. + ; The invariant is that we only traverse expressions that need + ; to be evaluated. + ; The innermost expression is needed, so we traverse it. From there, + ; we only traverse expressions that are (transitively) needed. + ; If we see a reference, mark it as used. + ; If we see a binder that is marked as used, + ; we need its rhs' referenced variables too, so recur on the rhs. + ; If we see a binder that isn't marked as used, it was never referenced, + ; so we don't traverse its rhs since it isn't needed. + (syntax-parse e + [((~datum let) ([x e]) body) + (define body-vars (get-used-vars #'body)) + (if (symbol-set-member? body-vars #'x) + (symbol-set-union body-vars (get-used-vars #'e)) + body-vars)] + [(op a b) + (symbol-set-union (get-used-vars #'a) (get-used-vars #'b))] + [x:id + (immutable-symbol-set #'x)] + [((~datum rkt) e) + (apply immutable-symbol-set (get-racket-referenced-identifiers [var] #'e))] + [n:number (immutable-symbol-set)])) + + ; anf-expr SymbolTable -> anf-expr + (define (remove-unused-vars e used-vars) + (syntax-parse e + [((~and let (~datum let)) ([x e]) body) + (define/syntax-parse body^ (remove-unused-vars #'body used-vars)) + (if (symbol-set-member? used-vars #'x) + ; no need to recur on e since it's not a let + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax]))) + +(define-syntax compile-anf + (syntax-parser + [(_ ((~datum let) ([x e]) body)) + #'(let ([x (compile-anf e)]) (compile-anf body))] + [(_ (op a b)) #'(op a b)] + [(_ ((~datum rkt) e)) + #'(let ([x e]) + (if (number? x) + x + (error 'rkt "expected a number, got ~a" x)))] + [(_ e) #'e])) + + +) +(require 'grammar "main.rkt" (for-syntax syntax/parse)) +] + +@racketmod[ +racket +(require syntax-spec (for-syntax syntax/parse racket/syntax racket/match racket/list)) +(syntax-spec + (binding-class var + #:reference-compiler immutable-reference-compiler) + (nonterminal full-expr + #:binding-space anf + n:number + x:var + (let ([x:var e:full-expr]) body:full-expr) + #:binding (scope (bind x) body) + (+ a:full-expr b:full-expr) + (* a:full-expr b:full-expr) + (/ a:full-expr b:full-expr) + (rkt e:racket-expr)) + (nonterminal anf-expr + #:binding-space anf + ((~datum let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding (scope (bind x) body) + e:rhs-expr) + (nonterminal rhs-expr + #:binding-space anf + ((~datum +) a:immediate-expr b:immediate-expr) + ((~datum *) a:immediate-expr b:immediate-expr) + ((~datum /) a:immediate-expr b:immediate-expr) + ((~datum rkt) e:racket-expr) + e:immediate-expr) + (nonterminal immediate-expr + #:binding-space anf + x:var + n:number) + + (host-interface/expression + (eval-expr e:full-expr) + #'(compile-expr e))) +] + +Our language supports arithmetic, local variables, and Racket subexpressions. + +We have the following nonterminals: + +@itemlist[ +@item{@racket[full-expr]: The surface syntax of a program} +@item{@racket[anf-expr]: An expression in A-normal form. Users will not be writing these expressions; the compiler will transform @racket[full-expr]s the user writes into @racket[anf-expr]s.} +@item{@racket[rhs-expr]: An expression which is allowed to be on the right-hand side of a binding pair in an expression when it is in A-normal form. Conceptually, these expressions take at most one "step" of reduction to evaluate. In other words, no nested expressions (except for @racket[rkt] expressions).} +@item{@racket[immediate-expr]: Atomic expressions that can immediately be evaluated.} +] + +A-normal form makes the evaluation order of the program completely unambiguous and simplifies compilation to a language like assembly. Now, let's transform our surface syntax to it! + +@section{A-normal Form Transformation} + +The core idea of transforming to A-normal form is extracting nested sub-expressions into temporary variables. For example: + +@racketblock[ +(+ (+ 1 2) (+ 3 4)) +~> +(let ([tmp1 (+ 1 2)]) + (let ([tmp2 (+ 3 4)]) + (+ tmp1 tmp2))) +] + +To follow our grammar for an @racket[anf-expr], the arguments to functions like @racket[+] must be immediate expressions, like variable references or numbers. Our source program did not obey this rule, so we had to create temporary variables for subexpressions and replace each subexpression with a reference to its temporary variable. + +Now let's automate this process: + +@racketblock[ +(begin-for-syntax + (code:comment2 "full-expr -> anf-expr") + (code:comment2 "convert an expression to A-normal form") + (define (to-anf e) + (define bindings-rev '()) + (code:comment2 "Identifier rhs-expr -> Void") + (code:comment2 "record a variable binding pair") + (define (lift-binding! x e) (set! bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs! e lift-binding!)) + (wrap-lets e^ (reverse bindings-rev))) + + (code:comment2 "full-expr (Identifier rhs-expr -> Void) -> rhs-expr") + (code:comment2 "convert an expr to an rhs-expr, potentially recording bindings") + (define (to-rhs! e lift-binding!) + (syntax-parse e + [((~datum let) ([x e]) body) + (define e^ (to-rhs! #'e lift-binding!)) + (lift-binding! #'x e^) + (to-rhs! #'body lift-binding!)] + [(op a b) + (define/syntax-parse a^ (to-immediate! #'a lift-binding!)) + (define/syntax-parse b^ (to-immediate! #'b lift-binding!)) + #'(op a^ b^)] + [(~or ((~datum rkt) _) + x:id + n:number) + this-syntax])) + + (code:comment2 "full-expr (Identifier rhs-expr -> Void) -> immediate-expr") + (code:comment2 "convert a full-expr to an immediate-expr, potentially recording bindings") + (define (to-immediate! e lift-binding!) + (syntax-parse e + [(~or x:id n:number) this-syntax] + [_ + (define/syntax-parse tmp (generate-temporary 'tmp)) + (define e^ (to-rhs! this-syntax lift-binding!)) + (lift-binding! #'tmp e^) + #'tmp])) + + (code:comment2 "rhs-expr (Listof (List Identifier rhs-expr)) -> anf-expr") + (code:comment2 "wrap the innermost expression with `let`s for the bindings that were recorded") + (define (wrap-lets e bindings) + (match bindings + [(cons binding bindings) + (with-syntax ([x (first binding)] + [rhs (second binding)] + [body (wrap-lets e bindings)]) + #'(let ([x rhs]) body))] + ['() e]))) +] + +Our transformation goes through the expression, recording the temporary variable bindings to lift. The final @racket[rhs-expr] returned by @racket[to-rhs] will be the body of the innermost @racket[let] at the end of the transformation. Converting to an @racket[rhs-expr] or an @racket[immediate-expr] has the side effect of recording a binding pair to be lifted, and the result of replacing complex subexpressions with temporary variable references is returned from each helper. + +Notice that the code generation pass is implemented as macro, while the intermediate passes are implemented as compile-time functions. Using a Racket macro for the code generator is convenient because it provide hygiene for any temporary names we introduce. For the intermediate passes we must use compile-time functions rather than macros, for three reasons: + +@itemlist[ +@item{ +The intermediate passes do not generate Racket syntax that can be further expanded by the Racket macro expander. Instead, they generate code in our DSL's intermediate representation. +} +@item{ +Compiler passes may need additional arguments and return values, which may not be syntax objects. This is possible with a compile-time function, but not with a macro. For example, our A-normal form transformation receives the @racket[bind!] procedure as an argument. +} +@item{ +Compiler passes may use side effects, and rely on a particular order of evaluation. For our A-normal form pass, we want to create @racket[let]-bindings for the innermost subexpressions first. We accomplish this via the way we order calls to the @racket[bind!] procedure. +} +] + +@section{Pruning unused variables} + +Using syntax-spec's symbol tables and binding operations, we can add an optimizing pass that removes unused variables. + +For example: + +@racketblock[ +(let ([x (+ 2 2)]) + (let ([y (+ 3 3)]) + x)) +~> +(let ([x (+ 2 2)]) + x) +] + +Since @racket[y] is not referenced, we can just remove its definition from the program. Note that this optimization only makes sense when the right-hand-side of a definition is free of side-effects. For example, pruning @racket[y] in this example would change the behavior of the program: + +@racketblock[ +(let ([x (+ 2 2)]) + (let ([y (rkt (begin (displayln "hello!") (+ 3 3)))]) + x)) +~> +(let ([x (+ 2 2)]) + x) +] + +Without pruning, this would print something, but with pruning, it would not. Our optimization shouldn't change the behavior of the program. This DSL is designed with the requirement that @racket[rkt] forms only have pure computations inside, but this cannot easily be checked. As such, we will assume Racket subexpressions are free of side effects, and our optimization will only be sound for side-effect-free Racket subexpressions. + +@racketblock[ +(begin-for-syntax + (code:comment2 "anf-expr -> anf-expr") + (code:comment2 "reconstruct the expression, excluding definitions of unused variables") + (define (prune-unused-variables e) + (define used-vars (get-used-vars e)) + (remove-unused-vars e used-vars)) + + (code:comment2 "anf-expr -> ImmutableSymbolSet") + (code:comment2 "compute the set of used variables") + (define (get-used-vars e) + (syntax-parse e + [((~datum let) ([x e]) body) + (define body-vars (get-used-vars #'body)) + (if (symbol-set-member? body-vars #'x) + (symbol-set-union body-vars (get-used-vars #'e)) + body-vars)] + [(op a b) + (symbol-set-union (get-used-vars #'a) (get-used-vars #'b))] + [x:id + (immutable-symbol-set #'x)] + [(~or ((~datum rkt) _) n:number) (immutable-symbol-set)])) + + (code:comment2 "anf-expr ImmutableSymbolSet -> anf-expr") + (code:comment2 "reconstruct the expression, excluding definitions of specified unused variables") + (define (remove-unused-vars e used-vars) + (syntax-parse e + [((~and let (~datum let)) ([x e]) body) + (define/syntax-parse body^ (remove-unused-vars #'body used-vars)) + (if (symbol-set-member? used-vars #'x) + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax]))) +] + +@;TODO don't ignore racket subexpression references. Requires fixing a bug though. +First, we figure out which variables are referenced, using a bottom-up traversal. We only include consider variables in the right-hand-side of a @racket[let] used if we have determined that the variable bound by the @racket[let] is used in its body. For now, we ignore references in Racket subexpressions. + +Then, with that knowledge, we reconstruct the program, only including bindings for used variables. + +This optimization is slightly simplified by having already transformed the program to A-normal form. We can see this in @racket[remove-unused-vars]: We don't need to recur on the right-hand-side of a let-binding because we know there are no variable bindings to be removed from that expression. + +@section{Putting it all Together} + +Due to the nature of expansion and binding structure, some special care needs to be taken in sequencing multiple transformative compiler passes. Since our A-normal form transformation adds new bindings, we need to re-expand the result so syntax-spec can compute and check binding information for use in later passes/compilation: + +@racketblock[ +(begin-for-syntax + (define local-expand-anf (nonterminal-expander anf-expr))) + +(define-syntax compile-expr + (syntax-parser + [(_ e) + (define e/anf (local-expand-anf (to-anf #'e) #:should-rename? #t)) + (define e/pruned (prune-unused-variables e/anf)) + (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) + #'(compile-anf e/pruned^)])) +] + +We perform this re-expansion using @racket[nonterminal-expander]. This function expects DSL syntax of a specified nonterminal (here, @racket[anf-expr]) and expands macros in the DSL code, checks binding structure, etc. It's kind of like @racket[local-expand] but for a particular nonterminal. This is what happens in a host interface that produces the expanded, core syntax that your compiler works with. We use @racket[#:should-rename? #t] to ensure that we re-compile and rename identifiers in this expansion. + +The expansion after pruning is technically unnecessary for this example since we are only removing bindings in that pass, but it is good to always make sure your compiler is receiving freshly expanding syntax. This extra expansion also makes sure your optimization produces valid syntax. In general, even if your compiler just has a single transformative pass before compilation, you should expand the result of the pass. + +An additional caveat is that identifiers need to undergo the same number of expansions for things to work properly. The easiest way to do this is to expand only the entire DSL expression at once, rather than expanding subexpressions individually. + +Finally, we must implement compilation of A-normal form expressions to Racket, which is straightforward: + +@racketblock[ +(define-syntax compile-anf + (syntax-parser + [(_ ((~datum let) ([x e]) body)) + #'(let ([x (compile-anf e)]) (compile-anf body))] + [(_ (op a b)) #'(op a b)] + [(_ ((~datum rkt) e)) + #'(let ([x e]) + (if (number? x) + x + (error 'rkt "expected a number, got ~a" x)))] + [(_ e) #'e])) +] + +@repl[ +(eval-expr 1) +(eval-expr (let ([x 1]) (let ([y 2]) x))) +(eval-expr (let ([unused (rkt (displayln "can anyone hear me?"))]) 42)) +] -@title[#:tag "multipass"]{Advanced Tutorial: A Compiler with Multiple Passes} +To summarize the key points: -This is a stub +@itemlist[ +@item{We used compile-time functions for compiler passes, rather than macros.} +@item{We can have multiple passes in a compiler simply by sequencing compile-time functions that operate on expanded DSL expressions.} +@item{Since we have transformative passes in our compiler, we must re-expand resulting syntax using @racket[nonterminal-expander] after each transformation.} +] diff --git a/scribblings/tutorial/syntax-interpreter-tutorial.scrbl b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl new file mode 100644 index 0000000..aec8500 --- /dev/null +++ b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl @@ -0,0 +1,271 @@ +#lang scribble/manual + +@(require (for-label racket racket/block racket/class racket/match racket/list syntax/parse "../../main.rkt") + scribble/example + racket/sandbox) +@(define eval (make-base-eval '(require racket))) +@(define-syntax-rule (repl body ...) (examples #:eval eval #:label #f body ...)) + +@title[#:tag "syntax-interpreter"]{Advanced Tutorial: An Interpreted Language} + +This guide demonstrates how to use syntax-spec to create an interpreted language, as well as the benefits of this approach. + +Typically, syntax-spec is used to create languages that compile to Racket. However, it's possible to use it to create an interpreted language as well. In such an implementation, syntax-spec will enforce the grammar, check binding, macro-expand the source program, etc. and pass off the expanded, core syntax to an interpreter that evaluates it to a value. As an example, let's create an interpreted implementation of the lambda calculus. + +@section{Expander} + +Here is the syntax-spec: + +@repl[ +#:hidden #t +(module grammar racket + (provide (all-defined-out) (for-syntax (all-defined-out)) (for-space lc (all-defined-out))) + (require "main.rkt" (for-syntax syntax/parse)) + (syntax-spec + (binding-class lc-var #:binding-space lc) + (extension-class lc-macro #:binding-space lc) + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (rkt e:expr) + (~> (e1 e2) + (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr)) + + (host-interface/expression + (lc-expand e:lc-expr) + #'#'e))) +(require 'grammar "main.rkt" (for-syntax syntax/parse)) +] + +@racketmod[ +racket +(require syntax-spec (for-syntax syntax/parse)) +(syntax-spec + (binding-class lc-var #:binding-space lc) + (extension-class lc-macro #:binding-space lc) + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (~> (e1 e2) + (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr)) + + (host-interface/expression + (lc-expand e:lc-expr) + #'#'e)) +] + +We have numbers, binary addition, variables, lambdas, and applications. The interesting bit is the host interface @racket[lc-expand]. The result is the expanded syntax itself! For example: + +@repl[ +(lc-expand ((lambda (x) x) 1)) +] + +The host-interface could also just invoke the interpreter directly on the syntax: + +@racketblock[ +(host-interface/expression + (lc e:lc-expr) + #'(lc-eval #'e empty-env)) +] + +This is what you'd normally do, but we haven't implemented @racket[lc-eval] yet so we'll stick with @racket[lc-expand]. + +Our language is also macro-extensible, so the result of @racket[lc-expand] expands macros away: + +@repl[ +(eval:no-prompt (require (for-syntax syntax/parse))) +(eval:no-prompt +(define-dsl-syntax let lc-macro + (syntax-parser + [(_ ([x:id e:expr]) body:expr) + #'((lambda (x) body) e)]))) +(lc-expand (let ([x 1]) (+ x x))) +] + +@section{Interpreter} + +We can use the output of @racket[lc-expand] as the input of an interpreter. But first, let's define some helpers. + +We will be building a strict, environment-based interpreter for this language, so we need to define what our environment will look like. + +The environment needs to map variables to values, and we have the result of syntax-spec expansion, so we can use symbol tables! This means our environment will map identifiers to values and will respect hygiene. syntax-spec gives us this benefit of hygienic environments for free, which is important. If we used a hash from symbols to values, which would contain no binding/hygiene information, an example like this would break: + +@racketblock[ +(define-dsl-syntax m lc-macro + (syntax-parser + [(_ e) #'(let ([tmp 2]) e)])) +(lc (let ([tmp 1]) (m tmp))) +] + +The macro-introduced @racket[tmp] would shadow the surface syntax @racket[tmp] and we'd get @racket[2] instead of @racket[1]. + +@repl[ +#:hidden #t +(require (for-template "main.rkt")) +] + +@repl[ +(eval:alts (eval:no-prompt (require (for-template syntax-spec))) (void)) +;; An Env is a (ImmutableBoundIdTable Value) +(eval:no-prompt (define empty-env (immutable-symbol-table))) +;; Env Identifier -> Value +(eval:no-prompt +(define (env-lookup env x) + (if (symbol-table-has-key? env x) + (symbol-table-ref env x) + x))) +;; Env Identifier Value -> Void +(eval:no-prompt +(define (env-extend env x v) + (symbol-table-set env x v))) +] + +One more thing we'll need is the ability to raise errors. Luckily, since we're operating on syntax, we can report the source location of an error. + +@repl[ +(eval:no-prompt (require racket/syntax-srcloc)) +(eval:no-prompt +(define (lc-error stx msg) + (define loc (syntax-srcloc stx)) + (if loc + (raise-user-error (format "~a: ~a" (srcloc->string loc) msg)) + (raise-user-error 'lc msg)))) +(eval:error (lc-error #'x "something went wrong")) +] + +Alright, now let's define our interpreter: + +@repl[ +(eval:no-prompt (require syntax/parse)) +(eval:no-prompt +(define-syntax-rule (lc e) + (lc-eval (lc-expand e) empty-env))) +(eval:no-prompt +(define (lc-eval stx env) + (syntax-parse stx + #:datum-literals (+ lambda #%app) + [n:number + (syntax->datum #'n)] + [(+ e1 e2) + (define v1 (lc-eval #'e1 env)) + (unless (number? v1) + (lc-error this-syntax "+ expects number")) + (define v2 (lc-eval #'e2 env)) + (unless (number? v2) + (lc-error this-syntax "+ expects number")) + (+ v1 v2)] + [x:id + (env-lookup env #'x)] + [(lambda (x:id) e:expr) + (lambda (v) (lc-eval #'e (env-extend env #'x v)))] + [(#%app e1 e2) + (match (lc-eval #'e1 env) + [(? procedure? f) + (f (lc-eval #'e2 env))] + [_ + (lc-error this-syntax "applied non-function")])]))) +(lc 1) +(lc (+ 1 1)) +(lc (let ([x 1]) (+ x x))) +(lc ((lambda (x) (+ x 1)) 3)) +(eval:error (lc (1 2))) +(define-dsl-syntax m lc-macro + (syntax-parser + [(_ e) #'(let ([tmp 2]) e)])) +(lc (let ([tmp 1]) (m tmp))) +] + +Pretty cool! + +To recap, we are using syntax-spec as a frontend for our interpreter, which operates on expanded syntax and uses symbol tables as an environment. + +Here are some of the benefits of writing an interpreter in this style: + +@itemlist[ +@item{We operate on syntax, which means we easily get source locations in errors and we can use syntax/parse to perform case analysis on expressions} +@item{Our interpreter can assume that the program is grammatically valid and well-bound since we are operating on the result of expansion from syntax-spec} +@item{We can use symbol tables for environments, which are hygienic} +@item{Our language is macro-extensible and our interpreter only has to operate on core forms} +] + +@section{Supporting Racket Subexpressions} + +We can add limited support for Racket subexpressions to our language: + +@racketblock[ +(syntax-spec + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (rkt e:expr) + (~> (e1 e2) + ;; this is necessary to preserve source location, properties, etc. + (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr))) +] + +We added @racket[(rkt e:expr)] to the productions. Usually for racket expressions, we use @racket[racket-expr], which wraps the expression with @racket[#%host-expression]. This does some work behind the scenes to make sure we can refer to DSL bindings in the Racket expression. But for this syntax interpreter, that won't work, so we'll just use @racket[expr] to avoid wrapping the expression in a @racket[#%host-expression]. Evaluation is simple: + +@repl[ +(eval:no-prompt +(define (lc-eval stx env) + (syntax-parse stx + #:datum-literals (+ lambda #%app rkt) + [n:number + (syntax->datum #'n)] + [(+ e1 e2) + (define v1 (lc-eval #'e1 env)) + (unless (number? v1) + (lc-error this-syntax "+ expects number")) + (define v2 (lc-eval #'e2 env)) + (unless (number? v2) + (lc-error this-syntax "+ expects number")) + (+ v1 v2)] + [x:id + (env-lookup env #'x)] + [(lambda (x:id) e:expr) + (lambda (v) (lc-eval #'e (env-extend env #'x v)))] + [(#%app e1 e2) + (match (lc-eval #'e1 env) + [(? procedure? f) + (f (lc-eval #'e2 env))] + [_ + (lc-error this-syntax "applied non-function")])] + [(rkt e) + (eval #'e)]))) +(lc (rkt (* 4 2))) +] + +We just add a case that calls @racket[eval] on the Racket expression! However, there are some limitations with this method. In particular, we have access to top-level names like @racket[*], but not local variables defined outside of the Racket subexpression, because @racket[eval] is evaluating against the global namespace and not capturing local variable definitions. + +@repl[ +(define top-level-x 2) +(lc (rkt top-level-x)) +(eval:error + (let ([local-x 3]) + (lc (rkt local-x)))) +] + +Similarly, we cannot reference @racket[lc-var]s: + +@repl[ +(eval:error + (lc (let ([lc-x 4]) (rkt lc-x)))) +] diff --git a/tests/binding-operations.rkt b/tests/binding-operations.rkt index 6561cb9..969e37c 100644 --- a/tests/binding-operations.rkt +++ b/tests/binding-operations.rkt @@ -1,11 +1,15 @@ #lang racket/base -(require "../testing.rkt") +(require "../testing.rkt" + ;; for runtime syntax-interpreter testing, we want this at phase 0 + "../private/runtime/binding-operations.rkt") (syntax-spec (binding-class var) + (extension-class macro #:binding-space dsl) (nonterminal expr #:binding-space dsl + #:allow-extension macro n:number v:var (+ e1:expr e2:expr) @@ -13,6 +17,7 @@ (lambda (x:var) e:expr) #:binding (scope (bind x) e) + (e1:expr e2:expr) (letrec ([x:var e:expr] ...) body:expr) #:binding (scope (bind x) ... e ... (scope body))) @@ -123,6 +128,24 @@ (lambda (a) x) (lambda (x) x))) +(check-false + (expr/alpha-equivalent? + (lambda (x) (lambda (x) x)) + (lambda (x) (lambda (y) x)))) + +;; alpha equivalence should respect hygiene +(define-dsl-syntax m macro + (syntax-parser + [(m y:id) #'(lambda (x) y)])) +;; fails bc the binding equivalence uses a free id table even though there are no bindings on expanded syntax, +;; so it ends up not being hygienic +(check-false + (expr/alpha-equivalent? + ;; (lambda (x1) (lambda (x2) x1)) because macro introduction scope + (lambda (x) (m x)) + ;; (lambda (x1) (lambda (x2) x2)) + (lambda (x) (lambda (x) x)))) + (check-true (expr/alpha-equivalent? (letrec ([f g] @@ -150,3 +173,93 @@ (expr/alpha-equivalent?/ignore-host (+ x (host PI)) (+ x (host PI)))) + +(syntax-spec + (host-interface/expression + (subst-expr e:expr target:expr replacement:expr) + #`'#,(subst #'e #'target #'replacement))) + +;; substitute whole expression +(check-equal? (subst-expr (lambda (x) x) (lambda (y) y) 1) + 1) +;; substitute sub-expression +(check-equal? (subst-expr (letrec ([x (lambda (x) x)]) x) + (lambda (y) y) + 1) + '(letrec ([x 1]) x)) +;; substitute sub-expression multiple times +(check-equal? (subst-expr (letrec ([x (lambda (x) x)]) (lambda (z) z)) + (lambda (y) y) + 1) + '(letrec ([x 1]) 1)) + +(check-equal? (subst-expr (lambda (x) (+ 1 1)) (+ 1 1) (+ 2 2)) + '(lambda (x) (+ 2 2))) + +;; substitution of a free variable +(syntax-spec + (host-interface/expression (beta/subst app:expr) + (syntax-parse #'app + [(((~datum lambda) (x) body) arg) + #`'#,(subst #'body #'x #'arg)]))) + +(check-equal? + (beta/subst ((lambda (x) (+ x x)) + 1)) + '(+ 1 1)) + +(check-equal? + (beta/subst ((lambda (x) (lambda (y) (+ x y))) + 1)) + '(lambda (y) (+ 1 y))) + +;;; tests using binding operations at runtime, with a syntax-interpreter style + +(syntax-spec + (binding-class rt-var #:binding-space rt) + (extension-class rt-macro #:binding-space rt) + (nonterminal rt-expr + #:allow-extension rt-macro + #:binding-space rt + x:rt-var + (lambda (x:rt-var) e:rt-expr) + #:binding (scope (bind x) e) + (f:rt-expr x:rt-expr)) + (host-interface/expression + (expand-rt e:rt-expr) + #'#'e)) + +(define-dsl-syntax let rt-macro + (syntax-rules () + [(let ([x rhs]) body) + ((lambda (x) body) + rhs)])) + +(check-true (alpha-equivalent? (expand-rt (lambda (x) x)) + (expand-rt (lambda (x) x)))) +(check-true (alpha-equivalent? (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y)))) +(check-true (alpha-equivalent? (expand-rt (lambda (x) (lambda (y) x))) + (expand-rt (lambda (x) (lambda (y) x))))) +(check-false (alpha-equivalent? (expand-rt (lambda (x) (lambda (y) x))) + (expand-rt (lambda (x) (lambda (y) y))))) +(check-true (alpha-equivalent? (expand-rt (let ([x (lambda (y) y)]) (x x))) + (expand-rt (let ([x (lambda (x) x)]) (x x))))) + +(check-equal? (syntax->datum (subst (expand-rt (lambda (x) x)) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y)))) + '(lambda (y) y)) +(check-equal? (syntax->datum (subst (expand-rt (lambda (_) (lambda (x) x))) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y)))) + '(lambda (_) (lambda (y) y))) +(check-true (alpha-equivalent? (subst (expand-rt (lambda (x) x)) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y))) + (expand-rt (lambda (y) y)))) +(check-true (alpha-equivalent? (subst (expand-rt (lambda (_) (lambda (x) x))) + (expand-rt (lambda (x) x)) + (expand-rt (lambda (y) y))) + (expand-rt (lambda (_) (lambda (y) y))))) + diff --git a/tests/dsls/multipass.rkt b/tests/dsls/multipass.rkt index 8bf831e..9fc8fb6 100644 --- a/tests/dsls/multipass.rkt +++ b/tests/dsls/multipass.rkt @@ -4,36 +4,40 @@ ;; arithmetic + let -> ANF -> prune unused variables -> racket (require "../../testing.rkt" - (for-syntax racket/list rackunit (only-in "../../private/ee-lib/main.rkt" define/hygienic))) + (for-syntax racket/match racket/syntax racket/list rackunit)) (syntax-spec - (binding-class var #:reference-compiler immutable-reference-compiler) - (nonterminal expr + (binding-class var + #:reference-compiler immutable-reference-compiler) + (nonterminal full-expr + #:binding-space anf n:number x:var - ; need to use ~literal because you can't re-use let in the other non-terminals - ((~literal let) ([x:var e:expr]) body:expr) + (let ([x:var e:full-expr]) body:full-expr) #:binding (scope (bind x) body) - ((~literal +) a:expr b:expr) - ((~literal *) a:expr b:expr) - ((~literal /) a:expr b:expr) + (+ a:full-expr b:full-expr) + (* a:full-expr b:full-expr) + (/ a:full-expr b:full-expr) (rkt e:racket-expr)) (nonterminal anf-expr - ((~literal let) ([x:var e:rhs-expr]) body:anf-expr) + #:binding-space anf + ((~datum let) ([x:var e:rhs-expr]) body:anf-expr) #:binding (scope (bind x) body) e:rhs-expr) (nonterminal rhs-expr - ((~literal +) a:immediate-expr b:immediate-expr) - ((~literal *) a:immediate-expr b:immediate-expr) - ((~literal /) a:immediate-expr b:immediate-expr) - ((~literal rkt) e:racket-expr) + #:binding-space anf + ((~datum +) a:immediate-expr b:immediate-expr) + ((~datum *) a:immediate-expr b:immediate-expr) + ((~datum /) a:immediate-expr b:immediate-expr) + ((~datum rkt) e:racket-expr) e:immediate-expr) (nonterminal immediate-expr + #:binding-space anf x:var n:number) (host-interface/expression - (eval-expr e:expr) + (eval-expr e:full-expr) #'(compile-expr e))) (begin-for-syntax @@ -51,65 +55,68 @@ (define e/pruned (prune-unused-variables e/anf)) ; this last local-expand-anf might be unnecessary for this compiler, but i'll leave it in ; since most compilers would need it. - (define e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) - #`(compile-anf #,e/pruned^)])) + (define/syntax-parse e/pruned^ (local-expand-anf e/pruned #:should-rename? #t)) + #'(compile-anf e/pruned^)])) (begin-for-syntax - ; expr -> anf-expr - ; this doesn't really need to be hygienic, but in general, compiler passes often will. - (define/hygienic (to-anf e) - #:expression + ; full-expr -> anf-expr + (define (to-anf e) ; list of (list Identifier rhs-expr) ; most recent, and thus innermost, binding first (define bindings-rev '()) ; Identifier rhs-expr -> Void ; ends up producing a let-binding of x to e in the result - (define (bind! x e) (set! bindings-rev (cons (list x e) bindings-rev))) - (define e^ (to-rhs e bind!)) + (define (lift-binding! x e) (set! bindings-rev (cons (list x e) bindings-rev))) + (define e^ (to-rhs! e lift-binding!)) (wrap-lets e^ (reverse bindings-rev))) - ; expr (Identifier rhs-expr -> Void) -> rhs-expr + ; full-expr (Identifier rhs-expr -> Void) -> rhs-expr ; this doesn't need to be hygienic, only the whole pass. ; in other compilers, helpers may need to be hygienic too. - (define (to-rhs e bind!) + (define (to-rhs! e lift-binding!) (syntax-parse e - [((~literal let) ([x e]) body) - (bind! #'x (to-rhs #'e bind!)) - (to-rhs #'body bind!)] + [((~datum let) ([x e]) body) + (define e^ (to-rhs! #'e lift-binding!)) + (lift-binding! #'x e^) + (to-rhs! #'body lift-binding!)] [(op a b) - #`(op #,(to-immediate #'a bind!) - #,(to-immediate #'b bind!))] - [_ this-syntax])) - - ; expr (Identifier rhs-expr -> Void) -> immediate-expr - (define (to-immediate e bind!) + (define/syntax-parse a^ (to-immediate! #'a lift-binding!)) + (define/syntax-parse b^ (to-immediate! #'b lift-binding!)) + #'(op a^ b^)] + [(~or ((~datum rkt) _) + x:id + n:number) + this-syntax])) + + ; full-expr (Identifier rhs-expr -> Void) -> immediate-expr + (define (to-immediate! e lift-binding!) (syntax-parse e - [(_ . _) - (define/syntax-parse (tmp) (generate-temporaries '(tmp))) - (bind! #'tmp (to-rhs this-syntax bind!)) - #'tmp] - [_ this-syntax])) + [(~or x:id n:number) this-syntax] + [_ + (define/syntax-parse tmp (generate-temporary 'tmp)) + (define e^ (to-rhs! this-syntax lift-binding!)) + (lift-binding! #'tmp e^) + #'tmp])) ; rhs-expr (listof (list Identifier rhs-expr) ) (define (wrap-lets e bindings) - (foldr (lambda (binding e) #`(let ([#,(first binding) #,(second binding)]) #,e)) - e - bindings))) + (match bindings + [(cons binding bindings) + (with-syntax ([x (first binding)] + [rhs (second binding)] + [body (wrap-lets e bindings)]) + #'(let ([x rhs]) body))] + ['() e]))) (begin-for-syntax ; anf-expr -> anf-expr - (define/hygienic (prune-unused-variables e) - #:expression - (define var-used? (get-used-vars e)) - (remove-unused-vars e var-used?)) + (define (prune-unused-variables e) + (define used-vars (get-used-vars e)) + (remove-unused-vars e used-vars)) - ; anf-expr -> (Identifier -> Bool) + ; anf-expr -> SymbolTable ; non-hygienic because it's just an analysis pass (define (get-used-vars e) - (define-local-symbol-table used-vars) - (define (mark-as-used! x) - (symbol-table-set! used-vars x #t)) - (define (var-used? x) (symbol-table-ref used-vars x #f)) ; Go bottom-up, seeing references before their binders. ; The invariant is that we only traverse expressions that need ; to be evaluated. @@ -120,41 +127,38 @@ ; we need its rhs' referenced variables too, so recur on the rhs. ; If we see a binder that isn't marked as used, it was never referenced, ; so we don't traverse its rhs since it isn't needed. - (let mark-used-variables! ([e e]) - (syntax-parse e - [((~literal let) ([x e]) body) - (mark-used-variables! #'body) - (when (var-used? #'x) - (mark-used-variables! #'e))] - [(op a b) - (mark-used-variables! #'a) - (mark-used-variables! #'b)] - [x:id - (mark-as-used! #'x)] - ; don't descent into racket expressions. - ; this means we'll miss references like (rkt (eval-expr x)). - ; TODO use free-variables once it supports host-expressions - [_ (void)])) - var-used?) - - ; anf-expr (Identifier -> Boolean) -> anf-expr - (define (remove-unused-vars e var-used?) - (let loop ([e e]) - (syntax-parse e - [((~and let (~literal let)) ([x e]) body) - (if (var-used? #'x) - ; no need to recur on e since it's not a let - #`(let ([x e]) - #,(loop #'body)) - (loop #'body))] - [_ this-syntax])))) + (syntax-parse e + [((~datum let) ([x e]) body) + (define body-vars (get-used-vars #'body)) + (if (symbol-set-member? body-vars #'x) + (symbol-set-union body-vars (get-used-vars #'e)) + body-vars)] + [(op a b) + (symbol-set-union (get-used-vars #'a) (get-used-vars #'b))] + [x:id + (immutable-symbol-set #'x)] + [((~datum rkt) e) + (apply immutable-symbol-set (get-racket-referenced-identifiers [var] #'e))] + [n:number (immutable-symbol-set)])) + + ; anf-expr SymbolTable -> anf-expr + (define (remove-unused-vars e used-vars) + (syntax-parse e + [((~and let (~datum let)) ([x e]) body) + (define/syntax-parse body^ (remove-unused-vars #'body used-vars)) + (if (symbol-set-member? used-vars #'x) + ; no need to recur on e since it's not a let + #'(let ([x e]) + body^) + #'body^)] + [_ this-syntax]))) (define-syntax compile-anf (syntax-parser - [(_ ((~literal let) ([x e]) body)) + [(_ ((~datum let) ([x e]) body)) #'(let ([x (compile-anf e)]) (compile-anf body))] [(_ (op a b)) #'(op a b)] - [(_ ((~literal rkt) e)) + [(_ ((~datum rkt) e)) #'(let ([x e]) (if (number? x) x @@ -215,6 +219,7 @@ (+ x (rkt x)))) 2) +#;; this breaks because of get-racket-referenced-identifiers (test-equal? "use outer dsl var in dsl in rkt" (eval-expr (let ([x 1]) @@ -231,9 +236,10 @@ (let ([unused (rkt (error "bad"))]) 1)) 1) -#;; since we don't descend into racket exprs, it thinks it's unused, so it removes it and we get an unbound reference -(check-equal? +#;(check-equal? (eval-expr (let ([used-only-in-rkt 1]) (let ([x (rkt used-only-in-rkt)]) - x)))) + x))) + 1) + diff --git a/tests/dsls/syntax-interpreter.rkt b/tests/dsls/syntax-interpreter.rkt new file mode 100644 index 0000000..996985f --- /dev/null +++ b/tests/dsls/syntax-interpreter.rkt @@ -0,0 +1,212 @@ +#lang racket + +;; A lambda calculus interpreter that operates on expanded syntax, +;; using syntax-spec. +;; Features normalization by evaluation +;; +;; Benefits: +;; - grammar and binding checking +;; - hygienic macros, syntactic sugar +;; - interpreter/static checks only have to worry about core forms +;; - static checks have access to binding information (this example language has no static checks) +;; - easy to report runtime error source location since we evaluate syntax +;; Drawbacks: +;; - not sure if substitution would work bc of scopes on expanded syntax. might be fine +;; - interpreter helpers need expanded syntax, which might make it hard to unit test them + +(module+ test (require rackunit)) +(require "../../testing.rkt" + syntax/parse + racket/syntax + (for-syntax syntax/parse) + racket/syntax-srcloc + (for-template syntax-spec-dev) + syntax/macro-testing) + +(syntax-spec + (binding-class lc-var #:binding-space lc) + (extension-class lc-macro #:binding-space lc) + (nonterminal lc-expr + #:binding-space lc + #:allow-extension lc-macro + n:number + (+ e1:lc-expr e2:lc-expr) + x:lc-var + (lambda (x:lc-var) e:lc-expr) + #:binding (scope (bind x) e) + (rkt e:expr) + (~> (e1 e2) + ;; this is necessary to preserve source location, properties, etc. + (syntax/loc this-syntax (#%app e1 e2))) + (#%app e1:lc-expr e2:lc-expr)) + + (host-interface/expression + (lc-expand e:lc-expr) + #'#'e)) + +(define-dsl-syntax let lc-macro + (syntax-rules () + [(let ([x rhs]) body) + ((lambda (x) body) rhs)])) + +(define-syntax-rule (lc e) + (lc-eval (lc-expand e) empty-env)) + +;;; runtime + +;; An Env is a (ImmutableBoundIdTable Value) +(define empty-env (immutable-symbol-table)) +;; Env Identifier -> Value +(define (env-lookup env x) + (if (symbol-table-has-key? env x) + (symbol-table-ref env x) + ;; neutral + x)) +;; Env Identifier Value -> Void +(define (env-extend env x v) + (symbol-table-set env x v)) + +;; A Value is one of +;; a Number +;; a Value -> Value +;; a NeutralExpr + +;; A NeutralExpr is one of +;; Identifier +;; (+ NeutralExpr Value) +;; (+ Value NeutralExpr) +;; (NeutralExpr Value) + +(define-syntax-rule (normalize e) + (lc-uneval (lc e))) + +;; Syntax Env -> Value +(define (lc-eval stx env) + (syntax-parse stx + #:datum-literals (+ lambda #%app rkt) + [n:number + (syntax->datum #'n)] + [(+ e1 e2) + (define v1 (lc-eval #'e1 env)) + (define v2 (lc-eval #'e2 env)) + (cond + [(or (syntax? v1) (syntax? v2)) + ;; neutral + #`(+ #,v1 #,v2)] + [else + (unless (number? v1) + (lc-error this-syntax "+ expects number")) + (unless (number? v2) + (lc-error this-syntax "+ expects number")) + (+ v1 v2)])] + [x:id + (env-lookup env #'x)] + [(lambda (x:id) e:expr) + (lambda (v) (lc-eval #'e (env-extend env #'x v)))] + [(#%app e1 e2) + (match (lc-eval #'e1 env) + [(? procedure? f) + (f (lc-eval #'e2 env))] + [(? syntax? f) + #`(#,f #,(lc-eval #'e2 env))] + [_ + (lc-error this-syntax "applied non-function")])] + [(rkt e) + (eval #'e)])) + +;; Value -> Syntax +(define (lc-uneval v) + (define count 0) + (define (fresh) + (begin0 (format-id #f "_.~a" count) + (set! count (add1 count)))) + (let loop ([v v]) + (match v + [(? number?) + (datum->syntax #f v)] + [(? procedure?) + (define x (fresh)) + #`(lambda (#,x) #,(loop (v x)))] + [(? syntax?) + (syntax-parse v + [((~datum +) a b) + #`(+ #,(loop (attribute a)) #,(loop (attribute b)))] + [(a b) + #`(#,(loop (attribute a)) #,(loop (attribute b)))] + [x:id #'x] + [_ + ; 3D syntax of a value that got syntax'ed bc of quasiquote + (loop (syntax->datum v))])]))) + +;; Syntax String -> Void +;; raise (runtime) error with source location reported +(define (lc-error stx msg) + (define loc (syntax-srcloc stx)) + (if loc + (raise-user-error (format "~a: ~a" (srcloc->string loc) msg)) + (raise-user-error 'lc msg))) + +(define top-level-var 4) + +(module+ test + (define-syntax-rule (teval e) (check-equal? (lc e) e)) + (define-syntax-rule (tnormalize e e^) (check-equal? (syntax->datum (normalize e)) 'e^)) + (define-syntax-rule (t-runtime-error msg e) + (check-exn + msg + (lambda () + (lc e)))) + (define-syntax-rule (t-expand-error msg e) + (check-exn + msg + (lambda () + (convert-compile-time-error (lc e))))) + (teval 1) + (teval (+ 1 1)) + (teval ((lambda (x) x) 1)) + (teval (let ([x 1]) (+ x x))) + (test-case "hygiene" + ;; basic shadow + (teval (let ([x 1]) + (let ([x 2]) + x))) + ;; macro "shadows", should not actually shadow + (define-dsl-syntax m lc-macro + (syntax-rules () + [(m e) + (let ([x 2]) e)])) + (check-equal? (lc (let ([x 1]) (m x))) + 1) + ;; macro ref to macro binding not shadowable from use site + (define-dsl-syntax m2 lc-macro + (syntax-rules () + [(m2 ([x rhs])) + (let ([y 1]) + (let ([x rhs]) + y))])) + (check-equal? (lc (m2 ([y 2]))) + 1)) + ;; errors + (t-expand-error + #rx"not bound" + x) + (t-expand-error + ;; actual decent grammatical error message + #rx"lambda: unexpected term" + (lambda (x y) x)) + (t-runtime-error + ;; source location for runtime error + #px".*\\.rkt:\\d*:\\d*: applied non-function" + (1 2)) + (tnormalize 1 1) + (tnormalize (lambda (x) x) + (lambda (_.0) _.0)) + ;; evaluates in lambda bodies + (tnormalize (lambda (x) (+ (+ 1 1) x)) + (lambda (_.0) (+ 2 _.0))) + (tnormalize (lambda (x) (+ x (+ 1 1))) + (lambda (_.0) (+ _.0 2))) + (tnormalize (lambda (x) (x (lambda (y) y))) + (lambda (_.0) (_.0 (lambda (_.1) _.1)))) + (check-equal? (lc (rkt (* 2 2))) 4) + (check-equal? (lc (rkt top-level-var)) 4))