diff --git a/info.rkt b/info.rkt index fee46e4..22cc85d 100644 --- a/info.rkt +++ b/info.rkt @@ -8,7 +8,7 @@ "version-case" "syntax-classes-lib" "rackunit-lib")) -(define build-deps '("racket-doc" "scribble-lib" "drracket" "typed-racket-lib")) +(define build-deps '("racket-doc" "scribble-lib" "drracket" "typed-racket-lib" "ocular-patdown")) (define scribblings '(("scribblings/main.scrbl" (multi-page) (experimental) "syntax-spec-dev"))) (define compile-omit-paths '("design" "demos")) (define test-omit-paths '("scribblings" "design" "demos")) diff --git a/scribblings/tutorial/main.scrbl b/scribblings/tutorial/main.scrbl index 11714b4..d46dee3 100644 --- a/scribblings/tutorial/main.scrbl +++ b/scribblings/tutorial/main.scrbl @@ -12,3 +12,5 @@ The tutorial is broken down into illustrative examples: @include-section["stlc-tutorial.scrbl"] @include-section["multipass-tutorial.scrbl"] @include-section["syntax-interpreter-tutorial.scrbl"] +@include-section["miniclass-tutorial.scrbl"] +@include-section["ocular-patdown-tutorial.scrbl"] diff --git a/scribblings/tutorial/miniclass-tutorial.scrbl b/scribblings/tutorial/miniclass-tutorial.scrbl new file mode 100644 index 0000000..42aedf3 --- /dev/null +++ b/scribblings/tutorial/miniclass-tutorial.scrbl @@ -0,0 +1,341 @@ +#lang scribble/manual + +@(require (for-label racket racket/block racket/class racket/match racket/list syntax/transformer syntax/parse "../../main.rkt") + scribble/example) +@(define eval (make-base-eval '(require racket racket/stxparam syntax/transformer (for-syntax racket syntax/transformer)))) +@(define-syntax-rule (repl body ...) (examples #:eval eval #:label #f body ...)) +@repl[ +#:hidden #t +(require "tests/dsls/miniclass/class.rkt") +] + +@title[#:tag "miniclass"]{Advanced Tutorial: Re-interpreting Racket Syntax} + +It is possible to create a DSL that re-interprets Racket forms like @racket[define] to behave differently in the context of that DSL. In Racket's @racket[class] DSL, procedure definitions are interpreted as method definitions. syntax-spec makes it easier to create these DSLs, but some special care and knowledge is required. In this tutorial, we'll create a small version of Racket's @racket[class] DSL. + +Here is an example of using the language we'll build: + +@repl[ +(define posn% + (class + (field x y) + (define (scale k) (new posn% (* x k) (* y k))) + (define (displayln) `(posn ,x ,y)))) +(define p (new posn% 1 2)) +(send (send p scale 3) displayln) +] + +@section[#:tag "miniclass-expander"]{Expander} + +Let's start by defining the grammar: + +@racketmod[ +racket +(require syntax-spec-dev + racket/stxparam + (for-syntax racket/list + syntax/parse + syntax/transformer)) + +(begin-for-syntax + (define-syntax-class lambda-id + (pattern (~or (~literal lambda) (~literal #%plain-lambda))))) + +(syntax-spec + (binding-class method-name + #:reference-compiler method-reference-compiler) + (binding-class field-name + #:reference-compiler field-reference-compiler) + + (nonterminal/exporting class-form + #:allow-extension racket-macro + (field name:field-name ...) + #:binding [(export name) ...] + ((~literal define-values) (m:method-name) + (lambda:lambda-id (arg:racket-var ...) body:racket-body ...)) + #:binding [(export m) (scope (bind arg) ... (import body) ...)] + + ((~literal define-syntaxes) (x:racket-macro ...) e:expr) + #:binding (export-syntaxes x ... e) + + ((~literal begin) e:class-form ...) + #:binding [(re-export e) ...] + + e:racket-expr)) +] + +We create separate binding classes for method and field names so that we can transform references to methods and fields differently when they appear in the class body. + +@margin-note{ +We could allow arbitrary Racket definitions in the class body using @racket[racket-body] instead of @racket[racket-expr]. However, this would require a more sophisticated compilation strategy to allow methods to close over these definitions. +} + +The @racket[class-form] nonterminal is for forms that will appear in the class body. We support field declarations, method definitions, macro definitions, @racket[begin], and arbitrary Racket expressions. The Racket expressions will run once, in the constructor. + +@margin-note{ + Racket's @racket[class] form accepts more complex expressions on the right-hand-side of @racket[define-values]. We could approach this feature by creating a macro-extensible @racket[method-procedure] nonterminal. Unfortunately, @racket[syntax-spec] is missing features required for expansions in this context. @hyperlink["https://github.com/michaelballantyne/syntax-spec/issues/91"]{Issue} +} + +The key piece that allows us to re-interpret Racket syntax is the @racket[racket-macro] @tech[#:key "extension classes"]{extension class}. This extension class allows any definition form that eventually translates to @racket[define-values] and @racket[define-syntaxes] to be used in our DSL. This includes @racket[define] and @racket[define-syntax], but also fancy ones like @racket[define/match]. We do require that @racket[define-values] forms have a @racket[lambda] on the right-hand-side so that we can parse the method arguments and body. + +As an example, let's think about how a method definition might expand: + +@racketblock[ +(define (add2 x) (+ x 2)) +~> +(define-values (add2) (lambda (x) (+ x 2))) +] + +This is exactly what our method production looks for. + +@section{Procedural Layer} + +Next, let's think about how we will represent classes at runtime. We'll define two data types: + +@racketblock[ +(code:comment2 "A ClassInfo is a") +(struct class-info [methods constructor]) +(code:comment2 "where") +(code:comment2 "methods is a (Hash Symbol Procedure)") +(code:comment2 "constructor is a (Any ... -> Object)") + +(code:comment2 "An Object is a") +(struct object [fields class]) +(code:comment2 "where") +(code:comment2 "fields is a (MutableVectorOf Any)") +(code:comment2 "class is a ClassInfo") +] + +A @racket[class-info] represents a class itself. @racket[methods] is a hash table mapping method names (symbols) to method implementations. Methods will be implemented as functions whose first argument is @racket[this] and subsequent arguments are the explicit arguments to the method. @racket[constructor] takes in an argument for each field and returns an instance of the class, which is an @racket[object]. + +A @racket[object] represents an instance of a class. It has its fields as a vector of values (in the same order as the constructor), and the @racket[class-info] containing its methods. + +Methods always access the instance via @racket[this]. So we can create one set of procedures in the @racket[class-info] structure and share it across all instances. But fields are instance-specific, so each @racket[object] needs its own. Fields are only accessed from within the class so we can transform each field access to a vector access at an index corresponding to the field. Methods may be referenced inside the class, but they may also be referenced externally via @racket[send] with a dynamically constructed symbol. Thus, we need to be able to access methods by their symbolic name. + +Now that we've defined our data types, we can implement functionality for constructing objects and calling methods: + +@racketblock[ +(define (new cls . fields) + (apply (class-info-constructor cls) fields)) + +(define-syntax send + (syntax-parser + [(_ obj:expr method-name:id arg:expr ...) + #'(send-rt obj 'method-name (list arg ...))] + [(_ obj:expr method-name:id . args) + #'(send-rt obj 'method-name args)])) + +(define (send-rt obj method-name args) + (let* ([cls (object-class obj)] + [method (hash-ref (class-info-methods cls) method-name (lambda () (error 'send "unknown method ~a" method-name)))]) + (apply method obj args))) +] + +Now let's think about how we will compile the main class form. Recall our example: + +@repl[ +(define posn% + (class + (field x y) + (define (scale k) (new posn% (* x k) (* y k))) + (define (displayln) `(posn ,x ,y)))) +(define p (new posn% 1 2)) +(send (send p scale 3) displayln) +] + +We would like that to compile to something like this: + +@repl[ +(define posn% + (class-info (hash 'scale (lambda (this-arg k) + (syntax-parameterize ([this (make-variable-like-transformer #'this-arg)]) + (new posn% (* (vector-ref (object-fields this) 0) k) + (* (vector-ref (object-fields this) 1) k)))) + 'displayln (lambda (this-arg) + (syntax-parameterize ([this (make-variable-like-transformer #'this-arg)]) + `(posn ,(vector-ref (object-fields this) 0) + ,(vector-ref (object-fields this) 1))))) + (lambda (x y) (object (vector x y) posn%)))) +(define p (new posn% 1 2)) +(send-rt (send-rt p 'scale (list 3)) 'displayln (list)) +] + +We have to create a dynamic mapping from method names to procedures and a static association between fields and vector indices. We also have to make sure method procedures have an implicit @racket[this-arg] argument for taking in the instance, and set up @racket[this] to refer to @racket[this-arg] in the body of the method. + +This is a bit of a simplification, but gives us the general idea of how classes will compile. + +@section[#:tag "miniclass-compiler"]{Compiler} + +Now let's implement this compilation. + +@racketblock[ +(syntax-spec + (host-interface/expression + (class e:class-form ...) + #:binding (scope (import e) ...) + (define-values (defns fields constructor-body) (group-class-decls (splice-begins (attribute e)))) + (compile-class-body defns fields constructor-body))) +] + +First, we splice @racket[begin]s so we get a flat list of class-level forms (field declarations, method definitions, and Racket expressions). Then, we group these class-level forms based on their type. Finally, we compile these forms to Racket. + +Here is how we splice @racket[begin]s: + +@racketblock[ +(begin-for-syntax + (code:comment2 "splices begins (recursively), returns flattened list of exprs.") + (define (splice-begins exprs) + (syntax-parse exprs + [() this-syntax] + [(expr . rest-exprs) + (syntax-parse #'expr + #:literals (begin) + [(begin e ...) + (splice-begins (append (attribute e) #'rest-exprs))] + [_ (cons this-syntax (splice-begins #'rest-exprs))])]))) +] + +We just flatten everything into a list of class-level forms. + +Here is the code for grouping up class-level forms: + +@racketblock[ +(begin-for-syntax + (define (group-class-decls exprs) + (syntax-parse exprs + #:literals (define-values define-syntaxes field) + [((~alt (~and defn (define-values . _)) + (~and stx-defn (define-syntaxes . _)) + (field field-name ...) + constructor-body) + ...) + ;; discard stx-defn because syntax definitions don't end up in the generated code + (values (attribute defn) + #'(field-name ... ...) + (attribute constructor-body))]))) +] + +It's just straightforward syntax manipulation, taking advantage of @racket[syntax-parse]'s powerful patterns. + +For compilation, we start with the method reference compilers: + +@racketblock[ +#:escape unracket +(begin-for-syntax + (define method-reference-compiler + (make-variable-like-reference-compiler + (syntax-parser + [name:id + #'(lambda args (send this name . args))])))) +] + +Inside of the @racket[class] body, if you reference a method directly, the reference expands to a procedure that invokes the method via @racket[send]. + +@racketblock[ +#:escape unracket +(begin-for-syntax + (define field-index-table (local-symbol-table)) + + (define field-reference-compiler + (make-variable-like-reference-compiler + (syntax-parser + [name:id + (let ([idx (symbol-table-ref field-index-table #'name)]) + #`(vector-ref (object-fields this) #,idx))]) + (syntax-parser + [(_ name:id rhs) + (let ([idx (symbol-table-ref field-index-table #'name)]) + #`(vector-set! (object-fields this) #,idx rhs))])))) +] + +Field references access or mutate the object's field vector. We use a symbol table to map field names to indices. This table will be used across all classes, which is safe due to hygiene. We use a local symbol table rather than a persistent one since fields can only be referenced from within the class definition, which means we don't need the table entries to persist across separate compilations. + +This table is populated in @racket[compile-constructor], which we'll look at soon. For now, let's start compiling the class body: + +@racketblock[ +#:escape unracket +(begin-for-syntax + (define (compile-class-body defns fields constructor-body) + (syntax-parse (list defns fields constructor-body) + #:literals (define-values field) + [(((define-values (method-name:id) (_ (method-arg:id ...) method-body:expr ...)) ...) + (field-name:id ...) + (constructor-body ...)) + (define/syntax-parse method-table (compile-methods (attribute method-name) (attribute method-arg) (attribute method-body))) + (define/syntax-parse constructor-procedure (compile-constructor (attribute field-name) #'cls (attribute constructor-body))) + #'(letrec ([methods method-table] + [constructor constructor-procedure] + [cls (class-info methods constructor)]) + cls)]))) +] + +We generate syntax that creates the method table, constructor procedure, and class info in a letrec. We need recursion because the constructor procedure returns an object with class info @racket[cls]. + +Now let's see how we compile methods: + +@racketblock[ +(begin-for-syntax + (define (compile-methods method-name method-arg method-body) + (check-duplicate-method-names method-name) + (syntax-parse (list method-name method-arg method-body) + [((method-name ...) ((method-arg ...) ...) ((method-body ...) ...)) + #'(make-immutable-hash + (list + (cons 'method-name + (lambda (this-arg method-arg ...) + (syntax-parameterize ([this (make-variable-like-transformer #'this-arg)]) + method-body + ...))) + ...))])) + + (define (check-duplicate-method-names names) + (let ([duplicate (check-duplicates names #:key syntax->datum)]) + (when duplicate + (raise-syntax-error #f "a method with same name has already been defined" duplicate))))) +] + +The method table is a mapping from method name to procedure. We set up @racket[this] to refer to the first argument, which is the instance of the class, and the rest of the arguments are those passed in from @racket[send]. +@;TODO is it safe to use the symbols? Are they actually renamed the right way? Or do you have to gensym. + +We treat method names as symbols to support dynamic dispatch. Symbols are non-hygienic, so we need to do a duplicate method name check on the symbolic names of methods. + +Now let's see how the constructor is compiled: + +@racketblock[ +(define (compile-constructor field-name cls constructor-body) + (for ([field-name field-name] + [field-index (in-naturals)]) + (symbol-table-set! field-index-table field-name field-index)) + (syntax-parse (list field-name cls constructor-body) + [((field-name ...) cls (constructor-body ...)) + #'(lambda (field-name ...) + (let ([this-val (object (vector field-name ...) cls)]) + (syntax-parameterize ([this (make-variable-like-transformer #'this-val)]) + ;; ensure body is non-empty + (void) + constructor-body + ...) + this-val))])) +] + +The constructor takes in values for the fields, creates an instance and binds it to @racket[this], runs the class-level Racket expressions, and finally returns the instance. + +We also associate field names with their vector indices according to their declaration order for @racket[field-reference-compiler]. + +Finally, let's define the syntax parameter for @racket[this]: + +@racketblock[ +(define-syntax-parameter this + (make-expression-transformer + (syntax-parser + [_ (raise-syntax-error 'this "used outside of a class" this-syntax)]))) +] + +That's it. We now have a simple class DSL. To summarize the key points: + +@itemlist[ + @item{We have productions with literals of expanded Racket forms to detect definitions and re-interpret them.} + @item{We use @racket[#:allow-extension racket-macro] to expand any Racket definitions down to @racket[define-values] and @racket[define-syntaxes].} + @item{We use @racket[racket-body] for Racket definitions or expressions, and @racket[racket-expr] for just expressions.} + @item{We use a syntax parameter for @racket[this], which gets set in the compiler to refer to the instance. But when used outside of a class body, it is a syntax error.} +] diff --git a/scribblings/tutorial/multipass-tutorial.scrbl b/scribblings/tutorial/multipass-tutorial.scrbl index 7163f0e..47d2cd8 100644 --- a/scribblings/tutorial/multipass-tutorial.scrbl +++ b/scribblings/tutorial/multipass-tutorial.scrbl @@ -1,8 +1,7 @@ #lang scribble/manual @(require (for-label racket racket/block racket/class racket/match racket/list syntax/parse "../../main.rkt") - scribble/example - racket/sandbox) + scribble/example) @(define eval (make-base-eval '(require racket (for-syntax racket)))) @(define-syntax-rule (repl body ...) (examples #:eval eval #:label #f body ...)) @@ -186,7 +185,7 @@ Here is the syntax-spec of our language: @racketmod[ racket -(require syntax-spec (for-syntax syntax/parse racket/syntax racket/match racket/list)) +(require syntax-spec-dev (for-syntax syntax/parse racket/syntax racket/match racket/list)) (syntax-spec (binding-class var #:reference-compiler immutable-reference-compiler) diff --git a/scribblings/tutorial/ocular-patdown-tutorial.scrbl b/scribblings/tutorial/ocular-patdown-tutorial.scrbl new file mode 100644 index 0000000..1f838bf --- /dev/null +++ b/scribblings/tutorial/ocular-patdown-tutorial.scrbl @@ -0,0 +1,468 @@ +#lang scribble/manual + +@(require (for-label racket racket/match racket/list syntax/parse "../../main.rkt" ocular-patdown) + scribble/example + racket/sandbox) +@(define eval (make-base-eval '(require racket (for-syntax racket) syntax-spec-dev))) +@(define op-eval (make-base-eval '(require racket (for-syntax racket) ocular-patdown/update ocular-patdown))) +@(define-syntax-rule (repl body ...) (examples #:eval eval #:label #f body ...)) +@(define-syntax-rule (op-repl body ...) (examples #:eval op-eval #:label #f body ...)) + +@title[#:tag "ocular-patdown"]{Advanced Tutorial: A Match-Like DSL for Deep Immutable Updates} + +This tutorial outlines the implementation of a DSL that uses lenses and pattern matching to perform deep immutable updates. + +We will rebuild a simplified version of the @racket[update] DSL from the @racketmodname[ocular-patdown] package. + +@section{Preview} + +Here is the language we will be implementing: + +@op-repl[ +(define lst (list 1 2 3 4)) +(update lst + [(list a b c ...) + (set! a 'one) + (modify! c add1)]) +lst +] + +It's like @racket[match], but we can use @racket[set!] to perform immutable updates on the matched value. This allows us to write code as if we're mutating, but we don't actually affect the matched value. + +The original @racket[lst] value is unchanged, but @racket[update] returns a new copy of the list with the changes applied. + +@section{Lenses} + +This language's runtime is powered by a tool called lenses. Lenses are essentially a first-class, immutable getter and setter for a part of a structure. + +Here is an example: + +@op-repl[ +(define car-lens + (make-lens car (code:comment "getter") + (lambda (pair new-car) (code:comment "setter") + (cons new-car (cdr pair))))) +(define pair (cons 1 2)) +(lens-get car-lens pair) +(lens-set car-lens pair 'one) +pair +] + +We can also make lenses for working with @racket[struct] fields. + +@op-repl[ +(struct posn [x y] #:transparent) +(define posn-x-lens (struct-lens posn x)) +(lens-set posn-x-lens + (posn 1 2) + 10) +] + +These examples could just as easily be accomplished using @racket[struct-copy] or @racket[match], but lenses can be composed for deep updates, which is where they become more useful. + +@op-repl[ +(define first-posn-x-lens + (lens-compose car-lens posn-x-lens)) +(lens-set first-posn-x-lens + (list (posn 1 2) (posn 3 4)) + 10) +] + +We also have traversals, which are useful for collections like lists. + +@op-repl[ +(traversal-map list-traversal + (list 1 2 3 4) + add1) +] + +The umbrella term for lenses and traversals is optics. + +Traversals can be composed with other optics to "map" them over a collection. + +@op-repl[ +(define posns-xs-traversal + (optic-compose list-traversal + posn-x-lens)) + +(traversal-map posns-xs-traversal + (list (posn 1 2) (posn 3 4)) + -) +] + +Optics are useful for implementing @racket[update] because they allow us to retrieve and immutable update values deep in a data structure. + +@section{Compilation Preview} + +The big idea of this language's implementation is to bind pattern variables to optics, and use a runtime parameter to keep track of the current version of the target. Here is a simplified example compilation: + +@(define compilation-preview-example +(racketblock +(update pair + [(cons a _) + (set! a (add1 a))]) +~> +(let ([a car-lens]) + (parameterize ([current-update-target pair]) + (current-update-target + (lens-set a + (current-update-target) + (add1 (lens-get a (current-update-target))))) + (current-update-target))) +)) +@compilation-preview-example + +Patterns get translated to @racket[let] and optics. Variable references get translated to @racket[lens-get], @racket[set!] gets translated to @racket[lens-set], and they both use the parameter @racket[current-update-target]. The result of the @racket[set!] is the new value of the target. + +@section{The Expander} + +@subsection{Syntactic Sugar} + +Some patterns are equivalent to others. For example: + +@racketblock[ +(list a b c) +~> +(cons a (cons b (cons c _))) +] + +For the purpose of this tutorial, we will only allow a single clause in @racket[update] and assume that the pattern is appropriate for the target. The real implementation supports multiple clauses and checks that patterns match, but this tutorial follows a simplified version of the DSL. Thus, we will use a wildcard pattern for the tail of the list, ignoring the possibility of there being more elements. + +We will use DSL macros for patterns like @racket[list] which can be expressed in terms of simpler ones. + +There are also patterns that aren't equivalent to each other, but compile to very similar code. Consider these two examples: + +@racketblock[ +(update pair + [(cons a _) + ]) +~> +(let ([a car-lens]) + ) + +(update pair + [(posn x _) + ]) +~> +(let ([x (struct-lens posn x)]) + ) +] + +In both examples, we're simply binding an optic to a pattern variable. The compilations are almost identical. We can create a general @racket[optic] pattern to capture this idea: + +@racketblock[ +(cons a _) +~> +(optic car-lens a) + +(update + [(optic a) ]) +~> +(let ([a ]) + ) +] + +Let's think about what should happen if we have multiple sub-patterns: + +@racketblock[ +(update pair + [(cons a d) + ]) +~> +(let ([a car-lens]) + (let ([d cdr-lens]) + )) +] + +We should produce nested uses of @racket[let]. It's kind of like we're doing both @racket[(optic car-lens a)] and @racket[(optic cdr-lens d)] on @racket[pair]. We can create a general @racket[and] pattern to capture this idea of matching multiple patterns on the same target: + +@racketblock[ +(cons a d) +~> +(and (optic car-lens a) + (optic cdr-lens d)) +] + +Since a lot of the complexity is in the procedural implementation of lenses, the language itself ends up being simple. + +@subsection{Core Grammar} + +@racket[optic] and @racket[and] are so general, we actually don't need any other core forms! All other patterns we need can be expressed in terms of them. + +@racketgrammar[#:literals (optic and2) +pat +id +_ +(optic optic-expr pat) +(and2 pat pat) +] + +(@racket[and] can be sugar on top of @racket[and2]) + +There we have it. Now we have to implement it with @racket[syntax-spec]. + +@racketblock[ +(require syntax-spec-dev (for-syntax syntax/parse syntax/parse/class/struct-id)) +(syntax-spec + (binding-class optic-var) + (extension-class pattern-macro + #:binding-space pattern-update) + (nonterminal/exporting pat + #:allow-extension pattern-macro + #:binding-space pattern-update + (~> (name:struct-id field ...) + #'(struct* name field ...)) + _ + v:optic-var + #:binding (export v) + (optic o:racket-expr p:pat) + #:binding (re-export p) + (and2 p1:pat p2:pat) + #:binding [(re-export p1) (re-export p2)])) +] + +We define a binding class for pattern variables, an extension class for pattern macros, and a nonterminal for patterns including productions for our core forms. + +We use an exporting nonterminal because all pattern variables are in scope in the body, and we don't want to allow duplicates. + +We have a special rewrite rule for struct names since we can't use productions or DSL macros for all possible struct names. The pattern macro @racket[struct*] will expand to uses of @racket[and], @racket[optic], and @racket[struct-lens]. + +Now that we've defined our core grammar and extensibility, let's implement some of our syntactic sugar: + +@racketblock[ +(define-dsl-syntax cons pattern-macro + (syntax-parser + [(_ a d) + #'(and (optic car-lens a) + (optic cdr-lens d))])) + +(define-dsl-syntax list pattern-macro + (syntax-parser + [(_) + #'_] + [(_ p (~datum ...)) + #'(optic list-traversal p)] + [(_ p0 p ...) + #'(cons p0 (list p ...))])) + +(define-dsl-syntax and pattern-macro + (syntax-parser + [(_) + #'_] + [(_ p0 p ...) + #'(and2 p0 (and p ...))])) +] + +We are defining DSL macros that have the same name as built-in forms and procedures from Racket. However, since we're using binding spaces, we aren't actually shadowing these built-in names. + +Now that we have our core grammar, expander, and some convenient syntactic sugar, we're ready to implement the compiler. + +@section{The Compiler} + +The compiler has two main pieces: The pattern compiler and the body "compiler". The pattern compiler translates patterns into @racket[let] and optics, and the body "compiler" establishes the parameter and transforms variable usages. The body "compiler" isn't a compiler in the same sense as the pattern compiler since it's really just customizing the expansion of the body, which is a Racket expression, or several Racket expressions. We want to support arbitrary Racket expressions to allow for maximum flexibility in the body. + +The clause body is an example of a multi-language boundary between our update language and Racket. The body is under a special context introduced by update, hence the need for the body compiler to establish that context. + +@subsection{The Pattern Compiler} + +We've seen a few examples of pattern compilation. Here is a refresher: + +@racket[optic] patterns get translated to a @racket[let]. + +@racketblock[ +(update pair + [(optic car-lens a) ]) +~> +(let ([a car-lens]) + ) +] + +We're ignoring body compilation for now. + +But what happens when we have nested patterns? + +@racketblock[ +(update posns + [(cons (posn x _) _) + ]) +~> +(let ([tmp car-lens]) + (let ([x (optic-compose tmp (struct-lens posn x))]) + )) +] + +We create temporary variables for parent patterns and use optic composition to "drill down" and create an optic that focuses on a field of a field. + +Let's also remind ourselves how compilation of @racket[and] patterns work while we're at it: + +@racketblock[ +(update posns + [(cons a d) ]) +~> +(update posns + [(and (optic car-lens a) + (optic cdr-lens b)) + ]) +~> +(let ([a car-lens]) + (let ([b cdr-lens]) + )) +] + +We end up with nested uses of @racket[let]. Bindings from the left pattern come before those of the right pattern. If we had more complicated subpatterns, all the left pattern's bindings would come first. For example: + +@racketblock[ +(update + [(and (cons a b) + (cons c d)) + ]) +~> +(let ([a car-lens]) + (let ([b cdr-lens]) + (let ([c car-lens]) + (let ([d cdr-lens]) + )))) +] + +Now let's actually implement this: + +@racketblock[ +(define-syntax bind-optics + (syntax-parser + [(_ current-optic:id p body) + (syntax-parse #'p + #:datum-literals (_ optic and2) + [_ #'body] + [var:id + #'(let ([var current-optic]) + body)] + [(optic o p) + #'(let ([tmp-optic (optic-compose current-optic o)]) + (bind-optics tmp-optic p body))] + [(and2 p1 p2) + #'(bind-optics current-optic p1 + (bind-optics current-optic p2 body))])])) +] + +Our compiler @racket[bind-optics] takes in the current optic variable name, the pattern to compile, and the body. + +@racket[current-optic] is like @racket[tmp] in our earlier example. It refers to an optic that focuses on the part of the structure that this pattern is matching on. As we compile nested patterns, we'll compose optics from sub-patterns with this current optic to drill down further. + +One important invariant is that all of the variables of @racket[p] will be bound in @racket[body]. + +Another invariant is that The pattern @racket[p]'s "sub-target" is the focus of the optic referred to by @racket[current-optic]. This ensures that our compositions are valid and focus on the correct part of the overall target. + +The wildcard pattern does not bind anything and simply emits the body. + +The variable pattern binds the variable to the current optic, which focuses on that part of the overall target. + +The @racket[optic] pattern creates a temporary variable, composes the provided optic with the current one, and recurs on the sub-pattern with the temporary variable as the new current optic. This ensures that the second invariant holds. + +The @racket[and2] pattern recurs on both sub-patterns, providing the second sub-pattern's compilation as the body of the first one's. This leads to "all the bindings from the left, then all the bindings from the right" and ensures that the first invariant holds. + +Notice that @racket[and2] recurs with the same @racket[current-optic] for each sub-pattern because each sub-pattern is "running" on the same piece of the overall target. This ensures that the second invariant holds. + +Now that we have pattern variables being bound to their corresponding optics, we are ready to compile the body so they can be used. + +@subsection{The Body Compiler} + +You might have noticed that the pattern compiler has absolutely no mention of the target value. This is because variables are bound to optics, which can be used with any target value. In a sense, a pattern is merely a specification of a tree of optic compositions with variables at the leaves. + +The body compiler will connect the variables to the target value using a parameter and reference compilers. + +Recall the example from the compiler preview: + +@compilation-preview-example + +The parameter @racket[current-update-target] keeps track of the current value for the target, and forms like @racket[set!] mutate this parameter with the result of immutable updates. + +First, let's implement the host interface which will establish this parameter and invoke our pattern compiler: + +@racketblock[ +(define current-update-target (make-parameter #f)) +(syntax-spec + (host-interface/expression + (update target:racket-expr + [p:pat body:racket-expr]) + #:binding (scope (import p) body) + #'(let ([target-v target]) + (bind-optics identity-iso p + (parameterize ([current-update-target + target-v]) + body))))) +] + +We create a temporary variable for the target to avoid duplicate evaluation, use @racket[bind-optics], and wrap the body with a @racket[parameterize] to establish @racket[current-update-target]. + +Our binding spec declares that all bindings exported from @racket[p] are in scope in @racket[body]. + +We initialize @racket[current-optic] to @racket[identity-iso]. You don't need to know that that is, just know that it's the identity of optic composition, so composing it with something like @racket[car-lens] is equivalent to just @racket[car-lens]. + +Now that we establish the parameter, let's transform variable usages with a reference compiler that uses the parameter: + +@racketblock[ +(define optic-var-reference-compiler + (make-variable-like-reference-compiler + (syntax-parser + [x:id #'(lens-get x (current-update-target))]) + (syntax-parser + [(set! x val) + #'(begin + (current-update-target + (lens-set x (current-update-target) val)) + (current-update-target))]))) + +(syntax-spec + (binding-class optic-var #:reference-compiler optic-var-reference-compiler) + ...) +] + +We must also set the reference compiler in our binding class declaration. + +Variable references turn into @racket[lens-get] and @racket[set!] turn into @racket[lens-set]. The result of @racket[set!] is the new value of the target. This is unlike Racket's usual @racket[set!], which returns void. And again, the only mutation is of the parameter, the actual value of the target itself is not being mutated. + +This is an interesting example of a reference compiler. The pattern compiler binds pattern variables to optics without reference to the target value. The reference compiler then transforms uses of the pattern variable to apply the corresponding optic to the @racket[current-update-target]. Reference compilers like @racket[immutable-reference-compiler] simply restrict how DSL-bound variables can be used, but this reference compiler completely alters the behavior of DSL-bound variables. + +We can add even more custom variable behavior. For example, what if instead of getting the value that a pattern variable refers to, we want its optic directly? Somehow, we'd have to prevent the reference compiler from transforming the variable reference to a @racket[lens-get]. We can do this by adding another host interface: + +@racketblock[ +(syntax-spec + (host-interface/expression + (optic x:pattern-var) + #'x)) +] + +The @racket[optic] host interface simply expands to the raw variable reference. Importantly, this will not be transformed by the reference compiler. This is a nice little trick for special variable behaviors. + +We can use the @racket[optic] form to add other forms similar to @racket[set!]: + +@racketblock[ +(define-syntax-rule (modify! x func) + (begin + (current-update-target + (traversal-modify (optic x) + (current-update-target) + func)) + (current-update-target))) +] + +@op-repl[ +(update (list 1 2 3) + [(list a ...) + (modify! a add1)]) +] + +And that's it! We can now use this DSL to perform deep immutable udpates on structures with the convenience and clarity of patterns. + +To summarize some key points: + +@itemlist[ +@item{We created a @racket[syntax-spec] frontend around a procedural library for optics to make it more convenient to use.} +@item{We used custom reference compilers to control the behavior of DSL-bound variables referenced in Racket expressions.} +@item{We used a rewrite rule and DSL macros to use struct names as the head of a DSL form.} +@item{We used a host interface to create a special case for the behavior of a DSL variable used in Racket expressions.} +@item{We used parameters to manage runtime state.} +@item{Our (recursive) pattern compiler has several invariants that inductively ensure its correctness.} +] + +@;TODO actually follow this and make sure it runs diff --git a/scribblings/tutorial/syntax-interpreter-tutorial.scrbl b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl index aec8500..ab2c5ac 100644 --- a/scribblings/tutorial/syntax-interpreter-tutorial.scrbl +++ b/scribblings/tutorial/syntax-interpreter-tutorial.scrbl @@ -45,7 +45,7 @@ Here is the syntax-spec: @racketmod[ racket -(require syntax-spec (for-syntax syntax/parse)) +(require syntax-spec-dev (for-syntax syntax/parse)) (syntax-spec (binding-class lc-var #:binding-space lc) (extension-class lc-macro #:binding-space lc) diff --git a/tests/dsls/miniclass/class.rkt b/tests/dsls/miniclass/class.rkt index 86b301e..7b7c6c2 100644 --- a/tests/dsls/miniclass/class.rkt +++ b/tests/dsls/miniclass/class.rkt @@ -4,19 +4,18 @@ (require "../../../main.rkt" racket/stxparam - syntax/transformer (for-syntax racket/list syntax/parse syntax/transformer)) -(struct class-info [name->method-index method-table constructor]) -;; A ClassInfo is a (class-info (symbol -> natural) (any ... -> Object)) where -;; name->method-index maps a method name to its vector index in the method-table -;; method-table is a vector of methods +(struct class-info [methods constructor]) +;; A ClassInfo is a (class-info (HashEq Symbol Method) (any ... -> Object)) where +;; methods maps a method name to its implementation +;; constructor creates an instance of the class ;; Represents a class itself (struct object [fields class]) -;; An Object is a (object (vector any) (vector Method) Class) where +;; An Object is a (object (VectorOf any) (VectorOf Method) Class) where ;; fields is a vector of field-values ;; class is the class of which this object is an instance ;; Represents an object, which is an instance of a class @@ -47,8 +46,8 @@ (field name:field-var ...) #:binding [(export name) ...] ((~literal define-values) (m:method-var) - (lambda:lambda-id (arg:id ...) body:racket-expr ...)) - #:binding (export m) + (lambda:lambda-id (arg:racket-var ...) body:racket-body ...)) + #:binding [(export m) (scope (bind arg) ... (import body) ...)] ((~literal define-syntaxes) (x:racket-macro ...) e:expr) #:binding (export-syntaxes x ... e) @@ -56,16 +55,17 @@ ((~literal begin) e:class-form ...) #:binding [(re-export e) ...] - e:racket-expr) + e:racket-body + #:binding (re-export e)) (host-interface/expression (class e:class-form ...) #:binding (scope (import e) ...) - (define-values (defns fields exprs) (group-class-decls (splice-begins (attribute e)))) - (compile-class-body defns fields exprs))) + (define-values (defns fields constructor-body) (group-class-decls (splice-begins (attribute e)))) + (compile-class-body defns fields constructor-body))) (begin-for-syntax - (define-persistent-symbol-table field-index-table) + (define field-index-table (local-symbol-table)) #;((listof syntax?) -> (listof syntax?)) ;; splices begins (recursively), returns flattened list of exprs. @@ -86,49 +86,60 @@ (syntax-parse exprs #:literals (define-values define-syntaxes field) [((~alt (~and defn (define-values . _)) - ;; ignore because they don't end up in the generated code (~and stx-defn (define-syntaxes . _)) - (~and field-decl (field . _)) - expr) + (field field-name ...) + constructor-body) ...) + ;; discard stx-defn because syntax definitions don't end up in the generated code (values (attribute defn) - (attribute field-decl) - (attribute expr))])) + #'(field-name ... ...) + (attribute constructor-body))])) #;((listof syntax?) (listof syntax?) (listof syntax?) -> syntax?) ;; compile the partially expanded class-level definitions into pure racket code. ;; This is the actual class logic. - (define (compile-class-body defns fields exprs) - (syntax-parse (list defns fields exprs) + (define (compile-class-body defns fields constructor-body) + (syntax-parse (list defns fields constructor-body) #:literals (define-values field) [(((define-values (method-name:id) (_ (method-arg:id ...) method-body:expr ...)) ...) - ;; only 1 field definition allowed - ((~optional (field field-name:id ...) #:defaults ([(field-name 1) null]))) - (expr ...)) - (check-duplicate-method-names (attribute method-name)) - (for ([field-name (attribute field-name)] - [field-index (in-naturals)]) - (symbol-table-set! field-index-table field-name field-index)) - #'(letrec ([method-table - (vector (lambda (this-arg method-arg ...) - (syntax-parameterize ([this (make-variable-like-transformer #'this-arg)]) - method-body - ...)) - ...)] - [constructor - (lambda (field-name ...) - (let ([this-val (object (vector field-name ...) cls)]) - (syntax-parameterize ([this (make-variable-like-transformer #'this-val)]) - ;; ensure body is non-empty - (void) - expr - ...) - this-val))] - [method-name->index - (make-name->index (list 'method-name ...))] - [cls - (class-info method-name->index method-table constructor)]) + (field-name:id ...) + (constructor-body ...)) + (define/syntax-parse method-table (compile-methods (attribute method-name) (attribute method-arg) (attribute method-body))) + (define/syntax-parse constructor-procedure (compile-constructor (attribute field-name) #'cls (attribute constructor-body))) + #'(letrec ([methods method-table] + [constructor constructor-procedure] + [cls (class-info methods constructor)]) cls)])) + + #;((listof identifier?) (listof (listof identifier?)) (listof syntax?) -> syntax?) + (define (compile-methods method-name method-arg method-body) + (check-duplicate-method-names method-name) + (syntax-parse (list method-name method-arg method-body) + [((method-name ...) ((method-arg ...) ...) ((method-body ...) ...)) + #'(make-immutable-hash + (list + (cons 'method-name + (lambda (this-arg method-arg ...) + (syntax-parameterize ([this (make-variable-like-transformer #'this-arg)]) + method-body + ...))) + ...))])) + + #;((listof identifier?) identifier? (listof syntax?) -> syntax?) + (define (compile-constructor field-name cls constructor-body) + (for ([field-name field-name] + [field-index (in-naturals)]) + (symbol-table-set! field-index-table field-name field-index)) + (syntax-parse (list field-name cls constructor-body) + [((field-name ...) cls (constructor-body ...)) + #'(lambda (field-name ...) + (let ([this-val (object (vector field-name ...) cls)]) + (syntax-parameterize ([this (make-variable-like-transformer #'this-val)]) + ;; ensure body is non-empty + (void) + constructor-body + ...) + this-val))])) (define method-reference-compiler (make-variable-like-reference-compiler @@ -154,15 +165,6 @@ (when duplicate (raise-syntax-error #f "a method with same name has already been defined" duplicate))))) -#;((listof symbol?) -> (symbol? -> natural?)) -;; Create a function that maps method names to their method table indices -(define (make-name->index names) - (let ([table (for/hasheq ([name names] - [idx (in-naturals)]) - (values name idx))]) - (lambda (name) - (hash-ref table name (lambda () (error 'send "no such method ~a" name)))))) - (define (new cls . fields) (apply (class-info-constructor cls) fields)) @@ -176,7 +178,5 @@ #;(object? symbol? (listof any/c) -> any/c) (define (send-rt obj method-name args) (let* ([cls (object-class obj)] - [index ((class-info-name->method-index cls) method-name)] - [method-table (class-info-method-table cls)] - [method (vector-ref method-table index)]) + [method (hash-ref (class-info-methods cls) method-name (lambda () (error 'send "unknown method ~a" method-name)))]) (apply method obj args))) diff --git a/tests/dsls/miniclass/test.rkt b/tests/dsls/miniclass/test.rkt index 938c693..11bddd8 100644 --- a/tests/dsls/miniclass/test.rkt +++ b/tests/dsls/miniclass/test.rkt @@ -5,7 +5,8 @@ (require drracket/check-syntax (for-syntax syntax/parse) rackunit - syntax/macro-testing) + syntax/macro-testing + racket/match) (begin-for-syntax (define v (box 0))) @@ -16,6 +17,7 @@ (define (add y) (+ x y)))) (define foo (new foo% 1)) (check-equal? (send foo add 2) 3)) +#;( (test-case "empty class" (define foo% (class)) (new foo%) @@ -300,3 +302,32 @@ (my-fields x))) #:namespace ns)) 2)) +(test-case "define in method" + (define v + (new + (class + (define (f) + (define x 2) + x)))) + (check-equal? + (send v f) + 2)) +(test-case "match-define" + (define v + (new + (class + (define/match (fact n) + [(0) 1] + [(n) (* n (fact (sub1 n)))])))) + (check-equal? (send v fact 4) + 24)) +#; +(test-case "non-method definition" + (define v + (new + (class + (define x 2) + (define (f) x)))) + (check-equal? (send v f) 2)) + +) \ No newline at end of file