From 098fc638e9e0642fff0ba17e51a396f419f15035 Mon Sep 17 00:00:00 2001 From: Dmitry Gutov Date: Fri, 20 Jan 2023 19:35:01 +0200 Subject: [PATCH 1/3] Check in the newest changes from the emacs-29 branch of Emacs --- ruby-ts-mode.el | 417 +++++++++++++++++++++++++++++++----------------- 1 file changed, 270 insertions(+), 147 deletions(-) diff --git a/ruby-ts-mode.el b/ruby-ts-mode.el index 5c9a25c..2a7d850 100644 --- a/ruby-ts-mode.el +++ b/ruby-ts-mode.el @@ -5,6 +5,7 @@ ;; Author: Perry Smith ;; Created: December 2022 ;; Keywords: ruby languages tree-sitter +;; Version: 0.2 ;; This file is part of GNU Emacs. @@ -23,7 +24,7 @@ ;;; Commentary: -;; This file defines ruby-ts-mode which is a major mode for editting +;; This file defines ruby-ts-mode which is a major mode for editing ;; Ruby files that uses Tree Sitter to parse the language. More ;; information about Tree Sitter can be found in the ELisp Info pages ;; as well as this website: https://tree-sitter.github.io/tree-sitter/ @@ -50,11 +51,12 @@ ;; Currently tree treesit-font-lock-feature-list is set with the ;; following levels: -;; 1: comment method-definition +;; 1: comment method-definition parameter-definition ;; 2: keyword regexp string type -;; 3: builtin constant delimiter escape-sequence -;; global instance -;; interpolation literal symbol variable +;; 3: builtin-variable builtin-constant builtin-function +;; delimiter escape-sequence +;; constant global instance +;; interpolation literal symbol assignment ;; 4: bracket error function operator punctuation ;; Thus if treesit-font-lock-level is set to level 3 which is its @@ -70,6 +72,8 @@ ;; ruby-ts-mode tries to adhere to the indentation related user ;; options from ruby-mode, such as ruby-indent-level, ;; ruby-indent-tabs-mode, and so on. +;; +;; Type 'M-x customize-group RET ruby RET' to see the options. 
;; * IMenu ;; * Navigation @@ -81,17 +85,22 @@ (require 'ruby-mode) (declare-function treesit-parser-create "treesit.c") +(declare-function treesit-induce-sparse-tree "treesit.c") +(declare-function treesit-node-child-by-field-name "treesit.c") +(declare-function treesit-search-subtree "treesit.c") +(declare-function treesit-node-parent "treesit.c") +(declare-function treesit-node-next-sibling "treesit.c") +(declare-function treesit-node-type "treesit.c") +(declare-function treesit-node-child "treesit.c") +(declare-function treesit-node-end "treesit.c") +(declare-function treesit-node-start "treesit.c") +(declare-function treesit-node-string "treesit.c") (defgroup ruby-ts nil "Major mode for editing Ruby code." :prefix "ruby-ts-" :group 'languages) -(defcustom ruby-ts-highlight-predefined-constants t - "When non-nil, the pre-defined constants are highlighted. -They will be highlighted the same way as the pre-defined variables." - :type 'boolean) - (defvar ruby-ts--operators '("+" "-" "*" "/" "%" "**" "==" "!=" ">" "<" ">=" "<=" "<=>" "===" @@ -108,21 +117,29 @@ They will be highlighted the same way as the pre-defined variables." "Ruby's punctuation characters.") (defvar ruby-ts--predefined-constants - (rx (or "ARGF" "ARGV" "DATA" "ENV" "RUBY_COPYRIGHT" + (rx string-start + (or "ARGF" "ARGV" "DATA" "ENV" "RUBY_COPYRIGHT" "RUBY_DESCRIPTION" "RUBY_ENGINE" "RUBY_ENGINE_VERSION" "RUBY_PATCHLEVEL" "RUBY_PLATFORM" "RUBY_RELEASE_DATE" "RUBY_REVISION" "RUBY_VERSION" "STDERR" "STDIN" "STDOUT" - "TOPLEVEL_BINDING")) - "Ruby predefined global constants. -These are currently unused") + "TOPLEVEL_BINDING") + string-end) + "Ruby predefined global constants.") (defvar ruby-ts--predefined-variables - (rx (or "$!" "$@" "$~" "$&" "$‘" "$‘" "$+" "$=" "$/" "$\\" "$," "$;" + (rx string-start + (or "$!" "$@" "$~" "$&" "$‘" "$‘" "$+" "$=" "$/" "$\\" "$," "$;" "$." "$<" "$>" "$_" "$*" "$$" "$?" 
"$:" "$LOAD_PATH" "$LOADED_FEATURES" "$DEBUG" "$FILENAME" "$stderr" "$stdin" "$stdout" "$VERBOSE" "$-a" "$-i" "$-l" "$-p" - (seq "$" (+ digit)))) - "Ruby global variables (but not global constants.") + "$0" "$1" "$2" "$3" "$4" "$5" "$6" "$7" "$8" "$9") + string-end) + "Ruby predefined global variables.") + +(defvar ruby-ts--builtin-methods + (format "\\`%s\\'" (regexp-opt (append ruby-builtin-methods-no-reqs + ruby-builtin-methods-with-reqs))) + "Ruby built-in methods.") (defconst ruby-ts--class-or-module-regex (rx string-start @@ -147,10 +164,9 @@ These are currently unused") "then" "ensure" "body_statement" - "parenthesized_statements" "interpolation") string-end) - "Regular expression of the nodes that can constain statements.") + "Regular expression of the nodes that can contain statements.") (defun ruby-ts--lineno (node) "Return line number of NODE's start." @@ -178,7 +194,7 @@ These are currently unused") Applies `font-lock-comment-delimiter-face' and `font-lock-comment-face' See `treesit-fontify-with-override' for values of OVERRIDE" - ;; Emperically it appears as if (treesit-node-start node) will be + ;; Empirically it appears as if (treesit-node-start node) will be ;; where the # character is at and (treesit-node-end node) will be ;; the end of the line (let* ((node-start (treesit-node-start node)) @@ -193,6 +209,9 @@ values of OVERRIDE" (treesit-fontify-with-override (max plus-1 start) (min node-end end) font-lock-comment-face override))) +(defun ruby-ts--builtin-method-p (node) + (string-match-p ruby-ts--builtin-methods (treesit-node-text node t))) + (defun ruby-ts--font-lock-settings (language) "Tree-sitter font-lock settings for Ruby." 
(treesit-font-lock-rules @@ -201,29 +220,24 @@ values of OVERRIDE" '((comment) @ruby-ts--comment-font-lock) :language language - :feature 'builtin - `(((global_variable) @var (:match ,ruby-ts--predefined-variables @var)) @font-lock-builtin-face - ,@(when ruby-ts-highlight-predefined-constants - `(((constant) @var (:match ,ruby-ts--predefined-constants @var)) @font-lock-builtin-face))) + :feature 'builtin-variable + `(((global_variable) @var (:match ,ruby-ts--predefined-variables @var)) @font-lock-builtin-face) :language language - :feature 'keyword - `([,@ruby-ts--keywords] @font-lock-keyword-face) + :feature 'builtin-constant + `(((constant) @var (:match ,ruby-ts--predefined-constants @var)) @font-lock-builtin-face) :language language - :feature 'constant - '((true) @font-lock-doc-markup-face - (false) @font-lock-doc-markup-face - (nil) @font-lock-doc-markup-face - (self) @font-lock-doc-markup-face - (super) @font-lock-doc-markup-face) + :feature 'keyword + `([,@ruby-ts--keywords] @font-lock-keyword-face + (self) @font-lock-keyword-face + (super) @font-lock-keyword-face) :language language - :feature 'symbol - '((bare_symbol) @font-lock-constant-face - (delimited_symbol (string_content) @font-lock-constant-face) - (hash_key_symbol) @font-lock-constant-face - (simple_symbol) @font-lock-constant-face) + :feature 'constant + '((true) @font-lock-constant-face + (false) @font-lock-constant-face + (nil) @font-lock-constant-face) ;; Before 'operator so (unary) works. 
:language language @@ -237,8 +251,8 @@ values of OVERRIDE" ;; Also before 'operator because % and / are operators :language language :feature 'regexp - '((regex "/" @font-lock-regexp-grouping-construct) - (regex _ (string_content) @font-lock-regexp-grouping-backslash)) + '((regex "/" @font-lock-regexp-face) + (regex _ (string_content) @font-lock-regexp-face)) :language language :feature 'operator @@ -253,21 +267,22 @@ values of OVERRIDE" :feature 'string '((delimited_symbol [ ":\"" "\"" ] @font-lock-string-face) (string "\"" @font-lock-string-face) - (string_array [ "%w(" ")" ] @font-lock-delimiter-face) - (subshell "`" @font-lock-delimiter-face) - (symbol_array [ "%i(" ")"] @font-lock-delimiter-face)) + (string_array ["%w(" ")"] @font-lock-string-face) + (subshell "`" @font-lock-string-face) + (symbol_array ["%i(" ")"] @font-lock-constant-face)) :language language :feature 'string - '((string_content) @font-lock-string-face - (heredoc_beginning) @font-lock-string-face - (heredoc_content) @font-lock-string-face - (heredoc_end) @font-lock-string-face) + '([(string_content) + (heredoc_beginning) + (heredoc_content) + (heredoc_end)] + @font-lock-string-face) :language language :feature 'interpolation - '((interpolation "#{" @font-lock-doc-face) - (interpolation "}" @font-lock-doc-face)) + '((interpolation "#{" @font-lock-misc-punctuation-face) + (interpolation "}" @font-lock-misc-punctuation-face)) :language language :feature 'type @@ -290,6 +305,43 @@ values of OVERRIDE" (method name: (setter) @font-lock-function-name-face)) + :language language + :feature 'parameter-definition + '((method_parameters + (identifier) @font-lock-variable-name-face) + (block_parameters + (identifier) @font-lock-variable-name-face) + (optional_parameter + name: (identifier) @font-lock-variable-name-face) + (splat_parameter + name: (identifier) @font-lock-variable-name-face) + (hash_splat_parameter + name: (identifier) @font-lock-variable-name-face) + (block_parameter + name: (identifier) 
@font-lock-variable-name-face) + (destructured_parameter + (identifier) @font-lock-variable-name-face) + (lambda_parameters + (identifier) @font-lock-variable-name-face) + (exception_variable + (identifier) @font-lock-variable-name-face) + (array_pattern + (identifier) @font-lock-variable-name-face) + (keyword_pattern + value: (identifier) @font-lock-variable-name-face) + (keyword_pattern + key: (hash_key_symbol) @font-lock-variable-name-face + !value) + (as_pattern + name: (identifier) @font-lock-variable-name-face) + (in_clause + pattern: (identifier) @font-lock-variable-name-face)) + + :language language + :feature 'builtin-function + `((((identifier) @font-lock-builtin-face) + (:pred ruby-ts--builtin-method-p @font-lock-builtin-face))) + ;; Yuan recommends also putting method definitions into the ;; 'function' category (thus keeping it in both). I've opted to ;; just use separate categories for them -- dgutov. @@ -298,6 +350,22 @@ values of OVERRIDE" '((call method: (identifier) @font-lock-function-name-face)) + :language language + :feature 'assignment + '((assignment + left: (identifier) @font-lock-variable-name-face) + (assignment + left: (left_assignment_list (identifier) @font-lock-variable-name-face)) + (operator_assignment + left: (identifier) @font-lock-variable-name-face)) + + :language language + :feature 'symbol + '((bare_symbol) @font-lock-constant-face + (delimited_symbol (string_content) @font-lock-constant-face) + (hash_key_symbol) @font-lock-constant-face + (simple_symbol) @font-lock-constant-face) + :language language :feature 'error '((ERROR) @font-lock-warning-face) @@ -352,8 +420,11 @@ Otherwise return start of PRED." 
(lambda (node parent bol &rest rest) (let* ((pred-node (funcall pred node parent bol rest)) (temp (treesit-node-start pred-node)) - (keyword (treesit-node-type pred-node)) - (bol (ruby-smie--indent-to-stmt-p keyword))) + (type (treesit-node-type pred-node)) + (bol (ruby-smie--indent-to-stmt-p + (if (equal type "method") + "def" + type)))) (when temp (if bol (save-excursion @@ -460,10 +531,6 @@ array or hash." (first-child (ruby-ts--first-non-comment-child parent))) (= (ruby-ts--lineno open-brace) (ruby-ts--lineno first-child)))) -(defun ruby-ts--assignment-ancestor (node &rest _) - "Return the assignment ancestor of NODE if any." - (treesit-parent-until node (ruby-ts--type-pred "\\`assignment\\'"))) - (defun ruby-ts--statement-ancestor (node &rest _) "Return the statement ancestor of NODE if any. A statement is defined as a child of a statement container where @@ -479,26 +546,6 @@ a statement container is a node that matches parent (treesit-node-parent parent))) statement)) -(defun ruby-ts--is-in-condition (node &rest _) - "Return the condition node if NODE is within a condition." - (while (and node - (not (equal "condition" (treesit-node-field-name node))) - (not (string-match-p ruby-ts--statement-container-regexp - (treesit-node-type node)))) - (setq node (treesit-node-parent node))) - (and (equal "condition" (treesit-node-field-name node)) node)) - -(defun ruby-ts--endless-method (node &rest _) - "Return the expression node if NODE is in an endless method. -i.e. expr of def foo(args) = expr is returned." - (let* ((method node)) - (while (and method - (not (string-match-p ruby-ts--method-regex (treesit-node-type method)))) - (setq method (treesit-node-parent method))) - (when method - (if (equal "=" (treesit-node-type (treesit-node-child method 3 nil))) - (treesit-node-child method 4 nil))))) - ;; ;; end of functions that can be used for queries ;; @@ -535,11 +582,11 @@ i.e. expr of def foo(args) = expr is returned." 
;; ;; I'm using very restrictive patterns hoping to reduce rules ;; triggering unintentionally. - ((match "else" "if") + ((match "else" "if\\|unless") (ruby-ts--align-keywords ruby-ts--parent-node) 0) ((match "elsif" "if") (ruby-ts--align-keywords ruby-ts--parent-node) 0) - ((match "end" "if") + ((match "end" "if\\|unless") (ruby-ts--align-keywords ruby-ts--parent-node) 0) ((n-p-gp nil "then\\|else\\|elsif" "if\\|unless") (ruby-ts--align-keywords ruby-ts--grand-parent-node) ruby-indent-level) @@ -612,11 +659,15 @@ i.e. expr of def foo(args) = expr is returned." ;; else the second query aligns ;; `ruby-indent-level' spaces in from the parent. ((and ruby-ts--align-chain-p (match "\\." "call")) ruby-ts--align-chain 0) + ;; Obey ruby-method-call-indent, whether the dot is on + ;; this line or the previous line. + ((and (not ruby-ts--method-call-indent-p) + (or + (match "\\." "call") + (query "(call \".\" (identifier) @indent)"))) + parent 0) ((match "\\." "call") parent ruby-indent-level) - ;; ruby-indent-after-block-in-continued-expression - ((match "begin" "assignment") parent ruby-indent-level) - ;; method parameters -- four styles: ;; 1) With paren, first arg on same line: ((and (query "(method_parameters \"(\" _ @indent)") @@ -645,31 +696,36 @@ i.e. expr of def foo(args) = expr is returned." ;; 2) With paren, 1st arg on next line ((and (query "(argument_list \"(\" _ @indent)") (node-is ")")) - (ruby-ts--bol ruby-ts--grand-parent-node) 0) - ((query "(argument_list \"(\" _ @indent)") - (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) + ruby-ts--parent-call-or-bol 0) + ((or (query "(argument_list \"(\" _ @indent)") + ;; No arguments yet; NODE is nil in that case. 
+ (match "\\`\\'" "argument_list")) + ruby-ts--parent-call-or-bol ruby-indent-level) ;; 3) No paren, ruby-parenless-call-arguments-indent is t ((and ruby-ts--parenless-call-arguments-indent-p (parent-is "argument_list")) first-sibling 0) ;; 4) No paren, ruby-parenless-call-arguments-indent is nil - ((parent-is "argument_list") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) + ((parent-is "argument_list") + (ruby-ts--bol ruby-ts--statement-ancestor) ruby-indent-level) ;; Old... probably too simple ((parent-is "block_parameters") first-sibling 1) - ((and (parent-is "binary") - (or ruby-ts--assignment-ancestor - ruby-ts--is-in-condition - ruby-ts--endless-method)) - first-sibling 0) + ((and (not ruby-ts--after-op-indent-p) + (parent-is "binary\\|conditional")) + (ruby-ts--bol ruby-ts--statement-ancestor) ruby-indent-level) + + ((parent-is "binary") + ruby-ts--binary-indent-anchor 0) + + ((parent-is "conditional") parent ruby-indent-level) ;; ruby-mode does not touch these... ((match "bare_string" "string_array") no-indent 0) - ;; hash and array other than assignments. Note that the - ;; first sibling is the "{" or "[". There is a special - ;; case where the hash is an argument to a method. These - ;; need to be processed first. + ;; hash and array. Note that the first sibling is the "{" + ;; or "[". There is a special case where the hash is an + ;; argument to a method. These need to be processed first. ((and ruby-ts--same-line-hash-array-p (match "}" "hash")) first-sibling 0) @@ -680,44 +736,22 @@ i.e. expr of def foo(args) = expr is returned." ((and ruby-ts--same-line-hash-array-p (parent-is "array")) (nth-sibling 0 ruby-ts--true) 0) - ;; NOTE to folks trying to understand my insanity... - ;; I having trouble understanding the "logic" of why things - ;; are indented like they are so I am adding special cases - ;; hoping at some point I will be struck by lightning. 
- ((and (n-p-gp "}" "hash" "pair") - (not ruby-ts--same-line-hash-array-p)) - grand-parent 0) - ((and (n-p-gp "pair" "hash" "pair") - (not ruby-ts--same-line-hash-array-p)) - grand-parent ruby-indent-level) - ((and (n-p-gp "}" "hash" "method") - (not ruby-ts--same-line-hash-array-p)) - grand-parent 0) - ((and (n-p-gp "pair" "hash" "method") - (not ruby-ts--same-line-hash-array-p)) - grand-parent ruby-indent-level) - - ((n-p-gp "}" "hash" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) 0) - ((n-p-gp nil "hash" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) - ((n-p-gp "]" "array" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) 0) - ((n-p-gp nil "array" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) - - ((n-p-gp "}" "hash" "argument_list") first-sibling 0) - ((n-p-gp nil "hash" "argument_list") first-sibling ruby-indent-level) - ((n-p-gp "]" "array" "argument_list") first-sibling 0) - ((n-p-gp nil "array" "argument_list") first-sibling ruby-indent-level) - - ((match "}" "hash") first-sibling 0) - ((parent-is "hash") first-sibling ruby-indent-level) - ((match "]" "array") first-sibling 0) - ((parent-is "array") first-sibling ruby-indent-level) + ((match "}" "hash") ruby-ts--parent-call-or-bol 0) + ((parent-is "hash") ruby-ts--parent-call-or-bol ruby-indent-level) + ((match "]" "array") ruby-ts--parent-call-or-bol 0) + ((parent-is "array") ruby-ts--parent-call-or-bol ruby-indent-level) + + ((parent-is "pair") ruby-ts--parent-call-or-bol 0) + + ((match ")" "parenthesized_statements") parent-bol 0) + ((parent-is "parenthesized_statements") parent-bol ruby-indent-level) ;; If the previous method isn't finished yet, this will get ;; the next method indented properly. 
((n-p-gp ,ruby-ts--method-regex "body_statement" ,ruby-ts--class-or-module-regex) (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) - ;; Match the end of a class / modlue + ;; Match the end of a class / module ((match "end" ,ruby-ts--class-or-module-regex) parent 0) ;; A "do_block" has a "body_statement" child which has the @@ -728,12 +762,20 @@ i.e. expr of def foo(args) = expr is returned." ;; but with node set to the statement and parent set to ;; body_statement for all others. ... Fine. Be that way. ;; Ditto for "block" and "block_body" - ((node-is "body_statement") parent-bol ruby-indent-level) - ((parent-is "body_statement") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) - ((match "end" "do_block") parent-bol 0) - ((n-p-gp "block_body" "block" nil) parent-bol ruby-indent-level) - ((n-p-gp nil "block_body" "block") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) - ((match "}" "block") (ruby-ts--bol ruby-ts--grand-parent-node) 0) + ((node-is "body_statement") + (ruby-ts--block-indent-anchor ruby-ts--parent-node) + ruby-indent-level) + ((parent-is "body_statement") + (ruby-ts--block-indent-anchor ruby-ts--grand-parent-node) + ruby-indent-level) + ((match "end" "do_block") (ruby-ts--block-indent-anchor ruby-ts--parent-node) 0) + ((n-p-gp "block_body" "block" nil) + (ruby-ts--block-indent-anchor ruby-ts--parent-node) + ruby-indent-level) + ((n-p-gp nil "block_body" "block") + (ruby-ts--block-indent-anchor ruby-ts--grand-parent-node) + ruby-indent-level) + ((match "}" "block") (ruby-ts--block-indent-anchor ruby-ts--parent-node) 0) ;; Chained strings ((match "string" "chained_string") first-sibling 0) @@ -742,6 +784,78 @@ i.e. expr of def foo(args) = expr is returned." (catch-all parent-bol ruby-indent-level)))) `((ruby . 
,common)))) +(defun ruby-ts--block-indent-anchor (block-node-getter) + (lambda (node parent _bol &rest _rest) + (let ((block-node (funcall block-node-getter node parent))) + (save-excursion + (goto-char + (treesit-node-start + (if ruby-block-indent + (ruby-ts--statement-ancestor block-node) + block-node))) + (back-to-indentation) + (point))))) + +(defun ruby-ts--binary-indent-anchor (_node parent _bol &rest _) + (save-excursion + (goto-char (treesit-node-start parent)) + (when (string-match-p ruby-ts--statement-container-regexp + (treesit-node-type (treesit-node-parent parent))) + ;; Hack alert: it's not the proper place to alter the offset. + ;; Redoing the analysis in the OFFSET form seems annoying, + ;; though. (**) + (forward-char ruby-indent-level)) + (point))) + +(defun ruby-ts--parent-call-or-bol (_not parent _bol &rest _) + (let* ((parent-bol (save-excursion + (goto-char (treesit-node-start parent)) + (back-to-indentation) + (point))) + (found + (treesit-parent-until + parent + (lambda (node) + (or (< (treesit-node-start node) parent-bol) + (string-match-p "\\`array\\|hash\\'" (treesit-node-type node)) + ;; Method call on same line. + (equal (treesit-node-type node) "argument_list")))))) + (cond + ((null found) + parent-bol) + ;; No paren/curly/brace found on the same line. + ((< (treesit-node-start found) parent-bol) + parent-bol) + ;; Hash or array opener on the same line. + ((string-match-p "\\`array\\|hash\\'" (treesit-node-type found)) + (save-excursion + (goto-char (treesit-node-start (treesit-node-child found 1))) + (point))) + ;; Parenless call found: indent to stmt with offset. + ((not ruby-parenless-call-arguments-indent) + (save-excursion + (goto-char (treesit-node-start + (ruby-ts--statement-ancestor found))) + ;; (**) Same. + (+ (point) ruby-indent-level))) + ;; Call with parens -- indent to first arg. 
+ ((equal (treesit-node-type (treesit-node-child found 0)) + "(") + (save-excursion + (goto-char (treesit-node-start (treesit-node-child found 1))) + (point))) + ;; Indent to the parenless call args beginning. + (t + (save-excursion + (goto-char (treesit-node-start found)) + (point)))))) + +(defun ruby-ts--after-op-indent-p (&rest _) + ruby-after-operator-indent) + +(defun ruby-ts--method-call-indent-p (&rest _) + ruby-method-call-indent) + (defun ruby-ts--class-or-module-p (node) "Predicate if NODE is a class or module." (string-match-p ruby-ts--class-or-module-regex (treesit-node-type node))) @@ -844,7 +958,12 @@ The hash (#) is for instance methods only which are methods dot (.) is used. Double colon (::) is used between classes. The leading double colon is not added." (let* ((node (treesit-node-at (point))) - (method (treesit-parent-until node (ruby-ts--type-pred ruby-ts--method-regex))) + (method-pred + (lambda (node) + (and (<= (treesit-node-start node) (point)) + (>= (treesit-node-end node) (point)) + (string-match-p ruby-ts--method-regex (treesit-node-type node))))) + (method (treesit-parent-until node method-pred t)) (class (or method node)) (result nil) (sep "#") @@ -892,21 +1011,11 @@ leading double colon is not added." "C-c C-f" #'ruby-find-library-file) ;;;###autoload -(define-derived-mode ruby-ts-mode prog-mode "Ruby" +(define-derived-mode ruby-ts-mode ruby-base-mode "Ruby" "Major mode for editing Ruby, powered by tree-sitter." :group 'ruby :syntax-table ruby-mode-syntax-table - (setq indent-tabs-mode ruby-indent-tabs-mode) - - (setq-local paragraph-start (concat "$\\|" page-delimiter)) - (setq-local paragraph-separate paragraph-start) - (setq-local paragraph-ignore-fill-prefix t) - - (setq-local comment-start "# ") - (setq-local comment-end "") - (setq-local comment-start-skip "#+ *") - (unless (treesit-ready-p 'ruby) (error "Tree-sitter for Ruby isn't available")) @@ -929,16 +1038,30 @@ leading double colon is not added." 
(setq-local treesit-font-lock-settings (ruby-ts--font-lock-settings 'ruby)) ;; Level 3 is the default. (setq-local treesit-font-lock-feature-list - '(( comment method-definition ) + '(( comment method-definition parameter-definition) ( keyword regexp string type) - ( builtin constant - delimiter escape-sequence global - instance - interpolation literal symbol variable) + ( builtin-variable builtin-constant builtin-function + delimiter escape-sequence + constant global instance + interpolation literal symbol assignment) ( bracket error function operator punctuation))) (treesit-major-mode-setup)) +(if (treesit-ready-p 'ruby) + ;; Copied from ruby-mode.el. + (add-to-list 'auto-mode-alist + (cons (concat "\\(?:\\.\\(?:" + "rbw?\\|ru\\|rake\\|thor" + "\\|jbuilder\\|rabl\\|gemspec\\|podspec" + "\\)" + "\\|/" + "\\(?:Gem\\|Rake\\|Cap\\|Thor" + "\\|Puppet\\|Berks\\|Brew" + "\\|Vagrant\\|Guard\\|Pod\\)file" + "\\)\\'") + 'ruby-ts-mode))) + (provide 'ruby-ts-mode) ;;; ruby-ts-mode.el ends here From 3206595e1b7c3a7dfcc8b930cf1f95fd545c0b1c Mon Sep 17 00:00:00 2001 From: Dmitry Gutov Date: Fri, 20 Jan 2023 19:45:11 +0200 Subject: [PATCH 2/3] Update setup instructions --- README.org | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.org b/README.org index d38805d..93cf0c8 100644 --- a/README.org +++ b/README.org @@ -10,18 +10,18 @@ This is where I am going to keep notes as I develop the I'm hoping not to forget some steps I did... 1) Initial set up: - - 1. =git clone git@github.com:casouri/tree-sitter-module.git= - 2. =cd tree-sitter-module= - 3. =./build.sh ruby= - 4. =mkdir ~/.config/emacs/tree-sitter= - 5. =mv dist/libtree-sitter-ruby.dylib ~/.config/emacs/tree-sitter= + + 1. ~M-x treesit-install-language-grammar~ + 2. Write =ruby= when prompted for the language. + 2. Answer ~y~ to building interactively. + 3. Paste =https://github.com/tree-sitter/tree-sitter-ruby= when asked for the repo. + 4. 
Pick default answers for the remaining questions. + 5. Wait for the success message in the message area. 2) Loaded a Ruby file and did ~M-: (treesit-parser-create 'ruby)~ which returned ~#~... I guess that's good. - + 3) ~(treesit-parser-list)~ to check the list of parsers for the current buffer. 4) ~M-x treesit-inspect-mode~ and it seemed happy. - From 0cc3d812cd3c28cfac7d8adf8cc9905b9c3dd002 Mon Sep 17 00:00:00 2001 From: Dmitry Gutov Date: Sun, 5 Feb 2023 02:48:32 +0200 Subject: [PATCH 3/3] More changes from emacs 29 --- ruby-ts-mode.el | 110 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 8 deletions(-) diff --git a/ruby-ts-mode.el b/ruby-ts-mode.el index 2a7d850..e83bc2f 100644 --- a/ruby-ts-mode.el +++ b/ruby-ts-mode.el @@ -95,6 +95,11 @@ (declare-function treesit-node-end "treesit.c") (declare-function treesit-node-start "treesit.c") (declare-function treesit-node-string "treesit.c") +(declare-function treesit-query-compile "treesit.c") +(declare-function treesit-query-capture "treesit.c") +(declare-function treesit-parser-add-notifier "treesit.c") +(declare-function treesit-parser-buffer "treesit.c") +(declare-function treesit-parser-list "treesit.c") (defgroup ruby-ts nil "Major mode for editing Ruby code." @@ -128,7 +133,7 @@ (defvar ruby-ts--predefined-variables (rx string-start - (or "$!" "$@" "$~" "$&" "$‘" "$‘" "$+" "$=" "$/" "$\\" "$," "$;" + (or "$!" "$@" "$~" "$&" "$`" "$'" "$+" "$=" "$/" "$\\" "$," "$;" "$." "$<" "$>" "$_" "$*" "$$" "$?" "$:" "$LOAD_PATH" "$LOADED_FEATURES" "$DEBUG" "$FILENAME" "$stderr" "$stdin" "$stdout" "$VERBOSE" "$-a" "$-i" "$-l" "$-p" @@ -209,9 +214,6 @@ values of OVERRIDE" (treesit-fontify-with-override (max plus-1 start) (min node-end end) font-lock-comment-face override))) -(defun ruby-ts--builtin-method-p (node) - (string-match-p ruby-ts--builtin-methods (treesit-node-text node t))) - (defun ruby-ts--font-lock-settings (language) "Tree-sitter font-lock settings for Ruby." 
(treesit-font-lock-rules @@ -340,7 +342,7 @@ values of OVERRIDE" :language language :feature 'builtin-function `((((identifier) @font-lock-builtin-face) - (:pred ruby-ts--builtin-method-p @font-lock-builtin-face))) + (:match ,ruby-ts--builtin-methods @font-lock-builtin-face))) ;; Yuan recommends also putting method definitions into the ;; 'function' category (thus keeping it in both). I've opted to @@ -555,7 +557,7 @@ a statement container is a node that matches (let ((common `( ;; Slam all top level nodes to the left margin - ((parent-is "program") parent 0) + ((parent-is "program") point-min 0) ;; Do not indent here docs or the end. Not sure why it ;; takes the grand-parent but ok fine. @@ -566,6 +568,12 @@ a statement container is a node that matches ((n-p-gp nil nil "regex") no-indent 0) ((parent-is "regex") no-indent 0) + ;; Incomplete buffer state, better not reindent (bug#61017). + ((and (parent-is "ERROR") + (or (node-is ,ruby-ts--class-or-module-regex) + (node-is "\\`def\\'"))) + no-indent 0) + ;; if then else elseif notes: ;; ;; 1. The "then" starts at the end of the line that ends @@ -665,7 +673,7 @@ a statement container is a node that matches (or (match "\\." "call") (query "(call \".\" (identifier) @indent)"))) - parent 0) + (ruby-ts--bol ruby-ts--statement-ancestor) ruby-indent-level) ((match "\\." "call") parent ruby-indent-level) ;; method parameters -- four styles: @@ -999,6 +1007,78 @@ leading double colon is not added." (concat result sep method-name) result))) +(defvar ruby-ts--s-p-query + (when (treesit-available-p) + (treesit-query-compile 'ruby + '(((heredoc_body) @heredoc) + ;; $' $" $`. + ((global_variable) @global_var + (:match "\\`\\$[#\"'`:?]" @global_var)) + ;; ?' ?" ?` are character literals. + ((character) @char + (:match "\\`?[#\"'`:?]" @char)) + ;; Symbols like :+, :<=> or :foo=. + ((simple_symbol) @symbol + (:match "[[:punct:]]" @symbol)) + ;; Method calls with name ending with ? or !. 
+ ((call method: (identifier) @ident) + (:match "[?!]\\'" @ident)) + ;; Backtick method redefinition. + ((operator "`" @backtick)) + ;; TODO: Stop at interpolations. + ((regex "/" @regex_slash)) + ;; =begin...=end + ((comment) @comm + (:match "\\`=" @comm)) + ;; Percent literals: %w[], %q{}, ... + ((string) @percent + (:match "\\`%" @percent)))))) + +(defun ruby-ts--syntax-propertize (beg end) + (let ((captures (treesit-query-capture 'ruby ruby-ts--s-p-query beg end))) + (pcase-dolist (`(,name . ,node) captures) + (pcase-exhaustive name + ('regex_slash + ;; N.B.: A regexp literal with modifiers actually includes them in + ;; the trailing "/" node. + (put-text-property (treesit-node-start node) (1+ (treesit-node-start node)) + 'syntax-table + ;; Differentiate the %r{...} literals. + (if (eq ?/ (char-after (treesit-node-start node))) + (string-to-syntax "\"/") + (string-to-syntax "|")))) + ('ident + (put-text-property (1- (treesit-node-end node)) (treesit-node-end node) + 'syntax-table (string-to-syntax "_"))) + ('symbol + (put-text-property (1+ (treesit-node-start node)) (treesit-node-end node) + 'syntax-table (string-to-syntax "_"))) + ('heredoc + (put-text-property (treesit-node-start node) (1+ (treesit-node-start node)) + 'syntax-table (string-to-syntax "\"")) + (put-text-property (treesit-node-end node) (1+ (treesit-node-end node)) + 'syntax-table (string-to-syntax "\""))) + ('percent + ;; FIXME: Put the first one on the first paren in both %Q{} and %(). + ;; That would stop electric-pair-mode from pairing, though. Hmm. 
+ (put-text-property (treesit-node-start node) (1+ (treesit-node-start node)) + 'syntax-table (string-to-syntax "|")) + (put-text-property (1- (treesit-node-end node)) (treesit-node-end node) + 'syntax-table (string-to-syntax "|"))) + ((or 'global_var 'char) + (put-text-property (treesit-node-start node) (1+ (treesit-node-start node)) + 'syntax-table (string-to-syntax "'")) + (put-text-property (1+ (treesit-node-start node)) (treesit-node-end node) + 'syntax-table (string-to-syntax "_"))) + ('backtick + (put-text-property (treesit-node-start node) (treesit-node-end node) + 'syntax-table (string-to-syntax "_"))) + ('comm + (dolist (pos (list (treesit-node-start node) + (1- (treesit-node-end node)))) + (put-text-property pos (1+ pos) 'syntax-table + (string-to-syntax "!")))))))) + (defvar-keymap ruby-ts-mode-map :doc "Keymap used in Ruby mode" :parent prog-mode-map @@ -1046,7 +1126,21 @@ leading double colon is not added." interpolation literal symbol assignment) ( bracket error function operator punctuation))) - (treesit-major-mode-setup)) + (treesit-major-mode-setup) + + (treesit-parser-add-notifier (car (treesit-parser-list)) + #'ruby-ts--parser-after-change) + + (setq-local syntax-propertize-function #'ruby-ts--syntax-propertize)) + +(defun ruby-ts--parser-after-change (ranges parser) + ;; Make sure we re-syntax-propertize the full node that is being + ;; edited. This is most pertinent to multi-line complex nodes such + ;; as heredocs. + (when ranges + (with-current-buffer (treesit-parser-buffer parser) + (syntax-ppss-flush-cache (cl-loop for r in ranges + minimize (car r)))))) (if (treesit-ready-p 'ruby) ;; Copied from ruby-mode.el.