diff --git a/Cargo.lock b/Cargo.lock index 31fbd4b37..4e73c00f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -209,7 +209,7 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aether_lsp_utils" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "anyhow", "biome_line_index", @@ -246,7 +246,7 @@ dependencies = [ [[package]] name = "air_r_factory" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "air_r_syntax", "biome_rowan", @@ -255,7 +255,7 @@ dependencies = [ [[package]] name = "air_r_parser" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "air_r_factory", "air_r_syntax", @@ -271,7 +271,7 @@ dependencies = [ [[package]] name = "air_r_syntax" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "biome_rowan", "serde", @@ -347,6 +347,7 @@ version = "0.1.222" dependencies = [ "actix-web", "aether_lsp_utils", + "air_r_factory", "air_r_parser", "air_r_syntax", "amalthea", @@ -356,6 +357,7 @@ dependencies = [ "base64 0.21.0", "biome_line_index", "biome_rowan", + "blake3", "bus", "cc", "cfg-if", @@ -409,6 +411,18 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert_matches" version = "1.5.0" @@ -474,7 +488,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "biome_console" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_markup", "biome_text_size", @@ -487,7 +501,7 @@ dependencies = [ [[package]] name = "biome_diagnostics" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "backtrace", "biome_console", @@ -507,7 +521,7 @@ dependencies = [ [[package]] name = "biome_diagnostics_categories" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "quote", "serde", @@ -516,7 +530,7 @@ dependencies = [ [[package]] name = "biome_diagnostics_macros" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "proc-macro-error2", "proc-macro2", @@ -527,7 +541,7 @@ dependencies = [ [[package]] name = "biome_line_index" version = "0.1.0" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_text_size", "rustc-hash", @@ -536,7 +550,7 @@ dependencies = [ [[package]] name = "biome_markup" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "proc-macro-error2", "proc-macro2", @@ -546,7 +560,7 @@ dependencies = [ [[package]] name = "biome_parser" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_console", "biome_diagnostics", @@ -560,7 +574,7 @@ dependencies = [ [[package]] name = "biome_rowan" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_text_edit", "biome_text_size", @@ -572,7 +586,7 @@ dependencies = [ [[package]] name = "biome_text_edit" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_text_size", "serde", @@ -582,7 +596,7 @@ dependencies = [ [[package]] name = "biome_text_size" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "serde", ] @@ -590,7 +604,7 @@ dependencies = [ [[package]] name = "biome_unicode_table" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" [[package]] name = "bitflags" @@ -604,6 +618,20 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -738,6 +766,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "convert_case" version = "0.4.0" @@ -763,9 +797,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.7" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ] @@ -1934,7 +1968,7 @@ dependencies = [ [[package]] name = "line_ending" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "memchr", "settings", @@ -3029,7 +3063,7 @@ dependencies = [ [[package]] name = "settings" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" [[package]] name = "sha1" diff --git a/Cargo.toml b/Cargo.toml index 38aea55bf..2a880760e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,10 +25,11 @@ license = "MIT" authors = ["Posit Software, PBC"] [workspace.dependencies] -biome_line_index = { git = "https://github.com/biomejs/biome", rev = "c13fc60726883781e4530a4437724273b560c8e0" } -biome_rowan = { git = "https://github.com/biomejs/biome", rev = "c13fc60726883781e4530a4437724273b560c8e0" } -aether_lsp_utils = { git = "https://github.com/posit-dev/air", rev = "f959e32eee91" } -aether_parser = { git = "https://github.com/posit-dev/air", package = "air_r_parser", rev = "f959e32eee91" } -aether_syntax = { git = "https://github.com/posit-dev/air", package = "air_r_syntax", rev = "f959e32eee91" } +biome_line_index = { git = "https://github.com/lionel-/biome", rev = "41d799cfa4cedd25625fc3f6bd7898532873f051" } +biome_rowan = { git = "https://github.com/lionel-/biome", rev = "41d799cfa4cedd25625fc3f6bd7898532873f051" } +aether_factory = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba", package = "air_r_factory" } +aether_lsp_utils = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba" } +aether_parser = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba", package = "air_r_parser" } +aether_syntax = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba", package = "air_r_syntax" } # For https://github.com/ebkalderon/tower-lsp/pull/428 tower-lsp = { branch = "bugfix/patches", git = "https://github.com/lionel-/tower-lsp" } diff --git a/crates/ark/Cargo.toml b/crates/ark/Cargo.toml index ffa669405..e81549ff8 100644 --- a/crates/ark/Cargo.toml +++ b/crates/ark/Cargo.toml @@ -18,6 +18,7 @@ async-trait = "0.1.66" base64 = "0.21.0" biome_line_index.workspace = true biome_rowan.workspace = true +blake3 = "1.8.2" bus = "2.3.0" cfg-if = "1.0.0" crossbeam = { version = "0.8.2", features = ["crossbeam-channel"] } @@ -27,6 +28,7 @@ dashmap = "5.4.0" aether_parser.workspace = true aether_syntax.workspace = true aether_lsp_utils.workspace = true +aether_factory.workspace = true ego-tree = "0.6.2" harp = { path = "../harp" } http = "0.2.9" diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 4bb925fe9..48ec29e06 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -1,63 +1,918 @@ // // console_annotate.rs // -// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// Copyright (C) 2026 Posit Software, PBC. All rights reserved. // +use aether_syntax::RBracedExpressions; +use aether_syntax::RExpressionList; +use aether_syntax::RLanguage; +use aether_syntax::RSyntaxElement; +use aether_syntax::RSyntaxKind; +use aether_syntax::RSyntaxNode; use amalthea::wire::execute_request::CodeLocation; +use anyhow::anyhow; +use biome_line_index::LineIndex; use biome_rowan::AstNode; +use biome_rowan::SyntaxRewriter; +use biome_rowan::TextSize; +use biome_rowan::TriviaPieceKind; +use biome_rowan::VisitNodeSignal; +use harp::object::RObject; +use libr::SEXP; +use url::Url; -pub(crate) fn annotate_input(code: &str, location: CodeLocation) -> String { - let node = aether_parser::parse(code, Default::default()).tree(); - let Some(first_token) = node.syntax().first_token() else { - return code.into(); +use crate::dap::dap::Breakpoint; +use crate::dap::dap::BreakpointState; +use crate::dap::dap::InvalidReason; +use crate::interface::RMain; + +/// Function name used for auto-stepping over injected calls such as breakpoints +const AUTO_STEP_FUNCTION: &str = ".ark_auto_step"; + +// Main responsibilities of code annotation: +// +// 1. Inject breakpoints in the code, along with line directives that map to +// original document lines. Breakpoints can only be injected in the top-level +// expression list or in a `{}` list. +// +// 2. Mark invalid breakpoints as such in the DAP state. This concerns +// breakpoints on a closing `}` or inside multi-line expressions. +// The breakpoints are marked invalid by mutation as soon as they are matched to +// an invalid location by our code walker. +// +// 3. When sourcing with `source()` or `load_all()`, wrap in `{}` to allow +// top-level stepping, inject breakpoints, and inject top-level verification +// calls to let Ark know a breakpoint is now active after evaluation. This is +// handled in a separate code path in `annotate_source()`. + +// Breakpoint injection at parse-time rather than eval-time. +// +// Traditional approaches (like RStudio's) inject `browser()` calls into live +// objects at eval-time. This is fragile: you must find all copies of the object +// (explicit/implicit exports, method tables, etc.), preserve source references +// after injection, and handle edge cases for each object system (R6, S7, ...). +// These injection routines are ad hoc and require ongoing maintenance to add +// missing injection places as the ecosystem evolves. +// +// By injecting breakpoints before R ever sees the code, we sidestep all of +// this. R only evaluates code that already contains breakpoint calls, so we +// never miss copies and source references are correct from the start. +// +// A breakpoint injected on `expression_to_break_on` looks like this: +// +// ```r +// .ark_auto_step(.ark_breakpoint(browser(), "*url*", "*id*")) +// #line *line* "*url*" +// expression_to_break_on +// ``` +// +// - `.ark_auto_step()` is an identity function that serves as sentinel when R +// steps through code. If the user steps on an injected breakpoint, we detect +// the auto-step call in the `debug at` message emitted by R and automatically +// step over it (i.e. call `n`). +// +// - `.ark_breakpoint()` takes a `browser()` call promised in the current +// environment, a URL, and the breakpoint's unique ID. It only forces the +// browser argument if the breakpoint is active. Since the argument is promised +// in the call-site environment, this cause R to mark that environment as being +// debugged. +// +// It does not stop quite at the right place though, inside the +// `.ark_breakpoint()` wrapper, with `.ark_auto_step()` on the stack as well. To +// solve this, there is a second condition triggering auto-stepping in +// ReadConsole: if the function of the top stack frame is `.ark_breakpoint()` +// (which we detect through a class assigned to the function), then we +// auto-step. This causes R to resume evaluation. Since the call-site +// environment is being debugged, it stops at the next expression automatically, +// in this case `expression_to_break_on`. +// +// - The `#line` directive right above `expression_to_break_on` maps the source +// references to the original location in the source document. When R stops on +// the expression, it emits the original location, allowing the DAP to +// communicate the appropriate stopping place to the frontend. + +// Source instrumentation +// +// `base::source()` and `devtools::load_all()` need two things: +// +// - Breakpoint injection as described above. +// +// - Top-level adjustments so it's possible to step through a script or +// top-level package file (the latter is rarely useful but is a side benefit +// from using the same implementation as `source()`). +// +// If the sourced file looks like: +// +// ```r +// 1 +// 2 +// ``` +// +// The instrumented version ends up as: +// +// ```r +// { +// #line 1 "file:///file.R" +// 1 +// base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) +// #line 2 "file:///file.R" +// 2 +// base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +// } +// ``` +// +// - The whole source is wrapped in `{}` to allow R to step through the code. +// - Line directives map each expression to original source. +// - An auto-stepped `.ark_verify_breakpoints_range()` call after each +// expression lets the DAP know that any breakpoints spanned by the last +// expression are now "verified", i.e. the breakpoints have been injected and +// the code containing them has been evaluated. + +// Breakpoint injection uses a Biome `SyntaxRewriter`, a tree visitor with +// preorder and postorder hooks that allows replacing nodes on the way out. +// +// - Preorder (`visit_node`): Cache line information for braced expressions. +// We record where each expression starts and its line range. This must be +// done before any modifications because the postorder hook sees a partially +// rebuilt tree with shifted token offsets. +// +// - Postorder (`visit_node_post`): Process expression lists bottom-up. For +// lists inside braces, we inject breakpoint calls, add `#line` directives, +// and mark remaining breakpoints (e.g. on closing braces) as invalid. The +// cached line info represents original source positions, which is exactly +// what we need for anchoring breakpoints to document lines. +// +// Note: The line info cached in pre-order visit can potentially become stale in +// the post-order hook, since the latter is operating on a reconstructed tree. +// However, we only use the cached info to reason about where original +// expressions lived in the source document, not where they end up in the +// rewritten tree. This is safe as long as we don't reorder or remove +// expressions (we only inject siblings and trivia). +// +// We use `SyntaxRewriter` instead of `BatchMutation` because the latter: +// +// - Doesn't handle insertions in lists (only replacements). +// +// - Doesn't handle nested changes in a node that is later replaced. For example: +// +// ```r +// { # BP 1 +// 1 # BP 2 +// } +// ``` +// +// BP 2 causes changes inside the braces. Then BP 1 causes the whole brace +// expression to be replaced with a variant that has a `#line` directive. +// BatchMutation can't express both changes because it takes modifications +// upfront. `SyntaxRewriter` lets us replace nodes bottom-up as we go. + +/// Annotate console input for `ReadConsole`. +/// +/// - Adds a `#line` directive to map the code back to its document location. +/// - Adds leading whitespace to align with the original character offset. +/// - Injects breakpoint calls if any breakpoints are set. +pub(crate) fn annotate_input( + code: &str, + location: CodeLocation, + breakpoints: Option<&mut [Breakpoint]>, +) -> anyhow::Result { + // First, inject breakpoints into the original code. This must happen before + // we add the outer line directive, otherwise the coordinates of inner line + // directives are shifted by 1 line. + let annotated_code = if let Some(breakpoints) = breakpoints { + let root = aether_parser::parse(code, Default::default()).tree(); + let line_index = LineIndex::new(code); + + // The line offset is `doc_line = code_line + line_offset`. + // Code line 0 corresponds to document line `location.start.line`. + let line_offset = location.start.line as i32; + + let mut rewriter = + AnnotationRewriter::new(&location.uri, breakpoints, line_offset, &line_index); + let out = rewriter.transform(root.syntax().clone()); + + if let Some(err) = rewriter.take_err() { + return Err(err); + } + + out.to_string() + } else { + code.to_string() }; + // Now add the line directive to the (possibly modified) code. This maps the + // code to evaluate to a location in the original document. let line_directive = format!( "#line {line} \"{uri}\"", line = location.start.line + 1, uri = location.uri ); - // Leading whitespace to ensure that R starts parsing expressions from - // the expected `character` offset. + // Add leading whitespace to ensure that R starts parsing expressions from + // the expected `character` offset let leading_padding = " ".repeat(location.start.character as usize); - // Collect existing leading trivia as (kind, text) tuples - let existing_trivia: Vec<_> = first_token + Ok(format!( + "{line_directive}\n{leading_padding}{annotated_code}" + )) +} + +/// Annotate source code for `source()` and `pkgload::load_all()`. +/// +/// - Wraps the whole source in a `{}` block first. This allows R to step through +/// top-level expressions and makes all breakpoints "nested" inside braces. +/// - Injects breakpoint calls (`.ark_auto_step(.ark_breakpoint(...))`) at +/// breakpoint locations. +/// - Injects verification calls (`.ark_auto_step(.ark_verify_breakpoints_range(...))`) +/// after expressions containing breakpoints. +/// - `#line` directives after injected calls to restore correct line mapping. +pub(crate) fn annotate_source( + code: &str, + uri: &Url, + breakpoints: &mut [Breakpoint], +) -> anyhow::Result { + // Wrap code in braces first. This: + // 1. Allows R to step through top-level expressions + // 2. Makes all breakpoints valid (they're now inside braces, at top-level they'd be invalid) + // This enables uniform treatment by `AnnotationRewriter` for input and source cases. + let wrapped = format!("{{\n{code}\n}}"); + let line_index = LineIndex::new(&wrapped); + + let root = aether_parser::parse(&wrapped, Default::default()).tree(); + + // `line_offset` = -1 because: + // - Wrapped line 0 is `{` + // - Wrapped line 1 is original line 0 + // - doc_line = code_line + line_offset = code_line - 1 + let line_offset: i32 = -1; + + let mut rewriter = AnnotationRewriter::new(uri, breakpoints, line_offset, &line_index); + let transformed = rewriter.transform(root.syntax().clone()); + + if let Some(err) = rewriter.take_err() { + return Err(err); + } + + let transformed_code = transformed.to_string(); + + // Add a trailing verify call to handle any injected breakpoint in trailing + // position. Normally we'd inject a verify call as well a line directive + // that ensures source references remain correct after the verify call. + // But for the last expression in a list, there is no sibling node to attach + // the line directive trivia to. So, instead of adding a verify call, we + // rely on verification in a parent list instead. If trailing, there won't + // be any verification calls at all though, so we manually add one there: + // + // ```r + // { + // foo({ + // .ark_auto_step(.breakpoint(...)) + // #line ... + // expr + // }) + // } + // .ark_auto_step(.ark_verify_breakpoints(...)) # <- Manual injection + // ``` + // + // This is unconditional for simplicity. + let last_line = code.lines().count() as u32; + let trailing_verify = format_verify_call(uri, &(0..last_line)); + + Ok(format!("{}\n{trailing_verify}\n", transformed_code.trim())) +} + +/// Rewriter that handles all code annotation inside braced expression lists: +/// - Breakpoint calls on statements +/// - Verification calls after statements containing breakpoints +/// - `#line` directives after injected calls to restore sourceref bookkeeping +struct AnnotationRewriter<'a> { + uri: &'a Url, + /// Breakpoints in document coordinates, will be mutated to mark invalid ones + breakpoints: &'a mut [Breakpoint], + /// Offset for coordinate conversion: doc_line = code_line + line_offset + line_offset: i32, + /// Line index for the parsed code + line_index: &'a LineIndex, + /// Set of breakpoint IDs that have been consumed (placed in nested lists) + consumed: std::collections::HashSet, + /// Stack tracking braced expression context. Each entry contains precomputed + /// line information captured before child transformations (which can corrupt ranges). + brace_stack: Vec, + /// First error encountered during transformation (if any) + err: Option, +} + +/// Holds precomputed line information for a braced expression list. +/// Captured on entry to a braced expression since line info becomes unreliable +/// after child transformations. +struct BraceFrame { + /// Code line of the opening `{` + brace_code_line: u32, + /// Code line of the closing `}` + closing_brace_code_line: u32, + /// Line info for each expression (indexed by slot position) + expr_info: Vec, +} + +/// Precomputed line information for a single expression in a braced list. +struct ExprLineInfo { + /// Code line where the expression starts (from first token) + start: u32, + /// Code line range [start, end) for the expression + range: std::ops::Range, +} + +impl<'a> AnnotationRewriter<'a> { + fn new( + uri: &'a Url, + breakpoints: &'a mut [Breakpoint], + line_offset: i32, + line_index: &'a LineIndex, + ) -> Self { + // Sort so that `find_breakpoint_for_expr` (which uses `position()`) finds + // the earliest-line breakpoint first when multiple could match + breakpoints.sort_by_key(|bp| bp.line); + + Self { + uri, + breakpoints, + line_offset, + line_index, + consumed: std::collections::HashSet::new(), + brace_stack: Vec::new(), + err: None, + } + } + + fn take_err(&mut self) -> Option { + self.err.take() + } + + fn fail(&mut self, err: anyhow::Error, node: RSyntaxNode) -> RSyntaxNode { + if self.err.is_none() { + self.err = Some(err); + } + node + } + + /// Convert a text offset to a code line number. + fn to_code_line(&self, offset: TextSize) -> Option { + self.line_index.line_col(offset).map(|lc| lc.line) + } + + /// Convert code line to document line. Can be negative for the wrapper + /// brace in `annotate_source(). + fn to_doc_line(&self, code_line: u32) -> i32 { + code_line as i32 + self.line_offset + } + + /// Check if a breakpoint is available (not consumed, not invalid, and not + /// disabled) + fn is_available(&self, bp: &Breakpoint) -> bool { + !self.consumed.contains(&bp.id) && + !matches!( + bp.state, + BreakpointState::Invalid(_) | BreakpointState::Disabled + ) + } + + /// Find all available breakpoints that anchor to this expression: At or + /// after the previous expression's end, up to and including the + /// expression's last line. Returns the indices of all matching breakpoints. + fn match_breakpoints( + &self, + prev_doc_end: Option, + expr_last_line: i32, + ) -> anyhow::Result> { + if expr_last_line < 0 { + return Err(anyhow!( + "Unexpected negative `expr_last_line` ({expr_last_line})" + )); + } + let expr_last_line = expr_last_line as u32; + + let result: Vec = self + .breakpoints + .iter() + .enumerate() + .filter_map(|(idx, bp)| { + if !self.is_available(bp) { + return None; + } + + // Breakpoint is after this expression + if bp.line > expr_last_line { + return None; + } + + // There is no previous expression so that's a match + let Some(prev_doc_end) = prev_doc_end else { + return Some(idx); + }; + + // Breakpoint must be after the end of the previous expression. + // Note we allow blank lines between expressions to anchor to + // the next expression. + if (bp.line as i32) >= prev_doc_end { + Some(idx) + } else { + None + } + }) + .collect(); + Ok(result) + } + + /// Check if any breakpoint (including consumed ones) falls within the line + /// range [start, end) in document coordinates. This is used to determine + /// whether a statement contains breakpoints and thus needs to be followed + /// by a verify call. + fn has_breakpoints_in_range(&self, start: i32, end: i32) -> bool { + self.breakpoints.iter().any(|bp| { + let bp_line = bp.line as i32; + !matches!( + bp.state, + BreakpointState::Invalid(_) | BreakpointState::Disabled + ) && bp_line >= start && + bp_line < end + }) + } +} + +impl SyntaxRewriter for AnnotationRewriter<'_> { + type Language = RLanguage; + + fn visit_node(&mut self, node: RSyntaxNode) -> VisitNodeSignal { + if self.err.is_some() { + // Something is wrong but we can't short-circuit the visit. Just + // visit nodes until exhaustion. + return VisitNodeSignal::Traverse(node); + } + + // Track `BraceFrame` information when we enter a braced expression. + // This must be done on entry, since the exit hook only sees the + // (partially) rebuilt tree with invalid line information. + // + // Note: we intentionally cache line info from the original parse tree. + // Downstream injections (breakpoint calls, `#line` trivia, verify calls) + // can change token offsets and make line lookups on the rebuilt nodes + // unreliable, but they do not change where the original expressions + // lived in the source. We only rely on these cached source positions + // when anchoring and invalidating breakpoints, tasks for which we need + // the _original_ coordinates, not the new ones. + if let Some(braced) = RBracedExpressions::cast(node.clone()) { + let Some(brace_code_line) = first_token_code_line(&node, self.line_index) else { + self.err = Some(anyhow!("Failed to get line for opening brace")); + return VisitNodeSignal::Traverse(node); + }; + + let Some(closing_brace_code_line) = braced + .r_curly_token() + .ok() + .and_then(|token| self.to_code_line(token.text_trimmed_range().start())) + else { + self.err = Some(anyhow!("Failed to get line for closing brace")); + return VisitNodeSignal::Traverse(node); + }; + + let mut expr_info = Vec::new(); + + for expr in braced.expressions() { + let Some(start) = first_token_code_line(expr.syntax(), self.line_index) else { + self.err = Some(anyhow!("Failed to get start line for expression")); + return VisitNodeSignal::Traverse(node); + }; + let range = match text_trimmed_line_range(expr.syntax(), self.line_index) { + Ok(range) => range, + Err(err) => { + self.err = Some(err); + return VisitNodeSignal::Traverse(node); + }, + }; + + expr_info.push(ExprLineInfo { start, range }); + } + + self.brace_stack.push(BraceFrame { + brace_code_line, + closing_brace_code_line, + expr_info, + }); + } + + VisitNodeSignal::Traverse(node) + } + + fn visit_node_post(&mut self, node: RSyntaxNode) -> RSyntaxNode { + if self.err.is_some() { + // Something is wrong but we can't short-circuit the visit. Just + // visit nodes until exhaustion. + return node; + } + + // Only process expression lists + if node.kind() != RSyntaxKind::R_EXPRESSION_LIST { + return node; + } + + // Note we assume that only braced expressions and the root list have + // `R_EXPRESSION_LIST`, which is the case in our syntax + if let Some(frame) = self.brace_stack.pop() { + if frame.expr_info.is_empty() { + // Empty braces have no expressions to break on. Mark breakpoints + // strictly inside as "empty braces", and the closing brace as + // "closing brace". Breakpoints on the opening brace line belong + // to the parent scope (for `{}` on a single line, this means no + // breakpoints are marked invalid here). + + let brace_doc_start = self.to_doc_line(frame.brace_code_line); + let closing_doc_line = self.to_doc_line(frame.closing_brace_code_line); + + // Mark lines strictly inside as "empty braces" + let inner_start = brace_doc_start + 1; + let inner_end = closing_doc_line - 1; + if inner_start <= inner_end { + self.mark_breakpoints_invalid( + Some(inner_start), + Some(inner_end), + InvalidReason::EmptyBraces, + ); + } + + // Mark the closing brace line as "closing brace" + if closing_doc_line > brace_doc_start { + self.mark_breakpoints_invalid( + Some(closing_doc_line), + Some(closing_doc_line), + InvalidReason::ClosingBrace, + ); + } + + return node; + } + + // Brace range in document coordinates. Since we checked for empty + // expr_info above, first/last are guaranteed to exist. + let Some(last_info) = frame.expr_info.last() else { + return self.fail(anyhow!("expr_info unexpectedly empty"), node); + }; + let Some(first_info) = frame.expr_info.first() else { + return self.fail(anyhow!("expr_info unexpectedly empty"), node); + }; + + let brace_doc_start = self.to_doc_line(frame.brace_code_line); + let brace_doc_end = self.to_doc_line(last_info.range.end); + let first_expr_doc_start = self.to_doc_line(first_info.start); + + // Annotate statements in the braced list + let result = self.annotate_braced_list(node, frame.brace_code_line, frame.expr_info); + + // Mark any remaining breakpoints in this brace range as invalid (closing braces) + let invalidation_floor = breakpoint_floor(brace_doc_start, first_expr_doc_start); + self.mark_breakpoints_invalid( + Some(invalidation_floor), + Some(brace_doc_end), + InvalidReason::ClosingBrace, + ); + + result + } else { + // Root expression list: leave breakpoints as Unverified. + // They can't be hit in console input, but may be valid when sourcing. + node + } + } +} + +impl AnnotationRewriter<'_> { + /// Annotate an expression list inside braces with breakpoints, `#line` + /// directives, and verification calls. + fn annotate_braced_list( + &mut self, + list_node: RSyntaxNode, + brace_code_line: u32, + expr_info: Vec, + ) -> RSyntaxNode { + let Some(list) = RExpressionList::cast(list_node.clone()) else { + return list_node; + }; + + let elements: Vec<_> = list.into_iter().collect(); + if elements.is_empty() { + return list_node; + } + + // Convert brace code line to document coordinates. This is the floor + // for breakpoint matching, breakpoints before this line belong to an + // outer scope, not this braced list. Note that due to the injected + // wrapper braces in `annotate_source()`, this can be -1 (before any doc + // line). + let brace_doc_start: i32 = self.to_doc_line(brace_code_line); + + let mut result_slots: Vec> = Vec::new(); + let mut needs_line_directive = false; + + let first_expr_doc_start = expr_info + .first() + .map(|info| self.to_doc_line(info.start)) + .unwrap_or(brace_doc_start); + let mut prev_doc_end: Option = + Some(breakpoint_floor(brace_doc_start, first_expr_doc_start)); + + for (i, expr) in elements.iter().enumerate() { + // Use precomputed line info captured on the preorder visit, when + // positions were still valid + let Some(info) = expr_info.get(i) else { + return self.fail(anyhow!("Missing line info for expression {i}"), list_node); + }; + + let expr_doc_start = self.to_doc_line(info.start); + let expr_doc_end = self.to_doc_line(info.range.end); + + // Find all breakpoints that anchor to this expression: + // - At or after the previous expression's end + // - At or before the expression's last line (expr_doc_end - 1, since end is exclusive) + // This includes breakpoints on blank lines before the expression and + // breakpoints inside multiline expressions (which all anchor to the start). + let bp_indices = match self.match_breakpoints(prev_doc_end, expr_doc_end - 1) { + Ok(indices) => indices, + Err(e) => return self.fail(e, list_node), + }; + + if !bp_indices.is_empty() { + // Use the first breakpoint's id for the injected call + let first_bp_id = self.breakpoints[bp_indices[0]].id; + + // Update all matching breakpoints: anchor to expr start and + // mark consumed/injected. The `injected` flag is crucial: + // `verify_breakpoints()` only verifies breakpoints where + // `injected == true`. This prevents a bug where a breakpoint + // added _after_ parsing gets incorrectly verified when stopping + // at another breakpoint in the same function. + for &bp_idx in &bp_indices { + let bp = &mut self.breakpoints[bp_idx]; + bp.line = expr_doc_start as u32; + bp.injected = true; + self.consumed.insert(bp.id); + } + + // Inject a single breakpoint call for all matching breakpoints + // (all breakpoints are shown at the same location in the + // frontend, once verified) + let breakpoint_call = create_breakpoint_call(self.uri, first_bp_id); + result_slots.push(Some(breakpoint_call.into())); + + // We've injected an expression so we'll need to fix sourcerefs + // with a line directive + needs_line_directive = true; + } + + // There are two reasons we might need a line directive: + // - We've just injected a breakpoint + // - We've injected a verify call at last iteration + let expr_node = if needs_line_directive { + match add_line_directive_to_node(expr.syntax(), expr_doc_start, self.uri) { + Ok(n) => n, + Err(e) => return self.fail(e, list_node), + } + } else { + expr.syntax().clone() + }; + result_slots.push(Some(expr_node.into())); + + // If this expression's range contains any breakpoints, we inject a + // verify call right after it to ensure that they are immediately + // verified after stepping over this expression. The very last + // expression in the list is an exception. We don't inject a verify + // call because we have nowhere to attach a corresponding line + // directive. Instead we rely on the parent list to verify. + let is_last = i == elements.len() - 1; + if !is_last && self.has_breakpoints_in_range(expr_doc_start, expr_doc_end) { + let start_u32 = expr_doc_start.max(0) as u32; + let end_u32 = expr_doc_end.max(0) as u32; + let verify_call = create_verify_call(self.uri, &(start_u32..end_u32)); + result_slots.push(Some(verify_call.into())); + + // Next expression will need a line directive no matter what + // (even if there is no injected breakpoint) + needs_line_directive = true; + } else { + needs_line_directive = false; + } + + prev_doc_end = Some(expr_doc_end); + } + + // Replace all slots with the new list + let slot_count = list_node.slots().count(); + list_node.splice_slots(0..slot_count, result_slots) + } + + /// Mark remaining unconsumed breakpoints as invalid within the given range. + /// If range bounds are None, all remaining breakpoints are marked invalid. + fn mark_breakpoints_invalid( + &mut self, + start: Option, + end: Option, + reason: InvalidReason, + ) { + for bp in self.breakpoints.iter_mut() { + let is_available = + !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid(_)); + if !is_available { + continue; + } + + let bp_line = bp.line as i32; + let in_range = + start.map_or(true, |s| bp_line >= s) && end.map_or(true, |e| bp_line <= e); + if in_range { + bp.state = BreakpointState::Invalid(reason); + } + } + } +} + +/// Compute the floor line for breakpoint matching in a braced list. When +/// content starts on a later line than the brace, we use `brace_doc_start + 1` +/// to avoid claiming breakpoints on the brace line, as those belong to the +/// parent scope. +fn breakpoint_floor(brace_doc_start: i32, first_expr_doc_start: i32) -> i32 { + if first_expr_doc_start > brace_doc_start { + brace_doc_start + 1 + } else { + brace_doc_start + } +} + +/// Returns the code line of the node's first token. +fn first_token_code_line(node: &RSyntaxNode, line_index: &LineIndex) -> Option { + let token = node.first_token()?; + let offset = token.text_trimmed_range().start(); + line_index.line_col(offset).map(|lc| lc.line) +} + +/// Returns the line range [start, end) for the node's trimmed text. +fn text_trimmed_line_range( + node: &RSyntaxNode, + line_index: &LineIndex, +) -> anyhow::Result> { + // This gives a range in offset coordinates. We need to retrieve the line range + // using the line index. + let text_range = node.text_trimmed_range(); + + let start = line_index + .line_col(text_range.start()) + .map(|lc| lc.line) + .ok_or_else(|| anyhow!("Failed to get line for text range start offset"))?; + + let end = line_index + .line_col(text_range.end()) + .map(|lc| lc.line + 1) // Close the range end + .ok_or_else(|| anyhow!("Failed to get line for text range end offset"))?; + + Ok(start..end) +} + +type TriviaPieces = Vec<(TriviaPieceKind, String)>; + +/// Collects leading trivia from a token as (kind, text) tuples. +fn collect_leading_trivia(token: &aether_syntax::RSyntaxToken) -> TriviaPieces { + token .leading_trivia() .pieces() .map(|piece| (piece.kind(), piece.text().to_string())) - .collect(); - - // Create new trivia with line directive prepended - let new_trivia: Vec<_> = vec![ - ( - biome_rowan::TriviaPieceKind::SingleLineComment, - line_directive.to_string(), - ), - (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), - ( - biome_rowan::TriviaPieceKind::Whitespace, - leading_padding.to_string(), - ), + .collect() +} + +/// Creates trivia pieces for a line directive comment followed by a newline. +fn line_directive_trivia(line: u32, uri: &Url) -> TriviaPieces { + let directive = format!("#line {} \"{}\"", line + 1, uri); + vec![ + (TriviaPieceKind::SingleLineComment, directive), + (TriviaPieceKind::Newline, "\n".to_string()), ] - .into_iter() - .chain(existing_trivia.into_iter()) - .collect(); +} + +/// Inserts trivia pieces before trailing whitespace (indentation) if present. +/// This preserves indentation: `[\n, \n, ws]` becomes `[\n, \n, , ws]` +fn insert_before_trailing_whitespace( + mut trivia: TriviaPieces, + to_insert: TriviaPieces, +) -> TriviaPieces { + let has_trailing_whitespace = trivia + .last() + .is_some_and(|(k, _)| *k == TriviaPieceKind::Whitespace); + + if has_trailing_whitespace { + let Some(last) = trivia.pop() else { + trivia.extend(to_insert); + return trivia; + }; + trivia.extend(to_insert); + trivia.push(last); + } else { + trivia.extend(to_insert); + } + + trivia +} + +fn add_line_directive_to_node( + node: &RSyntaxNode, + line: i32, + uri: &Url, +) -> anyhow::Result { + if line < 0 { + return Err(anyhow!( + "Line directive line is negative ({line}), this shouldn't happen" + )); + } + let line = line as u32; + + let first_token = node + .first_token() + .ok_or_else(|| anyhow!("Node has no first token for line directive"))?; + + let mut existing_trivia = collect_leading_trivia(&first_token); + + // Skip leading newline as it belongs to the previous node + if existing_trivia + .first() + .is_some_and(|(kind, _)| *kind == TriviaPieceKind::Newline) + { + existing_trivia.remove(0); + } + + let directive_trivia = line_directive_trivia(line, uri); + let new_trivia = insert_before_trailing_whitespace(existing_trivia, directive_trivia); let new_first_token = first_token.with_leading_trivia(new_trivia.iter().map(|(k, t)| (*k, t.as_str()))); - let Some(new_node) = node - .syntax() - .clone() + node.clone() .replace_child(first_token.into(), new_first_token.into()) - else { - return code.into(); + .ok_or_else(|| anyhow!("Failed to replace first token with line directive")) +} + +// We create new calls by parsing strings. Although less elegant, it's much less +// verbose and easier to see what's going on. + +fn create_breakpoint_call(uri: &Url, id: i64) -> RSyntaxNode { + // NOTE: If you use `base::browser()` here in an attempt to prevent masking + // issues in case someone redefined `browser()`, you'll cause the function + // in which the breakpoint is injected to be bytecode-compiled. This is a + // limitation/bug of https://github.com/r-devel/r-svn/blob/e2aae817/src/library/compiler/R/cmp.R#L1273-L1290 + let code = format!( + "\nbase::{AUTO_STEP_FUNCTION}(base::.ark_breakpoint(browser(), \"{uri}\", \"{id}\"))\n" + ); + aether_parser::parse(&code, Default::default()).syntax() +} + +fn create_verify_call(uri: &Url, line_range: &std::ops::Range) -> RSyntaxNode { + let code = format!("\n{}\n", format_verify_call(uri, line_range)); + aether_parser::parse(&code, Default::default()).syntax() +} + +/// Formats a verify call as a string. Takes 0-indexed line range. +fn format_verify_call(uri: &Url, line_range: &std::ops::Range) -> String { + format!( + "base::{AUTO_STEP_FUNCTION}(base::.ark_verify_breakpoints_range(\"{}\", {}L, {}L))", + uri, + line_range.start + 1, + line_range.end + 1 + ) +} + +#[harp::register] +pub unsafe extern "C-unwind" fn ps_annotate_source(uri: SEXP, code: SEXP) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let code: String = RObject::view(code).try_into()?; + + let uri = Url::parse(&uri)?; + + let main = RMain::get(); + let mut dap_guard = main.debug_dap.lock().unwrap(); + + // If there are no breakpoints for this file, return NULL to signal no + // annotation needed. Scope the mutable borrow so we can re-borrow after. + let annotated = { + let Some((_, breakpoints)) = dap_guard.breakpoints.get_mut(&uri) else { + return Ok(harp::r_null()); + }; + if breakpoints.is_empty() { + return Ok(harp::r_null()); + } + annotate_source(&code, &uri, breakpoints.as_mut_slice())? }; - new_node.to_string() + // Notify frontend about any breakpoints marked invalid during annotation + if let Some((_, breakpoints)) = dap_guard.breakpoints.get(&uri) { + dap_guard.notify_invalid_breakpoints(breakpoints); + } + + Ok(RObject::try_from(annotated)?.sexp) } #[cfg(test)] @@ -80,7 +935,7 @@ mod tests { fn test_annotate_input_basic() { let code = "x <- 1\ny <- 2"; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -88,7 +943,7 @@ mod tests { fn test_annotate_input_shifted_line() { let code = "x <- 1\ny <- 2"; let location = make_location(10, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -96,7 +951,7 @@ mod tests { fn test_annotate_input_shifted_character() { let code = "x <- 1\ny <- 2"; let location = make_location(0, 5); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -104,7 +959,7 @@ mod tests { fn test_annotate_input_shifted_line_and_character() { let code = "x <- 1\ny <- 2"; let location = make_location(10, 5); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -112,7 +967,7 @@ mod tests { fn test_annotate_input_with_existing_whitespace() { let code = " x <- 1\n y <- 2"; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -120,7 +975,7 @@ mod tests { fn test_annotate_input_with_existing_whitespace_shifted() { let code = " x <- 1\n y <- 2"; let location = make_location(0, 2); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -128,7 +983,7 @@ mod tests { fn test_annotate_input_with_existing_comment() { let code = "# comment\nx <- 1"; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -136,7 +991,962 @@ mod tests { fn test_annotate_input_empty_code() { let code = ""; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None).unwrap(); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_input_with_breakpoint() { + // Test the full annotate_input path with breakpoints. + // Wrap in braces so breakpoints are valid. + let code = "{\n0\n1\n2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 3, + character: 0, + }, + end: Position { + line: 7, + character: 1, + }, + }; + // Breakpoint at document line 5 (code line 2, i.e., `1`) + let mut breakpoints = vec![Breakpoint::new(1, 5, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // Breakpoint line should remain in document coordinates + assert_eq!(breakpoints[0].line, 5); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_single_line() { + // Wrap in braces so breakpoints are valid (inside a brace list) + let code = "{\nx <- 1\ny <- 2\nz <- 3\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_multiple() { + // Wrap in braces so breakpoints are valid (inside a brace list) + let code = "{\nx <- 1\ny <- 2\nz <- 3\nw <- 4\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 2, BreakpointState::Unverified), + Breakpoint::new(2, 4, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_in_brace_list() { + let code = "f <- function() {\n x <- 1\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + } + + #[test] + fn test_inject_breakpoints_out_of_range() { + let code = "x <- 1\ny <- 2"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 1, + character: 6, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 10, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + } + + #[test] + fn test_inject_breakpoints_multiple_lists() { + // This test has breakpoints in different parent lists: + // - One in the outer brace list + // - One in a nested brace list (inside function) + // Wrap in braces so both breakpoints are valid + let code = "{\nx <- 1\nf <- function() {\n y <- 2\n z <- 3\n}\nw <- 4\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 7, + character: 1, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 3, BreakpointState::Unverified), + Breakpoint::new(2, 6, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + // Both breakpoints are valid (inside brace lists) + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_inside_multiline_expr_anchors_to_start() { + // A breakpoint on an intermediate line of a multiline expression should + // anchor to the start of that expression. + // Lines: + // 0: { + // 1: x + + // 2: y + // 3: z + // 4: } + let code = "{\n x +\n y\n z\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + // Breakpoint inside multiline expression should anchor to expression start + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert_eq!(breakpoints[0].line, 1); // Anchored to line 1 (x +) + } + + #[test] + fn test_inject_breakpoints_on_blank_line_anchors_to_next() { + // A breakpoint on a blank line between expressions should anchor to + // the next expression. + // Lines: + // 0: { + // 1: x <- 1 + // 2: (blank) + // 3: y <- 2 + // 4: } + let code = "{\n x <- 1\n\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + // Breakpoint on blank line should anchor to next expression (valid) + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + // Line should be updated to the actual anchor position (line 3) + assert_eq!(breakpoints[0].line, 3); + } + + #[test] + fn test_multiple_breakpoints_collapse_to_same_line() { + // Multiple breakpoints matching the same expression should all anchor + // to the expression start, but only one breakpoint call is injected. + // Lines: + // 0: { + // 1: (blank) + // 2: foo( + // 3: 1 + // 4: ) + // 5: } + let code = "{\n\n foo(\n 1\n )\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + // Three breakpoints: blank line, expression start, and inside expression + let mut breakpoints = vec![ + Breakpoint::new(1, 1, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), + Breakpoint::new(3, 3, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // All breakpoints should be valid and anchored to line 2 (expression start) + for bp in &breakpoints { + assert!( + !matches!(bp.state, BreakpointState::Invalid(_)), + "Breakpoint {} should be valid", + bp.id + ); + assert_eq!(bp.line, 2, "Breakpoint {} should anchor to line 2", bp.id); + } + + // Only one breakpoint call should be injected (count occurrences) + let bp_call_count = result.matches(".ark_breakpoint").count(); + assert_eq!( + bp_call_count, 1, + "Only one breakpoint call should be injected" + ); + } + + #[test] + fn test_inject_breakpoints_with_blank_line() { + // Test that blank lines before an anchor are preserved + // Wrap in braces so breakpoints are valid + let code = "{\nx <- 1\n\n\ny <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 4, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_on_closing_brace() { + // Breakpoint on a line with only `}` should be left unverified + // (no executable code there) + let code = "f <- function() {\n x <- 1\n}\ny <- 2"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 6, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); + // Marked as invalid + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_on_closing_brace_with_valid_breakpoint() { + // One breakpoint on `}` (invalid) and one on valid code in outer braces + // Wrap in braces so the second breakpoint is valid + let code = "{\nf <- function() {\n x <- 1\n}\ny <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 3, BreakpointState::Unverified), + Breakpoint::new(2, 4, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // First breakpoint is invalid (on closing brace) + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + // Second breakpoint is valid (in outer brace list) + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_before_within_after_nested() { + // Comprehensive test with breakpoints: + // - Before nested list (line 1: `x <- 1`) - in outer braces + // - Within nested list (line 3: `y <- 2`) - inside function + // - After nested list (line 6: `w <- 4`) - in outer braces + // Wrap in braces so all breakpoints are valid + let code = "{\nx <- 1\nf <- function() {\n y <- 2\n z <- 3\n}\nw <- 4\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 7, + character: 1, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 1, BreakpointState::Unverified), + Breakpoint::new(2, 3, BreakpointState::Unverified), + Breakpoint::new(3, 6, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + // All breakpoints are valid (inside brace lists) + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[2].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_with_line_offset() { + // Test that breakpoints work correctly when the code starts at a non-zero line + // in the document. This simulates executing a selection from the middle of a file. + // Wrap in braces so breakpoints are valid. + // + // The code represents lines 10-14 of the original document: + // Line 10: { + // Line 11: x <- 1 + // Line 12: y <- 2 + // Line 13: z <- 3 + // Line 14: } + let code = "{\nx <- 1\ny <- 2\nz <- 3\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 10, + character: 0, + }, + end: Position { + line: 14, + character: 1, + }, + }; + + // Breakpoint at document line 12 (which is code line 2, i.e., `y <- 2`) + let mut breakpoints = vec![Breakpoint::new(1, 12, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // The breakpoint line should remain in document coordinates + assert_eq!(breakpoints[0].line, 12); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_with_line_offset_nested() { + // Test with line offset and nested braces + let code = "f <- function() {\n x <- 1\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 20, + character: 0, + }, + end: Position { + line: 23, + character: 1, + }, + }; + + // Breakpoint at document line 22 (code line 2, i.e., `y <- 2`) + let mut breakpoints = vec![Breakpoint::new(1, 22, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // The breakpoint line should remain in document coordinates + assert_eq!(breakpoints[0].line, 22); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_doubly_nested_braces() { + // Test with doubly nested braces: { { 1\n 2 } } + // The inner expressions should be reachable for breakpoints + let code = "{\n {\n 1\n 2\n }\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + + // Breakpoint at line 2 (the `1` expression inside the inner braces) + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // The breakpoint should be placed at line 2 + assert_eq!(breakpoints[0].line, 2); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_triply_nested_braces() { + // Test with triply nested braces: { { { 1 } } } + let code = "{\n {\n {\n 1\n }\n }\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 6, + character: 1, + }, + }; + + // Breakpoint at line 3 (the `1` expression inside the innermost braces) + let mut breakpoints = vec![Breakpoint::new(1, 3, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // The breakpoint should be placed at line 3 + assert_eq!(breakpoints[0].line, 3); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_inject_breakpoints_nested_closing_brace_invalid() { + // Breakpoint on inner closing brace should be invalid + let code = "{\n {\n 1\n }\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + + // Breakpoint at line 3 (the inner `}` line) + let mut breakpoints = vec![Breakpoint::new(1, 3, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); + // Marked as invalid + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + } + + #[test] + fn test_top_level_breakpoint_single_unverified() { + // Top-level breakpoints stay Unverified (can't be hit in console, but + // may be valid when sourcing the same file) + let code = "x <- 1\ny <- 2\nz <- 3"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 2, + character: 6, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 1, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); + + // Breakpoint stays Unverified (not Invalid) + assert!(matches!(breakpoints[0].state, BreakpointState::Unverified)); + } + + #[test] + fn test_top_level_breakpoint_multiple_unverified() { + // Multiple top-level breakpoints stay Unverified + let code = "x <- 1\ny <- 2\nz <- 3\nw <- 4"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 6, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); + // Both breakpoints stay Unverified + assert!(matches!(breakpoints[0].state, BreakpointState::Unverified)); + assert!(matches!(breakpoints[1].state, BreakpointState::Unverified)); + } + + #[test] + fn test_top_level_breakpoint_mixed_unverified_and_nested() { + // Top-level breakpoints stay Unverified, nested ones get consumed + let code = "x <- 1\nf <- function() {\n y <- 2\n}\nz <- 3"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 6, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), + Breakpoint::new(3, 4, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // Code should contain breakpoint for nested expression only + assert!(result.contains("base::.ark_breakpoint")); + // Top-level breakpoints stay Unverified + assert!(matches!(breakpoints[0].state, BreakpointState::Unverified)); + // Nested breakpoint is consumed (not Invalid) + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); + // Top-level breakpoint stays Unverified + assert!(matches!(breakpoints[2].state, BreakpointState::Unverified)); + } + + #[test] + fn test_annotate_source_basic() { + let code = "x <- 1\ny <- 2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_with_breakpoint() { + let code = "foo <- function() {\n x <- 1\n y <- 2\n}\nbar <- 3"; + let uri = Url::parse("file:///test.R").unwrap(); + // Breakpoint at line 2 (inside the function, 0-indexed) + let mut breakpoints = vec![Breakpoint::new(1, 1, BreakpointState::Unverified)]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_multiple_expressions() { + let code = "a <- 1\nb <- 2\nc <- 3"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_multiline_expression() { + let code = "foo <- function(x) {\n x + 1\n}\nbar <- 2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_top_level_breakpoint() { + let code = "x <- 1\ny <- 2\nz <- 3"; + let uri = Url::parse("file:///test.R").unwrap(); + // Breakpoints on top-level expressions (lines 0, 1, 2 in 0-indexed) + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), + ]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Top-level breakpoints should be valid in annotate_source (code is wrapped in braces) + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + assert_eq!(breakpoints[1].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_multiple_breakpoints_inside_braces() { + // Breakpoints at lines 1 and 2 (1-based), i.e. lines 0 and 1 (0-indexed) + // Line 0: `{` + // Line 1: ` 1` + let code = "{\n 1\n 2\n}\n\n2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 1, BreakpointState::Unverified), + ]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Breakpoint 1 should be at line 0 (the `{`) + // Breakpoint 2 should be at line 1 (the `1`) + assert_eq!(breakpoints[0].line, 0); + assert_eq!(breakpoints[1].line, 1); + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + assert_eq!(breakpoints[1].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_breakpoint_on_opening_brace() { + // Breakpoint on line 0 (the `{` line) should anchor to the braced expression, + // not dive into the nested list and anchor to line 1. + let code = "{\n 1\n 2\n}\n\n2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![Breakpoint::new(1, 0, BreakpointState::Unverified)]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Breakpoint should remain at line 0, not shifted to line 1 + assert_eq!(breakpoints[0].line, 0); + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_breakpoint_on_function_definition_line() { + // Breakpoint on the function definition line (which includes the opening `{`) + // should anchor to the assignment expression, not dive into the function body. + // Line 0: `f <- function(x) {` + // Line 1: ` 1` + // Line 2: `}` + let code = "f <- function(x) {\n 1\n}"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 1, BreakpointState::Unverified), + ]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Breakpoint 1 should remain at line 0 (the function definition) + // Breakpoint 2 should be at line 1 (inside the function body) + assert_eq!(breakpoints[0].line, 0); + assert_eq!(breakpoints[1].line, 1); + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + assert_eq!(breakpoints[1].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + + #[test] + fn test_inject_breakpoints_if_else_both_branches() { + let code = "if (TRUE) {\n x <- 1\n} else {\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 1, BreakpointState::Unverified), + Breakpoint::new(2, 3, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // Both breakpoints should be valid (not marked as invalid) + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "First breakpoint should not be invalid" + ); + assert!( + !matches!(breakpoints[1].state, BreakpointState::Invalid(_)), + "Second breakpoint should not be invalid" + ); + } + + #[test] + fn test_inject_breakpoints_multiple_invalid_closing_braces() { + // Multiple breakpoints on closing braces should all be marked invalid + // without re-traversing the tree for each one. + let code = "{\n f <- function() {\n x <- 1\n }\n}"; + // Line 0: { + // Line 1: f <- function() { + // Line 2: x <- 1 + // Line 3: } <- bp1 (closing brace of function) + // Line 4: } <- bp2 (closing brace of outer block) + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let mut breakpoints = vec![ + Breakpoint::new(1, 3, BreakpointState::Unverified), + Breakpoint::new(2, 4, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); + + // Both breakpoints should be marked invalid + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "First breakpoint on closing brace should be invalid" + ); + assert!( + matches!(breakpoints[1].state, BreakpointState::Invalid(_)), + "Second breakpoint on closing brace should be invalid" + ); + } + + #[test] + fn test_inject_breakpoints_empty_brace_sibling() { + // Breakpoint on an empty brace block that's a sibling to other expressions + let code = "{\n x <- 1\n {}\n}"; + // Line 0: { + // Line 1: x <- 1 + // Line 2: {} <- breakpoint here + // Line 3: } + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // Should anchor to the empty {} expression (it's a valid expression) + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on empty brace block should be valid" + ); + } + + #[test] + fn test_inject_breakpoints_nested_braces_same_line() { + // Test breakpoints on nested brace structures + let code = "{\n {\n }\n}"; + // Line 0: { <- outer open + // Line 1: { <- inner open (this is an expression in outer list) + // Line 2: } <- inner close (invalid - closing brace) + // Line 3: } <- outer close (invalid - closing brace) + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 1, + }, + }; + + // Test 1: Breakpoint on inner brace open line (valid - anchors to inner {} expression) + let mut breakpoints = vec![Breakpoint::new(1, 1, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on inner brace open should be valid" + ); + assert!(result.contains(".ark_breakpoint")); + + // Test 2: Breakpoint on inner closing brace is invalid (inner {} is empty) + let mut breakpoints = vec![Breakpoint::new(2, 2, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on closing brace of empty braces should be invalid" + ); + // No breakpoint injected + assert!(!result.contains(".ark_breakpoint")); + + // Test 3: Breakpoint on outer closing brace (invalid - not part of any expression in the list) + let mut breakpoints = vec![Breakpoint::new(3, 3, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on outer closing brace should be invalid" + ); + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); + } + + #[test] + fn test_inject_breakpoints_double_braces_same_lines() { + // Test breakpoints with {{ on one line and }} on another + let code = "{{\n}}"; + // Line 0: {{ <- outer and inner open + // Line 1: }} <- inner and outer close + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 1, + character: 2, + }, + }; + + // Test 1: Breakpoint on line 0 (valid - anchors to inner {} expression) + let mut breakpoints = vec![Breakpoint::new(1, 0, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on {{ line should be valid" + ); + assert!(result.contains(".ark_breakpoint")); + + // Test 2: Breakpoint on line 1 (}} line) is invalid - inner {} is empty + let mut breakpoints = vec![Breakpoint::new(2, 1, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + // Breakpoint on closing brace of empty braces is invalid + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on }} of empty braces should be invalid" + ); + // No breakpoint injected + assert!(!result.contains(".ark_breakpoint")); + } + + #[test] + fn test_inject_breakpoints_inside_multiline_call() { + // Test breakpoint placed on a line inside a multi-line call expression. + // The breakpoint is on the argument line, not the start of the expression, + // but should anchor to the start of the expression. + let code = "{\n foo(\n 1\n )\n}"; + // Line 0: { + // Line 1: foo( + // Line 2: 1 <- breakpoint here + // Line 3: ) + // Line 4: } + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // Breakpoint inside a multi-line expression should anchor to expression start + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert_eq!( + breakpoints[0].line, 1, + "Breakpoint should anchor to expression start" + ); } } diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index da5f8b46f..8b23c9393 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -1,7 +1,7 @@ // // repl_debug.rs // -// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// Copyright (C) 2026 Posit Software, PBC. All rights reserved. // use anyhow::anyhow; @@ -14,12 +14,16 @@ use harp::r_string; use harp::session::r_sys_calls; use harp::session::r_sys_frames; use harp::session::r_sys_functions; +use harp::srcref::SrcRef; use harp::utils::r_is_null; +use libr::SEXP; use regex::Regex; use stdext::result::ResultExt; +use url::Url; use crate::dap::dap::DapBackendEvent; use crate::interface::DebugCallText; +use crate::interface::DebugCallTextKind; use crate::interface::RMain; use crate::modules::ARK_ENVS; use crate::srcref::ark_uri; @@ -96,7 +100,7 @@ impl RMain { let mut dap = self.debug_dap.lock().unwrap(); dap.start_debug(stack, preserve_focus, fallback_sources) }, - Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), + Err(err) => log::error!("ReadConsole: Can't get stack info: {err:?}"), }; } @@ -126,16 +130,16 @@ impl RMain { // If not debugging, nothing to do. DebugCallText::None => (), // If already finalized, keep what we have. - DebugCallText::Finalized(_) => (), + DebugCallText::Finalized(_, _) => (), // If capturing, transition to finalized. - DebugCallText::Capturing(call_text) => { - self.debug_call_text = DebugCallText::Finalized(call_text.clone()) + DebugCallText::Capturing(call_text, kind) => { + self.debug_call_text = DebugCallText::Finalized(call_text.clone(), *kind) }, } } pub(crate) fn debug_handle_write_console(&mut self, content: &str) { - if let DebugCallText::Capturing(ref mut call_text) = self.debug_call_text { + if let DebugCallText::Capturing(ref mut call_text, _) = self.debug_call_text { // Append to current expression if we are currently capturing stdout call_text.push_str(content); return; @@ -145,7 +149,17 @@ impl RMain { // the current expression we are debugging, so we use that as a signal to begin // capturing. if content == "debug: " { - self.debug_call_text = DebugCallText::Capturing(String::new()); + self.debug_call_text = + DebugCallText::Capturing(String::new(), DebugCallTextKind::Debug); + return; + } + + // `debug at *PATH*: *EXPR*` is emitted by R when stepping through + // blocks that have srcrefs. We use this to detect that we've just + // stepped to an injected breakpoint and need to move on automatically. + if content.starts_with("debug at ") { + self.debug_call_text = + DebugCallText::Capturing(String::new(), DebugCallTextKind::DebugAt); return; } @@ -164,13 +178,14 @@ impl RMain { // recreate the debugger state after their code execution. let call_text = match self.debug_call_text.clone() { DebugCallText::None => None, - DebugCallText::Capturing(call_text) => { + DebugCallText::Capturing(call_text, _) => { log::error!( "Call text is in `Capturing` state, but should be `Finalized`: '{call_text}'." ); None }, - DebugCallText::Finalized(call_text) => Some(call_text), + DebugCallText::Finalized(call_text, DebugCallTextKind::Debug) => Some(call_text), + DebugCallText::Finalized(_, DebugCallTextKind::DebugAt) => None, }; let last_start_line = self.debug_last_line; @@ -241,6 +256,8 @@ impl RMain { out.push(as_frame_info(frame, id)?); } + log::trace!("DAP: Current call stack:\n{out:#?}"); + Ok(out) } } @@ -272,7 +289,7 @@ impl RMain { let hash = format!("{:x}", hasher.finish()); ark_uri(&format!( - "debug/session{i}/{hash}/{source_name}.R", + "debug/session{i}/{hash}/{source_name}", i = debug_session_index, )) } @@ -283,6 +300,33 @@ impl RMain { let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); re.is_match(uri) } + + pub(crate) fn verify_breakpoints(&self, srcref: RObject) { + let Some(srcref) = SrcRef::try_from(srcref).warn_on_err() else { + return; + }; + + let Some(filename) = srcref + .srcfile() + .and_then(|srcfile| srcfile.filename()) + .log_err() + else { + return; + }; + + // Only process file:// URIs (from our #line directives). + // Plain file paths or empty filenames are skipped silently. + if !filename.starts_with("file://") { + return; + } + + let Some(uri) = Url::parse(&filename).warn_on_err() else { + return; + }; + + let mut dap = self.debug_dap.lock().unwrap(); + dap.verify_breakpoints(&uri, srcref.line_virtual.start, srcref.line_virtual.end); + } } fn as_frame_info(info: libr::SEXP, id: i64) -> Result { @@ -358,3 +402,70 @@ fn as_frame_info(info: libr::SEXP, id: i64) -> Result { }) } } + +#[harp::register] +pub unsafe extern "C-unwind" fn ps_is_breakpoint_enabled( + uri: SEXP, + id: SEXP, +) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let uri = Url::parse(&uri)?; + + let id: String = RObject::view(id).try_into()?; + + let console = RMain::get_mut(); + let dap = console.debug_dap.lock().unwrap(); + + let enabled = dap.is_breakpoint_enabled(&uri, id); + Ok(RObject::from(enabled).sexp) +} + +/// Verify a single breakpoint by ID. +/// Called when a breakpoint expression is about to be evaluated. +#[harp::register] +pub unsafe extern "C-unwind" fn ps_verify_breakpoint(uri: SEXP, id: SEXP) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let id: String = RObject::view(id).try_into()?; + + let Ok(uri) = Url::parse(&uri) else { + return Ok(libr::R_NilValue); + }; + + let main = RMain::get(); + let mut dap = main.debug_dap.lock().unwrap(); + dap.verify_breakpoint(&uri, &id); + + Ok(libr::R_NilValue) +} + +/// Verify breakpoints in the line range covered by a srcref. +/// Called after each expression is successfully evaluated in source(). +#[harp::register] +pub unsafe extern "C-unwind" fn ps_verify_breakpoints(srcref: SEXP) -> anyhow::Result { + let srcref = RObject::view(srcref); + RMain::get().verify_breakpoints(srcref.clone()); + Ok(libr::R_NilValue) +} + +/// Verify breakpoints in an explicit line range. +/// Called after each top-level expression in source() when using the source hook. +#[harp::register] +pub unsafe extern "C-unwind" fn ps_verify_breakpoints_range( + uri: SEXP, + start_line: SEXP, + end_line: SEXP, +) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let start_line: i32 = RObject::view(start_line).try_into()?; + let end_line: i32 = RObject::view(end_line).try_into()?; + + let Ok(uri) = Url::parse(&uri) else { + return Ok(libr::R_NilValue); + }; + + let main = RMain::get(); + let mut dap = main.debug_dap.lock().unwrap(); + dap.verify_breakpoints(&uri, start_line as u32, end_line as u32); + + Ok(libr::R_NilValue) +} diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index be0396582..c84397934 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -1,7 +1,7 @@ // // dap.rs // -// Copyright (C) 2023 Posit Software, PBC. All rights reserved. +// Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. // // @@ -17,13 +17,75 @@ use crossbeam::channel::Sender; use harp::object::RObject; use stdext::result::ResultExt; use stdext::spawn; +use url::Url; use crate::console_debug::FrameInfo; use crate::dap::dap_server; use crate::request::RRequest; use crate::thread::RThreadSafe; -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BreakpointState { + Unverified, + Verified, + Invalid(InvalidReason), + Disabled, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InvalidReason { + ClosingBrace, + EmptyBraces, +} + +impl InvalidReason { + pub fn message(&self) -> &'static str { + match self { + InvalidReason::ClosingBrace => "Can't break on closing `}` brace", + InvalidReason::EmptyBraces => "Can't break inside empty braces", + } + } +} + +#[derive(Debug, Clone)] +pub struct Breakpoint { + pub id: i64, + /// The line where the breakpoint is actually placed (may be anchored to expression start). + /// 0-based. + pub line: u32, + /// The line originally requested by the user (before anchoring). Used to match breakpoints + /// across SetBreakpoints requests. 0-based. + pub original_line: u32, + pub state: BreakpointState, + /// Whether this breakpoint was actually injected into code during annotation. + /// Only injected breakpoints can be verified by range-based verification. + pub injected: bool, +} + +impl Breakpoint { + /// Create a new breakpoint. The `original_line` is set to the same as `line`. + pub fn new(id: i64, line: u32, state: BreakpointState) -> Self { + Self { + id, + line, + original_line: line, + state, + injected: false, + } + } + + /// Convert from DAP 1-based line to internal 0-based line + pub fn from_dap_line(line: i64) -> u32 { + (line - 1) as u32 + } + + /// Convert from internal 0-based line to DAP 1-based line + pub fn to_dap_line(line: u32) -> i64 { + (line + 1) as i64 + } +} + +#[derive(Debug, Clone)] pub enum DapBackendEvent { /// Event sent when a normal (non-browser) prompt marks the end of a /// debugging session. @@ -35,6 +97,17 @@ pub enum DapBackendEvent { /// Event sent when a browser prompt is emitted during an existing /// debugging session Stopped(DapStoppedEvent), + + /// Event sent when a breakpoint state changes (verified, unverified, or invalid) + /// The line is included so the frontend can update the breakpoint's position + /// (e.g., when a breakpoint inside a multiline expression anchors to its start) + /// The message is included for invalid breakpoints to explain why. + BreakpointState { + id: i64, + line: u32, + verified: bool, + message: Option, + }, } #[derive(Debug, Copy, Clone)] @@ -56,6 +129,9 @@ pub struct Dap { /// Current call stack pub stack: Option>, + /// Known breakpoints keyed by URI, with document hash + pub breakpoints: HashMap)>, + /// Map of `source` -> `source_reference` used for frames that don't have /// associated files (i.e. no `srcref` attribute). The `source` is the key to /// ensure that we don't insert the same function multiple times, which would result @@ -83,6 +159,9 @@ pub struct Dap { /// information. current_variables_reference: i64, + /// Monotonically increasing breakpoint ID counter + current_breakpoint_id: i64, + /// Channel for sending events to the comm frontend. comm_tx: Option>, @@ -101,10 +180,12 @@ impl Dap { is_connected: false, backend_events_tx: None, stack: None, + breakpoints: HashMap::new(), fallback_sources: HashMap::new(), frame_id_to_variables_reference: HashMap::new(), variables_reference_to_r_object: HashMap::new(), current_variables_reference: 1, + current_breakpoint_id: 1, comm_tx: None, r_request_tx, shared_self: None, @@ -121,34 +202,45 @@ impl Dap { shared } + /// Notify the frontend that we've entered the debugger. + /// + /// The DAP session is expected to always be connected (to receive breakpoint + /// updates). The `start_debug` comm message is a hint for the frontend to + /// show the debug toolbar, not a session lifecycle event. pub fn start_debug( &mut self, mut stack: Vec, preserve_focus: bool, fallback_sources: HashMap, ) { + self.is_debugging = true; self.fallback_sources.extend(fallback_sources); self.load_variables_references(&mut stack); self.stack = Some(stack); - if self.is_debugging { - if let Some(tx) = &self.backend_events_tx { - tx.send(DapBackendEvent::Stopped(DapStoppedEvent { preserve_focus })) + log::trace!("DAP: Sending `start_debug` events"); + + if let Some(comm_tx) = &self.comm_tx { + // Ask frontend to connect to the DAP + comm_tx + .send(amalthea::comm_rpc_message!("start_debug")) + .log_err(); + + if let Some(dap_tx) = &self.backend_events_tx { + dap_tx + .send(DapBackendEvent::Stopped(DapStoppedEvent { preserve_focus })) .log_err(); } - } else { - if let Some(tx) = &self.comm_tx { - // Ask frontend to connect to the DAP - log::trace!("DAP: Sending `start_debug` event"); - let msg = amalthea::comm_rpc_message!("start_debug"); - tx.send(msg).log_err(); - } - - self.is_debugging = true; } } + /// Notify the frontend that we've exited the debugger. + /// + /// The DAP session remains connected. The `stop_debug` comm message is a + /// hint for the frontend to hide the debug toolbar. We send `Continued` + /// (not `Terminated`) so the DAP connection stays active for receiving + /// breakpoint updates. pub fn stop_debug(&mut self) { // Reset state self.stack = None; @@ -158,12 +250,15 @@ impl Dap { self.is_debugging = false; if self.is_connected { - if let Some(_) = &self.comm_tx { - // Let frontend know we've quit the debugger so it can - // terminate the debugging session and disconnect. - if let Some(tx) = &self.backend_events_tx { - log::trace!("DAP: Sending `stop_debug` event"); - tx.send(DapBackendEvent::Terminated).log_err(); + log::trace!("DAP: Sending `stop_debug` events"); + + if let Some(comm_tx) = &self.comm_tx { + comm_tx + .send(amalthea::comm_rpc_message!("stop_debug")) + .log_err(); + + if let Some(datp_tx) = &self.backend_events_tx { + datp_tx.send(DapBackendEvent::Continued).log_err(); } } // else: If not connected to a frontend, the DAP client should @@ -228,6 +323,142 @@ impl Dap { variables_reference } + + pub fn next_breakpoint_id(&mut self) -> i64 { + let id = self.current_breakpoint_id; + self.current_breakpoint_id += 1; + id + } + + /// Verify breakpoints within a line range for a given URI + /// + /// Loops over all breakpoints for the URI and verifies any unverified + /// breakpoints that fall within the range [start_line, end_line). + /// Sends a `BreakpointVerified` event for each newly verified breakpoint. + pub fn verify_breakpoints(&mut self, uri: &Url, start_line: u32, end_line: u32) { + let Some((_, bp_list)) = self.breakpoints.get_mut(uri) else { + return; + }; + + for bp in bp_list.iter_mut() { + // Verified and Disabled breakpoints are both already verified. + // Invalid breakpoints never get verified so we skip them too. + // Only injected breakpoints can be verified by range. Non-injected + // breakpoints were added by the user after the code was parsed and + // remain unverified until re-parsing / re-evaluation. + if matches!( + bp.state, + BreakpointState::Verified | BreakpointState::Disabled | BreakpointState::Invalid(_) + ) || !bp.injected + { + continue; + } + + let line = bp.line; + if line >= start_line && line < end_line { + bp.state = BreakpointState::Verified; + + if let Some(tx) = &self.backend_events_tx { + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + line: bp.line, + verified: true, + message: None, + }) + .log_err(); + } + } + } + } + + /// Verify a single breakpoint by ID + /// + /// Finds the breakpoint with the given ID for the URI and marks it as verified + /// if it was previously unverified. Sends a `BreakpointVerified` event. + pub fn verify_breakpoint(&mut self, uri: &Url, id: &str) { + let Some((_, bp_list)) = self.breakpoints.get_mut(uri) else { + return; + }; + let Some(bp) = bp_list.iter_mut().find(|bp| bp.id.to_string() == id) else { + return; + }; + + // Only verify unverified breakpoints + if !matches!(bp.state, BreakpointState::Unverified) { + return; + } + + bp.state = BreakpointState::Verified; + + if let Some(tx) = &self.backend_events_tx { + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + line: bp.line, + verified: true, + message: None, + }) + .log_err(); + } + } + + /// Called when a document changes. Removes all breakpoints for the URI + /// and sends unverified events for each one. + pub fn did_change_document(&mut self, uri: &Url) { + let Some((_, breakpoints)) = self.breakpoints.remove(uri) else { + return; + }; + let Some(tx) = &self.backend_events_tx else { + return; + }; + + for bp in breakpoints { + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + line: bp.line, + verified: false, + message: None, + }) + .log_err(); + } + } + + /// Notify the frontend about breakpoints that were marked invalid during annotation. + /// Sends a `BreakpointState` event with verified=false and a message for each. + pub fn notify_invalid_breakpoints(&self, breakpoints: &[Breakpoint]) { + let Some(tx) = &self.backend_events_tx else { + return; + }; + + for bp in breakpoints { + let BreakpointState::Invalid(reason) = &bp.state else { + continue; + }; + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + line: bp.line, + verified: false, + message: Some(reason.message().to_string()), + }) + .log_err(); + } + } + + pub(crate) fn is_breakpoint_enabled(&self, uri: &Url, id: String) -> bool { + let Some((_, breakpoints)) = self.breakpoints.get(uri) else { + return false; + }; + + // Unverified breakpoints are enabled. This happens when we hit a + // breakpoint in an expression that hasn't been evaluated yet (or hasn't + // finished). + breakpoints.iter().any(|bp| { + bp.id.to_string() == id && + matches!( + bp.state, + BreakpointState::Verified | BreakpointState::Unverified + ) + }) + } } // Handler for Amalthea socket threads diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 014f2e8fb..39b1393d9 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -1,7 +1,7 @@ // // dap_server.rs // -// Copyright (C) 2023 Posit Software, PBC. All rights reserved. +// Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. // // @@ -30,7 +30,10 @@ use dap::server::ServerOutput; use dap::types::*; use stdext::result::ResultExt; use stdext::spawn; +use url::Url; +use super::dap::Breakpoint; +use super::dap::BreakpointState; use super::dap::Dap; use super::dap::DapBackendEvent; use crate::console_debug::FrameInfo; @@ -172,6 +175,19 @@ fn listen_dap_events( DapBackendEvent::Terminated => { Event::Terminated(None) }, + + DapBackendEvent::BreakpointState { id, line, verified, message } => { + Event::Breakpoint(BreakpointEventBody { + reason: BreakpointEventReason::Changed, + breakpoint: dap::types::Breakpoint { + id: Some(id), + line: Some(Breakpoint::to_dap_line(line)), + verified, + message, + ..Default::default() + }, + }) + }, }; let mut output = output.lock().unwrap(); @@ -217,7 +233,7 @@ impl DapServer { Some(req) => req, None => return false, }; - log::trace!("DAP: Got request: {:?}", req); + log::trace!("DAP: Got request: {:#?}", req); let cmd = req.command.clone(); @@ -237,6 +253,9 @@ impl DapServer { Command::Threads => { self.handle_threads(req); }, + Command::SetBreakpoints(args) => { + self.handle_set_breakpoints(req, args); + }, Command::SetExceptionBreakpoints(args) => { self.handle_set_exception_breakpoints(req, args); }, @@ -297,19 +316,192 @@ impl DapServer { self.send_event(Event::Initialized); } + // Handle SetBreakpoints requests from the frontend. + // + // Breakpoint state survives DAP server disconnections via document hashing. + // Disconnections happen when the user uses the disconnect command (the + // frontend automatically reconnects) or when the console session goes to + // the background (the LSP is also disabled, so we don't receive document + // change notifications). When we come back online, we compare the document + // content against our stored hash to detect if breakpoints are now stale. + // + // Key implementation details: + // - We use `original_line` for lookup since the frontend doesn't know about + // our line adjustments and always sends back the original line numbers. + // - When a user unchecks a breakpoint, it appears as a deletion (omitted + // from the request). We preserve verified breakpoints as Disabled so we + // can restore their state when re-enabled without requiring re-sourcing. + fn handle_set_breakpoints(&mut self, req: Request, args: SetBreakpointsArguments) { + let path = args.source.path.clone().unwrap_or_default(); + + let uri = match Url::from_file_path(&path) { + Ok(uri) => uri, + Err(()) => { + log::error!("Failed to convert path to URI: '{path}'"); + let rsp = req.error(&format!("Invalid path: {path}")); + self.respond(rsp); + return; + }, + }; + + // Read document content to compute hash. We currently assume UTF-8 even + // though the frontend supports files with different encodings (but + // UTF-8 is the default). + let doc_content = match std::fs::read_to_string(&path) { + Ok(content) => content, + Err(err) => { + // TODO: What do we do with breakpoints in virtual documents? + log::error!("Failed to read file '{path}': {err}"); + let rsp = req.error(&format!("Failed to read file: {path}")); + self.respond(rsp); + return; + }, + }; + + let args_breakpoints = args.breakpoints.unwrap_or_default(); + + let mut state = self.state.lock().unwrap(); + let old_breakpoints = state.breakpoints.get(&uri).cloned(); + + // Breakpoints are associated with this hash. If the document has + // changed after a reconnection, the breakpoints are no longer valid. + let doc_hash = blake3::hash(doc_content.as_bytes()); + let doc_changed = match &old_breakpoints { + Some((existing_hash, _)) => existing_hash != &doc_hash, + None => true, + }; + + let new_breakpoints = if doc_changed { + log::trace!("DAP: Document changed for {uri}, discarding old breakpoints"); + + // Replace all existing breakpoints by new, unverified ones + args_breakpoints + .iter() + .map(|bp| { + let line = Breakpoint::from_dap_line(bp.line); + Breakpoint { + id: state.next_breakpoint_id(), + line, + original_line: line, + state: BreakpointState::Unverified, + injected: false, + } + }) + .collect() + } else { + log::trace!("DAP: Document unchanged for {uri}, preserving breakpoint states"); + + // Unwrap Safety: `doc_changed` is false, so `existing_breakpoints` is Some + let (_, old_breakpoints) = old_breakpoints.unwrap(); + // Use original_line for lookup since that's what the frontend sends back + let mut old_by_line: HashMap = old_breakpoints + .into_iter() + .map(|bp| (bp.original_line, bp)) + .collect(); + + let mut breakpoints: Vec = Vec::new(); + + for bp in &args_breakpoints { + let line = Breakpoint::from_dap_line(bp.line); + + if let Some(old_bp) = old_by_line.remove(&line) { + // Breakpoint already exists at this line + let new_state = match old_bp.state { + // This breakpoint used to be verified, was disabled, and is now back online + BreakpointState::Disabled => BreakpointState::Verified, + // We preserve other states (verified or unverified) + other => other, + }; + + breakpoints.push(Breakpoint { + id: old_bp.id, + // Preserve the actual (anchored) line from previous verification + line: old_bp.line, + original_line: line, + state: new_state, + // Preserve injected status from old breakpoint + injected: old_bp.injected, + }); + } else { + // New breakpoints always start as Unverified, until they get evaluated once + breakpoints.push(Breakpoint { + id: state.next_breakpoint_id(), + line, + original_line: line, + state: BreakpointState::Unverified, + injected: false, + }); + } + } + + // Remaining verified breakpoints need to be preserved in memory + // when deleted. That's because when user unchecks a breakpoint on + // the frontend, the breakpoint is actually deleted (i.e. omitted) + // by a `SetBreakpoints()` request. When the user reenables the + // breakpoint, we have to restore the verification state. + // Unverified/Invalid breakpoints on the other hand are simply + // dropped since there's no verified state that needs to be + // preserved. + for (original_line, old_bp) in old_by_line { + if matches!(old_bp.state, BreakpointState::Verified) { + breakpoints.push(Breakpoint { + id: old_bp.id, + line: old_bp.line, + original_line, + state: BreakpointState::Disabled, + injected: true, + }); + } + } + + breakpoints + }; + + log::trace!( + "DAP: URI {uri} now has {} breakpoints:\n{:#?}", + new_breakpoints.len(), + new_breakpoints + ); + + state + .breakpoints + .insert(uri, (doc_hash, new_breakpoints.clone())); + + drop(state); + + let response_breakpoints: Vec = new_breakpoints + .iter() + .filter(|bp| !matches!(bp.state, BreakpointState::Disabled)) + .map(|bp| { + let message = match &bp.state { + BreakpointState::Invalid(reason) => Some(reason.message().to_string()), + _ => None, + }; + dap::types::Breakpoint { + id: Some(bp.id), + verified: matches!(bp.state, BreakpointState::Verified), + line: Some(Breakpoint::to_dap_line(bp.line)), + message, + ..Default::default() + } + }) + .collect(); + + let rsp = req.success(ResponseBody::SetBreakpoints(SetBreakpointsResponse { + breakpoints: response_breakpoints, + })); + + self.respond(rsp); + } + fn handle_attach(&mut self, req: Request, _args: AttachRequestArguments) { let rsp = req.success(ResponseBody::Attach); self.respond(rsp); - self.send_event(Event::Stopped(StoppedEventBody { - reason: StoppedEventReason::Step, - description: Some(String::from("Execution paused")), - thread_id: Some(THREAD_ID), - preserve_focus_hint: Some(false), - text: None, - all_threads_stopped: None, - hit_breakpoint_ids: None, - })) + self.send_event(Event::Thread(ThreadEventBody { + reason: ThreadEventReason::Started, + thread_id: THREAD_ID, + })); } fn handle_disconnect(&mut self, req: Request, _args: DisconnectArguments) { @@ -341,7 +533,7 @@ impl DapServer { let rsp = req.success(ResponseBody::Threads(ThreadsResponse { threads: vec![Thread { id: THREAD_ID, - name: String::from("Main thread"), + name: String::from("R console"), }], })); self.respond(rsp); diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index a5b50568e..ce7d44e0a 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1,7 +1,7 @@ // // interface.rs // -// Copyright (C) 2023-2024 Posit Software, PBC. All rights reserved. +// Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. // // @@ -95,10 +95,12 @@ use serde_json::json; use stdext::result::ResultExt; use stdext::*; use tokio::sync::mpsc::UnboundedReceiver as AsyncUnboundedReceiver; +use url::Url; use uuid::Uuid; use crate::console_annotate::annotate_input; use crate::console_debug::FrameInfoId; +use crate::dap::dap::Breakpoint; use crate::dap::dap::DapBackendEvent; use crate::dap::Dap; use crate::errors; @@ -155,8 +157,21 @@ pub enum SessionMode { #[derive(Clone, Debug)] pub enum DebugCallText { None, - Capturing(String), - Finalized(String), + Capturing(String, DebugCallTextKind), + Finalized(String, DebugCallTextKind), +} + +#[derive(Clone, Copy, Debug)] +pub enum DebugCallTextKind { + Debug, + DebugAt, +} + +/// Notifications from other components (e.g., LSP) to the Console +#[derive(Debug)] +pub enum ConsoleNotification { + /// Notification that a document has changed, requiring breakpoint invalidation. + DidChangeDocument(Url), } // --- Globals --- @@ -284,7 +299,7 @@ pub struct RMain { pub(crate) debug_session_index: u32, /// The current frame `id`. Unique across all frames within a single debug session. - /// Reset after `stop_debug()`, not between debug steps. + /// Reset after `debug_stop()`, not between debug steps. pub(crate) debug_current_frame_id: i64, /// Tracks how many nested `r_read_console()` calls are on the stack. @@ -337,16 +352,14 @@ impl PendingInputs { pub(crate) fn read( code: &str, location: Option, + breakpoints: Option<&mut [Breakpoint]>, ) -> anyhow::Result> { - let mut _srcfile = None; - let input = if let Some(location) = location { - let annotated_code = annotate_input(code, location); - _srcfile = Some(SrcFile::new_virtual_empty_filename(annotated_code.into())); - harp::ParseInput::SrcFile(&_srcfile.unwrap()) + let annotated_code = annotate_input(code, location, breakpoints)?; + log::trace!("Annotated code: \n```\n{annotated_code}\n```"); + harp::ParseInput::SrcFile(&SrcFile::new_virtual_empty_filename(annotated_code.into())) } else if harp::get_option_bool("keep.source") { - _srcfile = Some(SrcFile::new_virtual_empty_filename(code.into())); - harp::ParseInput::SrcFile(&_srcfile.unwrap()) + harp::ParseInput::SrcFile(&SrcFile::new_virtual_empty_filename(code.into())) } else { harp::ParseInput::Text(code) }; @@ -517,6 +530,7 @@ impl RMain { session_mode: SessionMode, default_repos: DefaultRepos, graphics_device_rx: AsyncUnboundedReceiver, + console_notification_rx: AsyncUnboundedReceiver, ) { // Set the main thread ID. // Must happen before doing anything that checks `RMain::on_main_thread()`, @@ -630,10 +644,9 @@ impl RMain { } // Register all hooks once all modules have been imported - let hook_result = RFunction::from("register_hooks").call_in(ARK_ENVS.positron_ns); - if let Err(err) = hook_result { - log::error!("Error registering some hooks: {err:?}"); - } + RFunction::from("register_hooks") + .call_in(ARK_ENVS.positron_ns) + .log_err(); // Populate srcrefs for namespaces already loaded in the session. // Namespaces of future loaded packages will be populated on load. @@ -652,6 +665,11 @@ impl RMain { log::error!("Error setting default repositories: {err:?}"); } + // Finish initilization of modules + RFunction::from("initialize") + .call_in(ARK_ENVS.positron_ns) + .log_err(); + // Initialise Ark's last value libr::SETCDR(r_symbol!(".ark_last_value"), harp::r_null()); } @@ -664,6 +682,18 @@ impl RMain { ); Self::complete_initialization(main.banner.take(), kernel_init_tx); + // Spawn handler loop for async messages from other components (e.g., LSP). + // Note that we do it after init is complete to avoid deadlocking + // integration tests by spawning an async task. The deadlock is caused + // by the `block_on()` behaviour in + // https://github.com/posit-dev/ark/blob/bd827e73/crates/ark/src/r_task.rs#L261. + r_task::spawn_interrupt({ + let dap_clone = main.debug_dap.clone(); + || async move { + RMain::process_console_notifications(console_notification_rx, dap_clone).await + } + }); + // Initialize the GD context on this thread. // Note that we do it after init is complete to avoid deadlocking // integration tests by spawning an async task. The deadlock is caused @@ -885,6 +915,23 @@ impl RMain { &self.iopub_tx } + // Async messages for the Console. Processed at interrupt time. + async fn process_console_notifications( + mut console_notification_rx: AsyncUnboundedReceiver, + dap: Arc>, + ) { + loop { + while let Some(notification) = console_notification_rx.recv().await { + match notification { + ConsoleNotification::DidChangeDocument(uri) => { + let mut dap = dap.lock().unwrap(); + dap.did_change_document(&uri); + }, + } + } + } + } + fn init_execute_request(&mut self, req: &ExecuteRequest) -> (ConsoleInput, u32) { // Reset the autoprint buffer self.autoprint_output = String::new(); @@ -954,9 +1001,10 @@ impl RMain { // from the last expression we evaluated self.debug_is_debugging = true; self.debug_start(self.debug_preserve_focus); - } else if self.debug_is_debugging { - self.debug_is_debugging = false; - self.debug_stop(); + + // Note that for simplicity this state is reset on exit via the + // cleanups registered in `r_read_console()`. Ideally we'd clean + // from here for symmetry. } if let Some(exception) = self.take_exception() { @@ -990,6 +1038,48 @@ impl RMain { // fall through to event loop let result = self.take_result(); self.handle_active_request(&info, ConsoleValue::Success(result)); + + // Reset debug flag on the global environment. This is a workaround + // for when a breakpoint was entered at top-level, in a `{}` block. + // In that case `browser()` marks the global environment as being + // debugged here: https://github.com/r-devel/r-svn/blob/476ffd4c/src/main/main.c#L1492-L1494. + // Only do it when the call stack is empty, as removing the flag + // prevents normal stepping with `source()`. + if harp::r_n_frame().unwrap_or(0) == 0 { + unsafe { libr::SET_RDEBUG(libr::R_GlobalEnv, 0) }; + } + } + + // If debugger is active, to prevent injected expressions from + // interfering with debug-stepping, we might need to automatically step + // over to the next statement by returning `n` to R. Two cases: + // - We've just stopped due to an injected breakpoint. In this case + // we're in the `.ark_breakpoint()` function and can look at the current + // `sys.function()` to detect this. + // - We've just stepped to another injected breakpoint. In this case we + // look whether our sentinel `.ark_auto_step()` was emitted by R as part + // of the `Debug at` output. + if self.debug_is_debugging { + // Did we just step onto an injected call (breakpoint or verify)? + let at_auto_step = matches!( + &self.debug_call_text, + DebugCallText::Finalized(text, DebugCallTextKind::DebugAt) + if text.trim_start().starts_with("base::.ark_auto_step") + ); + + // Are we stopped by an injected breakpoint + let in_injected_breakpoint = harp::r_current_function().inherits("ark_breakpoint"); + + if in_injected_breakpoint || at_auto_step { + let kind = if in_injected_breakpoint { "in" } else { "at" }; + log::trace!("Auto-step expression reached ({kind}), moving to next expression"); + + self.debug_preserve_focus = false; + self.debug_send_dap(DapBackendEvent::Continued); + + Self::on_console_input(buf, buflen, String::from("n")).unwrap(); + return ConsoleResult::NewInput; + } } // In the future we'll also send browser information, see @@ -1292,7 +1382,7 @@ impl RMain { // Let frontend know the last request is complete. This turns us // back to Idle. - Self::reply_execute_request(&self.iopub_tx, req, &info, value); + Self::reply_execute_request(&self.iopub_tx, req, value); } else { log::info!("No active request to handle, discarding: {value:?}"); } @@ -1345,20 +1435,37 @@ impl RMain { match input { ConsoleInput::Input(code, loc) => { // Parse input into pending expressions - match PendingInputs::read(&code, loc) { + + // Keep the DAP lock while we are updating breakpoints + let mut dap_guard = self.debug_dap.lock().unwrap(); + let uri = loc.as_ref().map(|l| l.uri.clone()); + let breakpoints = loc + .as_ref() + .and_then(|loc| dap_guard.breakpoints.get_mut(&loc.uri)) + .map(|(_, v)| v.as_mut_slice()); + + match PendingInputs::read(&code, loc, breakpoints) { Ok(ParseResult::Success(inputs)) => { self.pending_inputs = inputs; }, Ok(ParseResult::SyntaxError(message)) => { - return Some(ConsoleResult::Error(message)) + return Some(ConsoleResult::Error(message)); }, Err(err) => { return Some(ConsoleResult::Error(format!( "Error while parsing input: {err:?}" - ))) + ))); }, } + // Notify frontend about any breakpoints marked invalid during annotation + if let Some(uri) = &uri { + if let Some((_, bps)) = dap_guard.breakpoints.get(uri) { + dap_guard.notify_invalid_breakpoints(bps); + } + } + drop(dap_guard); + // Evaluate first expression if there is one if let Some(input) = self.pop_pending() { Some(self.handle_pending_input(input, buf, buflen)) @@ -1915,12 +2022,9 @@ impl RMain { fn reply_execute_request( iopub_tx: &Sender, req: ActiveReadConsoleRequest, - prompt_info: &PromptInfo, value: ConsoleValue, ) { - let prompt = &prompt_info.input_prompt; - - log::trace!("Got R prompt '{}', completing execution", prompt); + log::trace!("Completing execution after receiving prompt"); let exec_count = req.exec_count; @@ -2513,6 +2617,18 @@ pub extern "C-unwind" fn r_read_console( // Restore current frame main.read_console_frame.replace(old_current_frame); + + // Always stop debug session when yielding back to R. This prevents + // the debug toolbar from lingering in situations like: + // + // ```r + // { local(browser()); Sys.sleep(10) } + // ``` + // + // For a more practical example see Shiny app example in + // https://github.com/rstudio/rstudio/pull/14848 + main.debug_is_debugging = false; + main.debug_stop(); }, ) } @@ -2562,6 +2678,10 @@ fn r_read_console_impl( main.read_console_nested_return.set(false); } + // We verify breakpoints _after_ evaluation is complete. An + // error will prevent verification. + main.verify_breakpoints(RObject::from(srcref)); + libr::Rf_unprotect(2); return 1; } diff --git a/crates/ark/src/lsp/backend.rs b/crates/ark/src/lsp/backend.rs index fa2dda02d..6c52d1c5f 100644 --- a/crates/ark/src/lsp/backend.rs +++ b/crates/ark/src/lsp/backend.rs @@ -21,6 +21,7 @@ use stdext::result::ResultExt; use tokio::net::TcpListener; use tokio::runtime::Runtime; use tokio::sync::mpsc::unbounded_channel as tokio_unbounded_channel; +use tokio::sync::mpsc::UnboundedSender as AsyncUnboundedSender; use tower_lsp::jsonrpc; use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::request::GotoImplementationParams; @@ -34,6 +35,7 @@ use tower_lsp::LspService; use tower_lsp::Server; use super::main_loop::LSP_HAS_CRASHED; +use crate::interface::ConsoleNotification; use crate::interface::RMain; use crate::lsp::handlers::VirtualDocumentParams; use crate::lsp::handlers::VirtualDocumentResponse; @@ -479,6 +481,7 @@ pub fn start_lsp( runtime: Arc, server_start: ServerStartMessage, server_started_tx: Sender, + console_notification_tx: AsyncUnboundedSender, ) { runtime.block_on(async { let ip_address = server_start.ip_address(); @@ -513,7 +516,7 @@ pub fn start_lsp( let (shutdown_tx, mut shutdown_rx) = tokio::sync::mpsc::channel::<()>(1); let init = |client: Client| { - let state = GlobalState::new(client); + let state = GlobalState::new(client, console_notification_tx.clone()); let events_tx = state.events_tx(); // Start main loop and hold onto the handle that keeps it alive diff --git a/crates/ark/src/lsp/handler.rs b/crates/ark/src/lsp/handler.rs index 6bd1a61cb..1c59616cf 100644 --- a/crates/ark/src/lsp/handler.rs +++ b/crates/ark/src/lsp/handler.rs @@ -16,18 +16,24 @@ use crossbeam::channel::Sender; use stdext::spawn; use tokio::runtime::Builder; use tokio::runtime::Runtime; +use tokio::sync::mpsc::UnboundedSender as AsyncUnboundedSender; use super::backend; +use crate::interface::ConsoleNotification; use crate::interface::KernelInfo; pub struct Lsp { runtime: Arc, kernel_init_rx: BusReader, kernel_initialized: bool, + console_notification_tx: AsyncUnboundedSender, } impl Lsp { - pub fn new(kernel_init_rx: BusReader) -> Self { + pub fn new( + kernel_init_rx: BusReader, + console_notification_tx: AsyncUnboundedSender, + ) -> Self { let rt = Builder::new_multi_thread() .enable_all() // One for the main loop and one spare @@ -41,6 +47,7 @@ impl Lsp { runtime: Arc::new(rt), kernel_init_rx, kernel_initialized: false, + console_notification_tx, } } } @@ -68,8 +75,14 @@ impl ServerHandler for Lsp { // account for potential reconnects let runtime = self.runtime.clone(); + let console_notification_tx = self.console_notification_tx.clone(); spawn!("ark-lsp", move || { - backend::start_lsp(runtime, server_start, server_started_tx) + backend::start_lsp( + runtime, + server_start, + server_started_tx, + console_notification_tx, + ) }); return Ok(()); } diff --git a/crates/ark/src/lsp/main_loop.rs b/crates/ark/src/lsp/main_loop.rs index 4347c1013..fc24ab15d 100644 --- a/crates/ark/src/lsp/main_loop.rs +++ b/crates/ark/src/lsp/main_loop.rs @@ -29,6 +29,7 @@ use tower_lsp::Client; use url::Url; use super::backend::RequestResponse; +use crate::interface::ConsoleNotification; use crate::lsp; use crate::lsp::backend::LspMessage; use crate::lsp::backend::LspNotification; @@ -146,13 +147,15 @@ pub(crate) struct GlobalState { /// Unlike `WorldState`, `ParserState` cannot be cloned and is only accessed by /// exclusive handlers. -#[derive(Default)] pub(crate) struct LspState { /// The set of tree-sitter document parsers managed by the `GlobalState`. pub(crate) parsers: HashMap, /// Capabilities negotiated with the client pub(crate) capabilities: Capabilities, + + /// Channel for sending notifications to Console (e.g., document changes for DAP) + pub(crate) console_notification_tx: TokioUnboundedSender, } /// State for the auxiliary loop @@ -177,14 +180,23 @@ impl GlobalState { /// /// * `client`: The tower-lsp client shared with the tower-lsp backend /// and auxiliary loop. - pub(crate) fn new(client: Client) -> Self { + pub(crate) fn new( + client: Client, + console_notification_tx: TokioUnboundedSender, + ) -> Self { // Transmission channel for the main loop events. Shared with the // tower-lsp backend and the Jupyter kernel. let (events_tx, events_rx) = tokio_unbounded_channel::(); + let lsp_state = LspState { + parsers: HashMap::new(), + capabilities: Capabilities::default(), + console_notification_tx, + }; + let mut state = Self { world: WorldState::default(), - lsp_state: LspState::default(), + lsp_state, client, events_tx, events_rx, diff --git a/crates/ark/src/lsp/state_handlers.rs b/crates/ark/src/lsp/state_handlers.rs index ec5c41318..3aef51729 100644 --- a/crates/ark/src/lsp/state_handlers.rs +++ b/crates/ark/src/lsp/state_handlers.rs @@ -6,6 +6,7 @@ // use anyhow::anyhow; +use stdext::result::ResultExt; use tower_lsp::lsp_types; use tower_lsp::lsp_types::CompletionOptions; use tower_lsp::lsp_types::CompletionOptionsCompletionItem; @@ -42,6 +43,7 @@ use tracing::Instrument; use tree_sitter::Parser; use url::Url; +use crate::interface::ConsoleNotification; use crate::lsp; use crate::lsp::capabilities::Capabilities; use crate::lsp::config::indent_style_from_lsp; @@ -252,6 +254,12 @@ pub(crate) fn did_change( lsp::main_loop::index_update(vec![uri.clone()], state.clone()); + // Notify console about document change to invalidate breakpoints + lsp_state + .console_notification_tx + .send(ConsoleNotification::DidChangeDocument(uri.clone())) + .log_err(); + Ok(()) } diff --git a/crates/ark/src/modules/positron/calls.R b/crates/ark/src/modules/positron/calls.R index 661a0c0fe..3bc52555c 100644 --- a/crates/ark/src/modules/positron/calls.R +++ b/crates/ark/src/modules/positron/calls.R @@ -19,6 +19,14 @@ call_name <- function(x) { ) } +simple_call_name <- function(x) { + if (is_simple_call(x)) { + call_name(x) + } else { + NULL + } +} + call_type <- function(x) { stopifnot(typeof(x) == "language") diff --git a/crates/ark/src/modules/positron/calls_deparse.R b/crates/ark/src/modules/positron/calls_deparse.R new file mode 100644 index 000000000..911fa5e21 --- /dev/null +++ b/crates/ark/src/modules/positron/calls_deparse.R @@ -0,0 +1,307 @@ +# +# calls_deparse.R +# +# Copyright (C) 2025 Posit Software, PBC. All rights reserved. +# +# + +deparse_string <- function(x, cutoff = 500L) { + paste_line(deparse(x, width.cutoff = cutoff)) +} + +as_label <- function(x) { + # Remove arguments of call expressions + if (is.call(x) && call_print_type(x) == "prefix") { + x <- x[1] + } + + # Retain only first line + out <- deparse(x)[[1]] + + # And first 20 characters + if (nchar(out) > 20) { + out <- substr(out, 1, 20) + out <- paste0(out, "...") + } + + out +} + +is_simple_call <- function(x) { + call_print_type(x) == "call" +} + +# From https://github.com/r-lib/rlang/blob/main/R/call.R +call_print_type <- function(call) { + stopifnot(is.call(call)) + + type <- call_print_fine_type(call) + switch( + type, + call = "prefix", + control = , + delim = , + subset = "special", + type + ) +} + +call_print_fine_type <- function(call) { + stopifnot(is.call(call)) + + op <- call_parse_type(call) + if (op == "") { + return("call") + } + + switch( + op, + `+unary` = , + `-unary` = , + `~unary` = , + `?unary` = , + `!` = , + `!!` = , + `!!!` = "prefix", + `function` = , + `while` = , + `for` = , + `repeat` = , + `if` = "control", + `(` = , + `{{` = , + `{` = "delim", + `[` = , + `[[` = "subset", + # These operators always print in infix form even if they have + # more arguments + `<-` = , + `<<-` = , + `=` = , + `::` = , + `:::` = , + `$` = , + `@` = "infix", + `+` = , + `-` = , + `?` = , + `~` = , + `:=` = , + `|` = , + `||` = , + `&` = , + `&&` = , + `>` = , + `>=` = , + `<` = , + `<=` = , + `==` = , + `!=` = , + `*` = , + `/` = , + `%%` = , + `special` = , + `:` = , + `^` = if (length(call) == 3) { + "infix" + } else { + "call" + } + ) +} + +# Extracted from C implementation in src/internal/parse.c +call_parse_type <- function(call) { + if (!is.call(call)) { + return("") + } + + head <- call[[1]] + if (!is.symbol(head)) { + return("") + } + + # Check if unary by examining if there's only one argument after the head + is_unary <- length(call) == 2 + + # Control flow keywords + if (identical(head, quote(`break`))) { + return("break") + } + if (identical(head, quote(`next`))) { + return("next") + } + if (identical(head, quote(`for`))) { + return("for") + } + if (identical(head, quote(`while`))) { + return("while") + } + if (identical(head, quote(`repeat`))) { + return("repeat") + } + if (identical(head, quote(`if`))) { + return("if") + } + if (identical(head, quote(`function`))) { + return("function") + } + + # Question mark (help operator) + if (identical(head, quote(`?`))) { + if (is_unary) { + return("?unary") + } + return("?") + } + + # Assignment operators + if (identical(head, quote(`<-`))) { + return("<-") + } + if (identical(head, quote(`<<-`))) { + return("<<-") + } + if (identical(head, quote(`=`))) { + return("=") + } + if (identical(head, quote(`:=`))) { + return(":=") + } + + # Comparison operators + if (identical(head, quote(`<`))) { + return("<") + } + if (identical(head, quote(`<=`))) { + return("<=") + } + if (identical(head, quote(`>`))) { + return(">") + } + if (identical(head, quote(`>=`))) { + return(">=") + } + if (identical(head, quote(`==`))) { + return("==") + } + if (identical(head, quote(`!=`))) { + return("!=") + } + + # Tilde (formula operator) + if (identical(head, quote(`~`))) { + if (is_unary) { + return("~unary") + } + return("~") + } + + # Logical operators + if (identical(head, quote(`|`))) { + return("|") + } + if (identical(head, quote(`||`))) { + return("||") + } + if (identical(head, quote(`&`))) { + return("&") + } + if (identical(head, quote(`&&`))) { + return("&&") + } + + # Bang operators (for negation, unquoting is unsupported) + if (identical(head, quote(`!`))) { + return("!") + } + + # Arithmetic operators + if (identical(head, quote(`+`))) { + if (is_unary) { + return("+unary") + } + return("+") + } + if (identical(head, quote(`-`))) { + if (is_unary) { + return("-unary") + } + return("-") + } + if (identical(head, quote(`*`))) { + return("*") + } + if (identical(head, quote(`/`))) { + return("/") + } + if (identical(head, quote(`^`))) { + return("^") + } + + # Modulo and special operators + if (identical(head, quote(`%%`))) { + return("%%") + } + + # Check for special operators like %in%, %*%, etc. + name <- as.character(head) + if ( + substr(name, 1, 1) == "%" && + nchar(name) > 2 && + substr(name, nchar(name), nchar(name)) == "%" + ) { + return("special") + } + + # Colon operators + if (identical(head, quote(`:`))) { + return(":") + } + if (identical(head, quote(`::`))) { + return("::") + } + if (identical(head, quote(`:::`))) { + return(":::") + } + + # Access operators + if (identical(head, quote(`$`))) { + return("$") + } + if (identical(head, quote(`@`))) { + return("@") + } + + # Subsetting operators + if (identical(head, quote(`[`))) { + return("[") + } + if (identical(head, quote(`[[`))) { + return("[[") + } + + # Parentheses + if (identical(head, quote(`(`))) { + return("(") + } + + # Braces and embrace + if (identical(head, quote(`{`))) { + # Check for embrace operator: {{x}} + if (length(call) == 2) { + cadr <- call[[2]] + if ( + is.call(cadr) && + length(cadr) == 2 && + identical(cadr[[1]], quote(`{`)) && + is.symbol(cadr[[2]]) + ) { + return("{{") + } + } + return("{") + } + + "" +} diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index a0306fd41..ec4ed8dbf 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -1,7 +1,7 @@ # # debug.R # -# Copyright (C) 2023-2024 Posit Software, PBC. All rights reserved. +# Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. # # @@ -14,11 +14,6 @@ debugger_stack_info <- function( calls ) { n <- length(fns) - - if (n == 0L) { - # Must have at least 1 frame on the stack to proceed - return(list()) - } if (n != length(environments) || n != length(calls)) { message <- paste0( "`sys.function()`, `sys.frames()`, and `sys.calls()` didn't return consistent results. ", @@ -27,8 +22,15 @@ debugger_stack_info <- function( stop(sprintf(message, n, length(environments), length(calls))) } - # Top level call never has source references. - # It's what comes through the console input. + if (n == 0L) { + return(list(frame_info_from_srcref( + source_name = ".R", + frame_name = "", + srcref = context_srcref, + environment = NULL + ))) + } + top_level_loc <- 1L top_level_call <- calls[[top_level_loc]] @@ -84,16 +86,30 @@ debugger_stack_info <- function( } top_level_call_frame_info <- function(x) { - x <- call_deparse(x) - x <- lines_join(x) + source_name <- paste0(as_label(x), ".R") + contents <- deparse_string(x) + + srcref <- attr(x, "srcref", exact = TRUE) + if (!is.null(srcref)) { + out <- frame_info_from_srcref( + source_name = source_name, + frame_name = "", + srcref = srcref, + environment = NULL + ) + + if (!is.null(out)) { + return(out) + } + } # We return `0`s to avoid highlighting anything in the top level call. # We just want to show it in the editor, and that's really it. new_frame_info( - source_name = x, + source_name = source_name, frame_name = "", file = NULL, - contents = x, + contents = contents, environment = NULL, start_line = 0L, start_column = 0L, @@ -110,18 +126,8 @@ context_frame_info <- function( frame_call, last_start_line ) { - frame_call_name <- call_name(frame_call) - if (!is.null(frame_call_name)) { - # Figure out the frame function's name and use that as a simpler - # `frame_name` and `source_name` - frame_name <- paste0(frame_call_name, "()") - source_name <- frame_name - } else { - # Otherwise fall back to standard deparsing of `frame_call` - frame_lines <- call_deparse(frame_call) - frame_name <- lines_join(frame_lines) - source_name <- frame_name - } + frame_name <- as_label(frame_call) + source_name <- paste0(frame_name, ".R") frame_info( source_name, @@ -139,15 +145,9 @@ intermediate_frame_infos <- function(n, calls, fns, environments, frame_calls) { attr(call, "srcref", exact = TRUE) }) call_texts <- lapply(calls, function(call) { - call_lines <- call_deparse(call) - call_text <- lines_join(call_lines) - call_text - }) - frame_names <- lapply(frame_calls, function(call) { - call_lines <- call_deparse(call) - call_text <- lines_join(call_lines) - call_text + deparse_string(call) }) + frame_names <- lapply(frame_calls, function(call) as_label(call)) # Currently only tracked for the context frame, as that is where it is most useful, # since that is where the user is actively stepping. @@ -163,7 +163,7 @@ intermediate_frame_infos <- function(n, calls, fns, environments, frame_calls) { frame_name <- frame_names[[i]] out[[i]] <- frame_info( - source_name = call_text, + source_name = paste0(frame_name, ".R"), frame_name = frame_name, srcref = srcref, fn = fn, @@ -200,8 +200,7 @@ frame_info <- function( } # Only deparse if `srcref` failed! - fn_lines <- call_deparse(fn) - fn_text <- lines_join(fn_lines) + fn_text <- deparse_string(fn) # Reparse early on, so even if we fail to find `call_text` or fail to reparse, # we pass a `fn_text` to `frame_info_unknown_range()` where we've consistently removed @@ -252,6 +251,10 @@ frame_info_from_srcref <- function( return(NULL) } + if (is_string(info$file)) { + source_name <- basename(info$file) + } + new_frame_info( source_name = source_name, frame_name = frame_name, @@ -368,14 +371,6 @@ new_frame_info <- function( ) } -call_deparse <- function(x) { - deparse(x, width.cutoff = 500L) -} - -lines_join <- function(x) { - paste0(x, collapse = "\n") -} - #' @param fn_expr A function expression returned from `parse_function_text()`, which #' reparsed the function text while keeping source references. #' @param call_text A single string containing the text of a call to look for @@ -700,3 +695,72 @@ non_parseable_pattern_info <- function(pattern, replacement) { non_parseable_fixed_info <- function(pattern, replacement) { list(pattern = pattern, replacement = replacement, fixed = TRUE) } + +is_breakpoint_enabled <- function(uri, id) { + .ps.Call("ps_is_breakpoint_enabled", uri, id) +} + +verify_breapoint <- function(uri, id) { + .ps.Call("ps_verify_breakpoint", uri, id) +} + +# Injected breakpoint. This receives a `browser()` call in the `expr` argument. +# The argument if forced if the breakpoint is enabled. Since `expr` is promised +# in the calling frame environment, that environment is marked by R as being +# debugged (with `SET_RDEBUG`), allowing to step through it. We're stopped in +# the wrong frame (`.ark_breakpoint()`'s) but the console automatically steps to +# the next expression whenever it detects that the current function (retrieved +# with `sys.function()`) inherits from `ark_breakpoint`. +#' @export +.ark_breakpoint <- structure( + function(expr, uri, id) { + # Verify breakpoint right away, if not already the case We normally + # verify breakpoints after each top-level expression has finished + # evaluating, but if we stop on a breakpoint right away (e.g. because + # it's in an `lapply()` rather than an assigned function) we must verify + # it directly. Otherwise it's confusing for users to stop on an unverified + # breakpoint that appears invalid. + verify_breapoint(uri, id) + + enabled <- is_breakpoint_enabled(uri, id) + log_trace(sprintf( + "DAP: Breakpoint %s for %s enabled: %s", + id, + uri, + enabled + )) + + # Force `browser()` call only if breakpoint is enabled + if (enabled) { + expr + } + }, + class = "ark_breakpoint" +) + +# Wrapper for expressions that should be auto-stepped over in the debugger. The +# debugger detects this by checking if R emitted a `debug at` line containing +# `.ark_auto_step` and automatically steps past it. +#' @export +.ark_auto_step <- function(expr) { + expr +} + +# Verify breakpoints in a line range. Called after each top-level expression in +# `source()`. +.ark_verify_breakpoints_range <- function(uri, start_line, end_line) { + .ps.Call("ps_verify_breakpoints_range", uri, start_line, end_line) +} + +debug_initialize <- function() { + # Store `.ark_breakpoint` and friends in base namespace so they're maximally + # reachable. We might want to do that for all symbols exported from the + # Ark/Positron namespace. + node_poke_cdr(as.symbol(".ark_annotate_source"), .ark_annotate_source) + node_poke_cdr(as.symbol(".ark_auto_step"), .ark_auto_step) + node_poke_cdr(as.symbol(".ark_breakpoint"), .ark_breakpoint) + node_poke_cdr( + as.symbol(".ark_verify_breakpoints_range"), + .ark_verify_breakpoints_range + ) +} diff --git a/crates/ark/src/modules/positron/hooks.R b/crates/ark/src/modules/positron/hooks.R index d25ce85ca..f624cd43c 100644 --- a/crates/ark/src/modules/positron/hooks.R +++ b/crates/ark/src/modules/positron/hooks.R @@ -23,6 +23,7 @@ register_hooks <- function() { namespace = TRUE ) register_getHook_hook() + register_source_hook() } rebind <- function(pkg, name, value, namespace = FALSE) { diff --git a/crates/ark/src/modules/positron/hooks_source.R b/crates/ark/src/modules/positron/hooks_source.R new file mode 100644 index 000000000..7ef33eee9 --- /dev/null +++ b/crates/ark/src/modules/positron/hooks_source.R @@ -0,0 +1,133 @@ +# Hook for `source()` to support breakpoints in sourced files. +# +# When: +# - The input path is a file that has breakpoints +# - No other arguments than `echo` (used by Positron) or `local` are provided +# We opt into a code path where breakpoints are injected and the whole source is +# wrapped in `{}` to allow stepping through it. +register_source_hook <- function() { + rebind("base", "source", make_ark_source(base::source), namespace = TRUE) +} + +make_ark_source <- function(original_source) { + force(original_source) + + # Take all original arguments for e.g. completions + function( + file, + local = FALSE, + echo = verbose, + print.eval = echo, + exprs, + spaced = use_file, + verbose = getOption("verbose"), + prompt.echo = getOption("prompt"), + max.deparse.length = 150, + width.cutoff = 60L, + deparseCtrl = "showAttributes", + chdir = FALSE, + catch.aborts = FALSE, + encoding = getOption("encoding"), + continue.echo = getOption("continue"), + skip.echo = 0, + keep.source = getOption("keep.source"), + ... + ) { + # Compute default argument for `spaced`. Must be defined before the + # fallback calls. + use_file <- missing(exprs) + + # DRY: Promise for calling `original_source` with all arguments. + # Evaluated lazily only when needed for fallback paths. + delayedAssign( + "fall_back", + original_source( + file = file, + local = local, + echo = echo, + print.eval = print.eval, + exprs = exprs, + spaced = spaced, + verbose = verbose, + prompt.echo = prompt.echo, + max.deparse.length = max.deparse.length, + width.cutoff = width.cutoff, + deparseCtrl = deparseCtrl, + chdir = chdir, + catch.aborts = catch.aborts, + encoding = encoding, + continue.echo = continue.echo, + skip.echo = skip.echo, + keep.source = keep.source, + ... + ) + ) + + # Fall back if hook is disabled + if (!isTRUE(getOption("ark.source_hook", default = TRUE))) { + return(fall_back) + } + + call <- match.call() + + # Ignore `echo` and `local` arguments + call$echo <- NULL + call$local <- NULL + + # Fall back if `file` is not supplied or if any argument other than + # `echo` or `local` is supplied + if (is.null(call$file) || length(call[-1]) != 1) { + return(fall_back) + } + + uri <- path_to_file_uri(file) + if (is.null(uri)) { + return(fall_back) + } + + env <- if (isTRUE(local)) { + parent.frame() + } else if (isFALSE(local)) { + .GlobalEnv + } else if (is.environment(local)) { + local + } else { + return(fall_back) + } + + text <- paste( + readLines(uri, encoding = encoding, warn = FALSE), + collapse = "\n" + ) + annotated <- .ps.Call("ps_annotate_source", uri, text) + + # If NULL, no breakpoints exist for that URI, fall back + if (is.null(annotated)) { + return(fall_back) + } + + log_trace(sprintf( + "DAP: `source()` hook called with breakpoint injection for `uri`='%s'", + uri + )) + + parsed <- parse(text = annotated, keep.source = TRUE) + + if (length(parsed) != 1) { + log_trace("`source()`: Expected a single `{}` expression") + } + + # `eval()` loops over the expression vector, handling gracefully + # unexpected lengths (0 or >1) + eval(parsed, env) + } +} + +#' @export +.ark_annotate_source <- function(uri, source) { + stopifnot( + is_string(uri), + is_string(source) + ) + .ps.Call("ps_annotate_source", uri, source) +} diff --git a/crates/ark/src/modules/positron/init.R b/crates/ark/src/modules/positron/init.R index 0831b5d5c..21a6786c2 100644 --- a/crates/ark/src/modules/positron/init.R +++ b/crates/ark/src/modules/positron/init.R @@ -166,3 +166,7 @@ if (!exists("the", inherits = FALSE)) { the$cli_version <- NULL } + +initialize <- function() { + debug_initialize() +} diff --git a/crates/ark/src/modules/positron/utils.R b/crates/ark/src/modules/positron/utils.R index c29462564..8b86b0b23 100644 --- a/crates/ark/src/modules/positron/utils.R +++ b/crates/ark/src/modules/positron/utils.R @@ -182,6 +182,27 @@ system_path <- function(pkg) { "" } +# Convert a file path to a file:// URI +path_to_file_uri <- function(path) { + # `winslash` takes care of Windows backslashes + path <- tryCatch( + normalizePath(path, winslash = "/", mustWork = TRUE), + error = function(e) NULL + ) + if (is.null(path)) { + return(NULL) + } + + # On Windows, paths like "C:/foo" need to become "file:///C:/foo" + # On Unix, paths like "/foo" need to become "file:///foo" + if (startsWith(path, "/")) { + paste0("file://", path) + } else { + paste0("file:///", path) + } +} + + # `NULL` if successful, otherwise an error condition try_load_namespace <- function(package) { tryCatch( @@ -219,3 +240,7 @@ log_error <- function(msg) { stopifnot(is_string(msg)) .Call("ark_log_error", msg) } + +paste_line <- function(x) { + paste0(x, collapse = "\n") +} diff --git a/crates/ark/src/shell.rs b/crates/ark/src/shell.rs index 2d8b6e069..d9e9c4c94 100644 --- a/crates/ark/src/shell.rs +++ b/crates/ark/src/shell.rs @@ -42,6 +42,7 @@ use tokio::sync::mpsc::UnboundedSender as AsyncUnboundedSender; use crate::ark_comm::ArkComm; use crate::help::r_help::RHelp; use crate::help_proxy; +use crate::interface::ConsoleNotification; use crate::interface::KernelInfo; use crate::interface::RMain; use crate::plots::graphics_device::GraphicsDeviceNotification; @@ -59,6 +60,7 @@ pub struct Shell { kernel_init_rx: BusReader, kernel_info: Option, graphics_device_tx: AsyncUnboundedSender, + console_notification_tx: AsyncUnboundedSender, } #[derive(Debug)] @@ -75,6 +77,7 @@ impl Shell { kernel_init_rx: BusReader, kernel_request_tx: Sender, graphics_device_tx: AsyncUnboundedSender, + console_notification_tx: AsyncUnboundedSender, ) -> Self { Self { comm_manager_tx, @@ -84,6 +87,7 @@ impl Shell { kernel_init_rx, kernel_info: None, graphics_device_tx, + console_notification_tx, } } @@ -234,6 +238,7 @@ impl ShellHandler for Shell { self.stdin_request_tx.clone(), self.kernel_request_tx.clone(), self.graphics_device_tx.clone(), + self.console_notification_tx.clone(), ), Comm::Help => handle_comm_open_help(comm), Comm::Other(target_name) if target_name == "ark" => ArkComm::handle_comm_open(comm), @@ -258,6 +263,7 @@ fn handle_comm_open_ui( stdin_request_tx: Sender, kernel_request_tx: Sender, graphics_device_tx: AsyncUnboundedSender, + _console_notification_tx: AsyncUnboundedSender, ) -> amalthea::Result { // Create a frontend to wrap the comm channel we were just given. This starts // a thread that proxies messages to the frontend. diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap new file mode 100644 index 000000000..d271b9a9d --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 4 "file:///test.R" +{ +0 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 6 "file:///test.R" +1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 6L, 7L)) +#line 7 "file:///test.R" +2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap new file mode 100644 index 000000000..ab7db2b55 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap @@ -0,0 +1,9 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +x <- 1 +y <- 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 3L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_function_definition_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_function_definition_line.snap new file mode 100644 index 000000000..49c0c2e59 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_function_definition_line.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +f <- function(x) { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 2 "file:///test.R" + 1 +} +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_opening_brace.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_opening_brace.snap new file mode 100644 index 000000000..0930cc597 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_opening_brace.snap @@ -0,0 +1,17 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +{ + 1 + 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) + +#line 6 "file:///test.R" +2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 7L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap new file mode 100644 index 000000000..51ece33f1 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap @@ -0,0 +1,11 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +foo <- function(x) { + x + 1 +} +bar <- 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_breakpoints_inside_braces.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_breakpoints_inside_braces.snap new file mode 100644 index 000000000..a86f5d2e0 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_breakpoints_inside_braces.snap @@ -0,0 +1,21 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 2 "file:///test.R" + 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +#line 3 "file:///test.R" + 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) + +#line 6 "file:///test.R" +2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 7L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap new file mode 100644 index 000000000..56f4bf2d7 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap @@ -0,0 +1,10 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +a <- 1 +b <- 2 +c <- 3 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_top_level_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_top_level_breakpoint.snap new file mode 100644 index 000000000..9d50b925e --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_top_level_breakpoint.snap @@ -0,0 +1,16 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +x <- 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) +#line 2 "file:///test.R" +y <- 2 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 3 "file:///test.R" +z <- 3 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap new file mode 100644 index 000000000..a657c9b2e --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap @@ -0,0 +1,18 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +foo <- function() { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + x <- 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +#line 3 "file:///test.R" + y <- 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) +#line 5 "file:///test.R" +bar <- 3 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 6L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap new file mode 100644 index 000000000..38753c145 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap @@ -0,0 +1,25 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" +x <- 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +#line 3 "file:///test.R" +f <- function() { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 4 "file:///test.R" + y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 5L)) +#line 5 "file:///test.R" + z <- 3 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 7L)) + +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "3")) +#line 7 "file:///test.R" +w <- 4 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap new file mode 100644 index 000000000..8efc88530 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap @@ -0,0 +1,15 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ + { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" + 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) +#line 4 "file:///test.R" + 2 + } +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap new file mode 100644 index 000000000..8a745cc37 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap @@ -0,0 +1,11 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ + x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" + {} +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap new file mode 100644 index 000000000..c5cf4229b --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +if (TRUE) { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + x <- 1 +} else { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 4 "file:///test.R" + y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap new file mode 100644 index 000000000..0a57ee73e --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap @@ -0,0 +1,11 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +f <- function() { + x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" + y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_call.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_call.snap new file mode 100644 index 000000000..94e68c0de --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_call.snap @@ -0,0 +1,12 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + foo( + 1 + ) +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_expr_anchors_to_start.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_expr_anchors_to_start.snap new file mode 100644 index 000000000..b9084dac9 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_expr_anchors_to_start.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + x + + y +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 4L)) +#line 4 "file:///test.R" + z +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap new file mode 100644 index 000000000..da44327e0 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap @@ -0,0 +1,17 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" +y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) +#line 4 "file:///test.R" +z <- 3 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 5 "file:///test.R" +w <- 4 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap new file mode 100644 index 000000000..bc9638eec --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap @@ -0,0 +1,21 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +x <- 1 +f <- function() { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 4 "file:///test.R" + y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 5L)) +#line 5 "file:///test.R" + z <- 3 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 7L)) + +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 7 "file:///test.R" +w <- 4 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_blank_line_anchors_to_next.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_blank_line_anchors_to_next.snap new file mode 100644 index 000000000..0ec2b2fb1 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_blank_line_anchors_to_next.snap @@ -0,0 +1,12 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ + x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) + +#line 4 "file:///test.R" + y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap new file mode 100644 index 000000000..908b7d7c7 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap @@ -0,0 +1,13 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +f <- function() { + x <- 1 +} +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 5 "file:///test.R" +y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap new file mode 100644 index 000000000..68dc135cd --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" +y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) +#line 4 "file:///test.R" +z <- 3 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap new file mode 100644 index 000000000..4d39fb036 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ + { + { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 4 "file:///test.R" + 1 + } + } +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap new file mode 100644 index 000000000..60433f946 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap @@ -0,0 +1,13 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) + + +#line 5 "file:///test.R" +y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap new file mode 100644 index 000000000..f5090d0a6 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 11 "file:///test.R" +{ +x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 13 "file:///test.R" +y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 13L, 14L)) +#line 14 "file:///test.R" +z <- 3 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap new file mode 100644 index 000000000..8f5fdab41 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap @@ -0,0 +1,11 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 21 "file:///test.R" +f <- function() { + x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 23 "file:///test.R" + y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__multiple_breakpoints_collapse_to_same_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__multiple_breakpoints_collapse_to_same_line.snap new file mode 100644 index 000000000..069cfb01f --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__multiple_breakpoints_collapse_to_same_line.snap @@ -0,0 +1,13 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) + +#line 3 "file:///test.R" + foo( + 1 + ) +} diff --git a/crates/ark/src/start.rs b/crates/ark/src/start.rs index 19be561fa..fcee6e625 100644 --- a/crates/ark/src/start.rs +++ b/crates/ark/src/start.rs @@ -22,6 +22,7 @@ use crossbeam::channel::unbounded; use crate::control::Control; use crate::dap; +use crate::interface::ConsoleNotification; use crate::interface::SessionMode; use crate::lsp; use crate::plots::graphics_device::GraphicsDeviceNotification; @@ -58,10 +59,17 @@ pub fn start_kernel( let (r_request_tx, r_request_rx) = bounded::(1); let (kernel_request_tx, kernel_request_rx) = bounded::(1); + // Async communication channel with the R thread (Console) + let (console_notification_tx, console_notification_rx) = + tokio::sync::mpsc::unbounded_channel::(); + // Create the LSP and DAP clients. // Not all Amalthea kernels provide these, but ark does. // They must be able to deliver messages to the shell channel directly. - let lsp = Arc::new(Mutex::new(lsp::handler::Lsp::new(kernel_init_tx.add_rx()))); + let lsp = Arc::new(Mutex::new(lsp::handler::Lsp::new( + kernel_init_tx.add_rx(), + console_notification_tx.clone(), + ))); // DAP needs the `RRequest` channel to communicate with // `read_console()` and send commands to the debug interpreter @@ -85,6 +93,7 @@ pub fn start_kernel( kernel_init_rx, kernel_request_tx, graphics_device_tx, + console_notification_tx, )); // Create the control handler; this is used to handle shutdown/interrupt and @@ -146,5 +155,6 @@ pub fn start_kernel( session_mode, default_repos, graphics_device_rx, + console_notification_rx, ) } diff --git a/crates/ark/tests/kernel-debugger.rs b/crates/ark/tests/kernel-debugger.rs index 7df93efd2..76a1a7770 100644 --- a/crates/ark/tests/kernel-debugger.rs +++ b/crates/ark/tests/kernel-debugger.rs @@ -360,6 +360,25 @@ fn test_browser_in_base_env() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_braced_step_out() { + let frontend = DummyArkFrontend::lock(); + + // Evaluate `{browser()}` which enters the debugger + frontend.execute_request("{browser()}", |result| { + assert!(result.contains("Called from: top level")); + }); + + // Step once with `n` to leave the debugger (the braced expression completes) + frontend.execute_request_invisibly("n"); + + // Now evaluate `{1}` - this should NOT trigger the debugger + // and should return the result normally + frontend.execute_request("{1}", |result| { + assert!(result.contains("[1] 1")); + }); +} + // The minimal environment we can debug in: access to base via `::`. This might // be a problem for very specialised sandboxing environment, but they can // temporarily add `::` while debugging. diff --git a/crates/harp/src/environment.rs b/crates/harp/src/environment.rs index e65705d62..55db89849 100644 --- a/crates/harp/src/environment.rs +++ b/crates/harp/src/environment.rs @@ -17,13 +17,13 @@ use crate::symbol::RSymbol; const FRAME_LOCK_MASK: std::ffi::c_int = 1 << 14; -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Environment { pub inner: RObject, filter: EnvironmentFilter, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum EnvironmentFilter { None, ExcludeHidden, diff --git a/crates/harp/src/parse.rs b/crates/harp/src/parse.rs index 43a141afc..5e7d887de 100644 --- a/crates/harp/src/parse.rs +++ b/crates/harp/src/parse.rs @@ -105,7 +105,7 @@ pub fn parse_status<'a>(input: &ParseInput<'a>) -> crate::Result { let (text, srcfile) = match input { ParseInput::Text(text) => (as_parse_text(text), RObject::null()), - ParseInput::SrcFile(srcfile) => (srcfile.lines()?, srcfile.inner.clone()), + ParseInput::SrcFile(srcfile) => (srcfile.lines()?, srcfile.inner.inner.clone()), }; let result = diff --git a/crates/harp/src/parser/parse_data.rs b/crates/harp/src/parser/parse_data.rs index 2b4e07c7a..4543c739c 100644 --- a/crates/harp/src/parser/parse_data.rs +++ b/crates/harp/src/parser/parse_data.rs @@ -35,7 +35,7 @@ pub enum ParseDataKind { impl ParseData { pub fn from_srcfile(srcfile: &harp::srcref::SrcFile) -> harp::Result { let data = RFunction::new("utils", "getParseData") - .add(srcfile.inner.sexp) + .add(srcfile.inner.inner.sexp) .call()?; if data.sexp == harp::RObject::null().sexp { diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index d7b76782d..7eac60084 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -8,18 +8,22 @@ use core::f64; use anyhow::anyhow; +use stdext::result::ResultExt; use stdext::unwrap; use crate::exec::RFunction; use crate::exec::RFunctionExt; use crate::vector::IntegerVector; use crate::vector::Vector; +use crate::Environment; use crate::RObject; /// Structured representation of `srcref` integer vectors /// 0-based offsets. #[derive(Debug)] pub struct SrcRef { + pub inner: IntegerVector, + /// Lines and virtual lines may differ if a `#line` directive is used in code: /// the former just counts actual lines, the latter respects the directive. /// `line` corresponds to `line_parsed` in the original base R srcref vector. @@ -33,7 +37,16 @@ pub struct SrcRef { #[derive(Clone, Debug)] pub struct SrcFile { - pub inner: RObject, + pub inner: Environment, +} + +impl SrcRef { + pub fn srcfile(&self) -> anyhow::Result { + let Some(srcfile) = self.inner.object.get_attribute("srcfile") else { + return Err(anyhow!("Can't find `srcfile` attribute")); + }; + SrcFile::wrap(srcfile) + } } // Takes user-facing object as input. The srcrefs are retrieved from @@ -111,6 +124,7 @@ impl TryFrom for SrcRef { line_virtual: line, column, column_byte, + inner: value, }) } } @@ -118,6 +132,19 @@ impl TryFrom for SrcRef { /// Creates the same sort of srcfile object as with `parse(text = )`. /// Takes code as an R string containing newlines, or as a R vector of lines. impl SrcFile { + pub fn wrap(value: RObject) -> anyhow::Result { + if value.kind() != libr::ENVSXP { + return Err(anyhow!("Expected an environment, got {:?}", value.kind())); + } + if !value.inherits("srcfile") { + return Err(anyhow!("Expected an srcfile, got {:?}", value.class())); + } + + Ok(Self { + inner: Environment::new(value), + }) + } + // Created by the R function `parse()` pub fn new_virtual(text: RObject) -> Self { let inner = RFunction::new("base", "srcfilecopy") @@ -128,7 +155,9 @@ impl SrcFile { // Unwrap safety: Should never fail, unless something is seriously wrong let inner = inner.unwrap(); - Self { inner } + Self { + inner: Environment::new(inner), + } } // Created by the C-level parser @@ -144,16 +173,25 @@ impl SrcFile { } CLASS.with(|c| inner.set_attribute("class", c.sexp)); - Self { inner } + Self { + inner: Environment::new(inner), + } } pub fn lines(&self) -> harp::Result { RFunction::new("base", "getSrcLines") - .add(self.inner.sexp) + .add(self.inner.inner.sexp) .param("first", 1) .param("last", f64::INFINITY) .call() } + + pub fn filename(&self) -> anyhow::Result { + // In theory we should check if `filename` is relative, and prefix it + // with `wd` in that case, if `wd` is set. For now we only use this + // method to fetch our own URIs. + self.inner.get("filename")?.try_into().anyhow() + } } impl From<&str> for SrcFile { diff --git a/crates/harp/src/session.rs b/crates/harp/src/session.rs index 261ab0164..48dde118c 100644 --- a/crates/harp/src/session.rs +++ b/crates/harp/src/session.rs @@ -28,6 +28,7 @@ static mut NFRAME_CALL: Option = None; static mut SYS_CALLS_CALL: Option = None; static mut SYS_FRAMES_CALL: Option = None; static mut CURRENT_ENV_CALL: Option = None; +static mut CURRENT_FUNCTION_CALL: Option = None; pub fn r_n_frame() -> crate::Result { SESSION_INIT.call_once(init_interface); @@ -66,6 +67,11 @@ pub fn r_current_frame() -> RObject { unsafe { libr::Rf_eval(CURRENT_ENV_CALL.unwrap_unchecked(), R_BaseEnv) }.into() } +pub fn r_current_function() -> RObject { + SESSION_INIT.call_once(init_interface); + unsafe { libr::Rf_eval(CURRENT_FUNCTION_CALL.unwrap_unchecked(), R_BaseEnv) }.into() +} + pub fn r_sys_functions() -> crate::Result { unsafe { let mut protect = RProtect::new(); @@ -167,5 +173,10 @@ fn init_interface() { let current_env_call = r_lang!(closure.sexp); R_PreserveObject(current_env_call); CURRENT_ENV_CALL = Some(current_env_call); + + let closure = harp::parse_eval_base("function() sys.function(-1)").unwrap(); + let current_function_call = r_lang!(closure.sexp); + R_PreserveObject(current_function_call); + CURRENT_FUNCTION_CALL = Some(current_function_call); } } diff --git a/crates/harp/src/vector/integer_vector.rs b/crates/harp/src/vector/integer_vector.rs index 549385f71..f5bee507e 100644 --- a/crates/harp/src/vector/integer_vector.rs +++ b/crates/harp/src/vector/integer_vector.rs @@ -19,7 +19,7 @@ use crate::vector::Vector; #[harp_macros::vector] pub struct IntegerVector { - object: RObject, + pub object: RObject, } impl Vector for IntegerVector { diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index 54a9c0f1f..d74527023 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -274,6 +274,8 @@ functions::generate! { pub fn RDEBUG(x: SEXP) -> std::ffi::c_int; + pub fn SET_RDEBUG(x: SEXP, i: std::ffi::c_int); + pub fn REAL(x: SEXP) -> *mut f64; pub fn REAL_ELT(x: SEXP, i: R_xlen_t) -> f64;