From 7b6a97e64140153f706b43472781c1a7a11a0be2 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 9 Dec 2025 10:50:02 +0100 Subject: [PATCH 01/42] Switch to permanent DAP session --- crates/ark/src/dap/dap.rs | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index be0396582..3f3e88401 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -132,20 +132,19 @@ impl Dap { self.load_variables_references(&mut stack); self.stack = Some(stack); - if self.is_debugging { - if let Some(tx) = &self.backend_events_tx { - tx.send(DapBackendEvent::Stopped(DapStoppedEvent { preserve_focus })) + log::trace!("DAP: Sending `start_debug` events"); + + if let Some(comm_tx) = &self.comm_tx { + // Ask frontend to connect to the DAP + comm_tx + .send(amalthea::comm_rpc_message!("start_debug")) + .log_err(); + + if let Some(dap_tx) = &self.backend_events_tx { + dap_tx + .send(DapBackendEvent::Stopped(DapStoppedEvent { preserve_focus })) .log_err(); } - } else { - if let Some(tx) = &self.comm_tx { - // Ask frontend to connect to the DAP - log::trace!("DAP: Sending `start_debug` event"); - let msg = amalthea::comm_rpc_message!("start_debug"); - tx.send(msg).log_err(); - } - - self.is_debugging = true; } } @@ -158,12 +157,17 @@ impl Dap { self.is_debugging = false; if self.is_connected { - if let Some(_) = &self.comm_tx { + log::trace!("DAP: Sending `stop_debug` events"); + + if let Some(comm_tx) = &self.comm_tx { + comm_tx + .send(amalthea::comm_rpc_message!("stop_debug")) + .log_err(); + // Let frontend know we've quit the debugger so it can // terminate the debugging session and disconnect. 
- if let Some(tx) = &self.backend_events_tx { - log::trace!("DAP: Sending `stop_debug` event"); - tx.send(DapBackendEvent::Terminated).log_err(); + if let Some(datp_tx) = &self.backend_events_tx { + datp_tx.send(DapBackendEvent::Continued).log_err(); } } // else: If not connected to a frontend, the DAP client should From 114a728ed7001d736cfa0fead846193fe7f7e9c2 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 9 Dec 2025 14:07:40 +0100 Subject: [PATCH 02/42] Handle breakpoints request --- crates/ark/src/dap/dap.rs | 52 ++++++++++++++++++++++++ crates/ark/src/dap/dap_server.rs | 68 ++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 3f3e88401..25adfa54e 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -17,12 +17,20 @@ use crossbeam::channel::Sender; use harp::object::RObject; use stdext::result::ResultExt; use stdext::spawn; +use url::Url; use crate::console_debug::FrameInfo; use crate::dap::dap_server; use crate::request::RRequest; use crate::thread::RThreadSafe; +#[derive(Debug, Clone)] +pub struct Breakpoint { + pub id: i64, + pub line: u32, + pub verified: bool, +} + #[derive(Debug, Copy, Clone)] pub enum DapBackendEvent { /// Event sent when a normal (non-browser) prompt marks the end of a @@ -35,6 +43,9 @@ pub enum DapBackendEvent { /// Event sent when a browser prompt is emitted during an existing /// debugging session Stopped(DapStoppedEvent), + + /// Event sent when a breakpoint has been verified + BreakpointVerified(i64), } #[derive(Debug, Copy, Clone)] @@ -56,6 +67,9 @@ pub struct Dap { /// Current call stack pub stack: Option>, + /// Known breakpoints keyed by URI + pub breakpoints: HashMap>, + /// Map of `source` -> `source_reference` used for frames that don't have /// associated files (i.e. no `srcref` attribute). 
The `source` is the key to /// ensure that we don't insert the same function multiple times, which would result @@ -83,6 +97,9 @@ pub struct Dap { /// information. current_variables_reference: i64, + /// Monotonically increasing breakpoint ID counter + current_breakpoint_id: i64, + /// Channel for sending events to the comm frontend. comm_tx: Option>, @@ -101,10 +118,12 @@ impl Dap { is_connected: false, backend_events_tx: None, stack: None, + breakpoints: HashMap::new(), fallback_sources: HashMap::new(), frame_id_to_variables_reference: HashMap::new(), variables_reference_to_r_object: HashMap::new(), current_variables_reference: 1, + current_breakpoint_id: 1, comm_tx: None, r_request_tx, shared_self: None, @@ -232,6 +251,39 @@ impl Dap { variables_reference } + + pub fn next_breakpoint_id(&mut self) -> i64 { + let id = self.current_breakpoint_id; + self.current_breakpoint_id += 1; + id + } + + /// Verify breakpoints within a line range for a given URI + /// + /// Loops over all breakpoints for the URI and verifies any unverified + /// breakpoints that fall within the range [start_line, end_line]. + /// Sends a `BreakpointVerified` event for each newly verified breakpoint. 
+ pub fn verify_range(&mut self, uri: &Url, start_line: usize, end_line: usize) { + let Some(bp_list) = self.breakpoints.get_mut(uri) else { + return; + }; + + for bp in bp_list.iter_mut() { + if bp.verified { + continue; + } + + let line = bp.line as usize; + if line >= start_line && line <= end_line { + bp.verified = true; + + if let Some(tx) = &self.backend_events_tx { + tx.send(DapBackendEvent::BreakpointVerified(bp.id)) + .log_err(); + } + } + } + } } // Handler for Amalthea socket threads diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 014f2e8fb..cc3d4c18b 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -30,7 +30,9 @@ use dap::server::ServerOutput; use dap::types::*; use stdext::result::ResultExt; use stdext::spawn; +use url::Url; +use super::dap::Breakpoint; use super::dap::Dap; use super::dap::DapBackendEvent; use crate::console_debug::FrameInfo; @@ -172,6 +174,17 @@ fn listen_dap_events( DapBackendEvent::Terminated => { Event::Terminated(None) }, + + DapBackendEvent::BreakpointVerified(id) => { + Event::Breakpoint(BreakpointEventBody { + reason: BreakpointEventReason::Changed, + breakpoint: dap::types::Breakpoint { + id: Some(id), + verified: true, + ..Default::default() + }, + }) + }, }; let mut output = output.lock().unwrap(); @@ -237,6 +250,9 @@ impl DapServer { Command::Threads => { self.handle_threads(req); }, + Command::SetBreakpoints(args) => { + self.handle_set_breakpoints(req, args); + }, Command::SetExceptionBreakpoints(args) => { self.handle_set_exception_breakpoints(req, args); }, @@ -297,6 +313,58 @@ impl DapServer { self.send_event(Event::Initialized); } + fn handle_set_breakpoints(&mut self, req: Request, args: SetBreakpointsArguments) { + let path = args.source.path.clone().unwrap_or_default(); + + let uri = match Url::from_file_path(&path) { + Ok(uri) => uri, + Err(()) => { + log::error!("Failed to convert path to URI: '{path}'"); + let rsp = 
req.error(&format!("Invalid path: {path}")); + self.respond(rsp); + return; + }, + }; + + let source_breakpoints = args.breakpoints.unwrap_or_default(); + + let mut state = self.state.lock().unwrap(); + + let breakpoints: Vec = source_breakpoints + .iter() + .map(|bp| Breakpoint { + id: state.next_breakpoint_id(), + line: bp.line as u32, + verified: false, + }) + .collect(); + + log::trace!( + "DAP: URI {uri} now has {} unverified breakpoints", + breakpoints.len() + ); + + state.breakpoints.insert(uri, breakpoints.clone()); + + drop(state); + + let response_breakpoints: Vec = breakpoints + .iter() + .map(|bp| dap::types::Breakpoint { + id: Some(bp.id), + verified: bp.verified, + line: Some(bp.line as i64), + ..Default::default() + }) + .collect(); + + let rsp = req.success(ResponseBody::SetBreakpoints(SetBreakpointsResponse { + breakpoints: response_breakpoints, + })); + + self.respond(rsp); + } + fn handle_attach(&mut self, req: Request, _args: AttachRequestArguments) { let rsp = req.success(ResponseBody::Attach); self.respond(rsp); From 6755e148a52a15a6cf6811999aff9d7019a146ac Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 10 Dec 2025 20:14:25 +0100 Subject: [PATCH 03/42] Inject breakpoints --- Cargo.lock | 1 + Cargo.toml | 1 + crates/ark/Cargo.toml | 1 + crates/ark/src/console_annotate.rs | 710 +++++++++++++++++- crates/ark/src/dap/dap.rs | 3 +- crates/ark/src/dap/dap_server.rs | 4 +- crates/ark/src/interface.rs | 19 +- ...reakpoints_before_within_after_nested.snap | 16 + ...sts__inject_breakpoints_in_brace_list.snap | 10 + ...e__tests__inject_breakpoints_multiple.snap | 12 + ...ts__inject_breakpoints_multiple_lists.snap | 14 + ...n_closing_brace_with_valid_breakpoint.snap | 10 + ...tests__inject_breakpoints_single_line.snap | 9 + ...s__inject_breakpoints_with_blank_line.snap | 10 + 14 files changed, 804 insertions(+), 16 deletions(-) create mode 100644 
crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap diff --git a/Cargo.lock b/Cargo.lock index 31fbd4b37..b68ab7454 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -347,6 +347,7 @@ version = "0.1.222" dependencies = [ "actix-web", "aether_lsp_utils", + "air_r_factory", "air_r_parser", "air_r_syntax", "amalthea", diff --git a/Cargo.toml b/Cargo.toml index 38aea55bf..f0b2c7c12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ authors = ["Posit Software, PBC"] [workspace.dependencies] biome_line_index = { git = "https://github.com/biomejs/biome", rev = "c13fc60726883781e4530a4437724273b560c8e0" } biome_rowan = { git = "https://github.com/biomejs/biome", rev = "c13fc60726883781e4530a4437724273b560c8e0" } +aether_factory = { git = "https://github.com/posit-dev/air", package = "air_r_factory", rev = "f959e32eee91" } aether_lsp_utils = { git = "https://github.com/posit-dev/air", rev = "f959e32eee91" } aether_parser = { git = "https://github.com/posit-dev/air", package = "air_r_parser", rev = "f959e32eee91" } aether_syntax = { git = "https://github.com/posit-dev/air", package = "air_r_syntax", rev = "f959e32eee91" } diff --git a/crates/ark/Cargo.toml b/crates/ark/Cargo.toml index ffa669405..b01ff0cba 100644 --- a/crates/ark/Cargo.toml 
+++ b/crates/ark/Cargo.toml @@ -27,6 +27,7 @@ dashmap = "5.4.0" aether_parser.workspace = true aether_syntax.workspace = true aether_lsp_utils.workspace = true +aether_factory.workspace = true ego-tree = "0.6.2" harp = { path = "../harp" } http = "0.2.9" diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 4bb925fe9..39e247eef 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -4,10 +4,27 @@ // Copyright (C) 2025 Posit Software, PBC. All rights reserved. // +use aether_syntax::RBracedExpressions; +use aether_syntax::RExpressionList; +use aether_syntax::RRoot; +use aether_syntax::RSyntaxNode; use amalthea::wire::execute_request::CodeLocation; +use biome_line_index::LineIndex; use biome_rowan::AstNode; +use biome_rowan::AstNodeList; +use biome_rowan::SyntaxElement; +use biome_rowan::TextRange; +use biome_rowan::TextSize; +use biome_rowan::WalkEvent; +use url::Url; -pub(crate) fn annotate_input(code: &str, location: CodeLocation) -> String { +use crate::dap::dap::Breakpoint; + +pub(crate) fn annotate_input( + code: &str, + location: CodeLocation, + breakpoints: Option<&mut [Breakpoint]>, +) -> String { let node = aether_parser::parse(code, Default::default()).tree(); let Some(first_token) = node.syntax().first_token() else { return code.into(); @@ -57,7 +74,367 @@ pub(crate) fn annotate_input(code: &str, location: CodeLocation) -> String { return code.into(); }; - new_node.to_string() + let out = new_node.to_string(); + + if let Some(breakpoints) = breakpoints { + let line_index = LineIndex::new(&out); + inject_breakpoints(&out, location, breakpoints, &line_index) + } else { + out + } +} + +#[allow(dead_code)] +pub(crate) fn inject_breakpoints( + code: &str, + location: CodeLocation, + breakpoints: &mut [Breakpoint], + line_index: &LineIndex, +) -> String { + let root = aether_parser::parse(code, Default::default()).tree(); + + // Filter breakpoints to only those within the source's valid 
range + let breakpoints: Vec<_> = breakpoints + .iter_mut() + .filter(|bp| bp.line >= location.start.line && bp.line <= location.end.line) + .collect(); + + if breakpoints.is_empty() { + return code.into(); + } + + // Phase 1: Find breakpoint anchors + let anchors = find_breakpoint_anchors(root.syntax(), breakpoints, &location.uri, line_index); + + if anchors.is_empty() { + return code.into(); + } + + // Phase 2: Inject breakpoints + inject_breakpoint_calls(root.syntax(), anchors, &location.uri) +} + +struct BreakpointAnchor { + breakpoint_id: i64, + actual_line: u32, +} + +fn find_breakpoint_anchors( + root: &RSyntaxNode, + mut breakpoints: Vec<&mut Breakpoint>, + uri: &Url, + line_index: &LineIndex, +) -> Vec { + // Sort breakpoints by line ascending + breakpoints.sort_by_key(|bp| bp.line); + + let mut anchors = Vec::new(); + let mut bp_iter = breakpoints.into_iter().peekable(); + + // Start from the root's expression list + let Some(r) = RRoot::cast(root.clone()) else { + return anchors; + }; + let root_list = r.expressions(); + + find_anchors_in_list( + &root_list, + &mut bp_iter, + &mut anchors, + uri, + line_index, + true, + ); + + anchors +} + +fn find_anchors_in_list<'a>( + list: &RExpressionList, + breakpoints: &mut std::iter::Peekable>, + anchors: &mut Vec, + uri: &Url, + line_index: &LineIndex, + is_root: bool, +) { + let elements: Vec<_> = list.into_iter().collect(); + + if elements.is_empty() { + return; + } + + let mut i = 0; + while i < elements.len() { + let Some(bp) = breakpoints.peek() else { + return; + }; + + let target_line = bp.line; + let current = &elements[i]; + let current_start_line = get_start_line(current.syntax(), line_index); + + // Base case: target line is at or before current element's start + if target_line <= current_start_line { + let bp = breakpoints.next().unwrap(); + bp.line = current_start_line; + anchors.push(BreakpointAnchor { + breakpoint_id: bp.id, + actual_line: current_start_line, + }); + continue; + } + + // Check if 
target is beyond current element + let next_start_line = if i + 1 < elements.len() { + Some(get_start_line(elements[i + 1].syntax(), line_index)) + } else { + None + }; + + // Recursion case: target must be within current element + if next_start_line.map_or(true, |next| target_line < next) { + // If we're at the last element of a nested list and there's no next element, + // the target might be beyond this list. Pop back up to let the parent handle it. + if !is_root && next_start_line.is_none() { + return; + } + + // Search within current element for brace lists + let anchors_before = anchors.len(); + if find_anchor_in_element(current.syntax(), breakpoints, anchors, uri, line_index) + .is_some() + { + // A nested brace list was found and processed. + if anchors.len() > anchors_before { + // Anchor(s) placed in nested list. Continue without incrementing + // `i` to re-check this element for any remaining breakpoints + // (handles multiple breakpoints in same block). + continue; + } + // The nested list was exhausted without placing an anchor for the + // current breakpoint. This means the target line is beyond all + // expressions in the nested list (e.g., on a closing `}` line with + // no executable code). Mark this breakpoint as invalid. 
+ let bp = breakpoints.next().unwrap(); + bp.invalid = true; + continue; + } else { + // No brace list found, use current element as fallback + let bp = breakpoints.next().unwrap(); + bp.line = current_start_line; + anchors.push(BreakpointAnchor { + breakpoint_id: bp.id, + actual_line: current_start_line, + }); + continue; + } + } + + // Continue case: move to next element + i += 1; + } +} + +fn find_anchor_in_element<'a>( + element: &RSyntaxNode, + breakpoints: &mut std::iter::Peekable>, + anchors: &mut Vec, + uri: &Url, + line_index: &LineIndex, +) -> Option<()> { + use biome_rowan::WalkEvent; + + // Search for brace lists in descendants + for event in element.preorder() { + let node = match event { + WalkEvent::Enter(n) => n, + WalkEvent::Leave(_) => continue, + }; + + if let Some(braced) = RBracedExpressions::cast(node) { + let expr_list = braced.expressions(); + if !expr_list.is_empty() { + // Found a non-empty brace list, recurse into it + find_anchors_in_list(&expr_list, breakpoints, anchors, uri, line_index, false); + return Some(()); + } + } + } + + None +} + +fn inject_breakpoint_calls( + root: &RSyntaxNode, + mut anchors: Vec, + uri: &Url, +) -> String { + if anchors.is_empty() { + return root.to_string(); + } + + // Sort anchors by line DESCENDING so we modify from bottom to top. + // This preserves line numbers for earlier breakpoints. 
+ anchors.sort_by_key(|a| std::cmp::Reverse(a.actual_line)); + + let mut source = root.to_string(); + + // Process each breakpoint independently by re-parsing after each injection + for anchor_info in anchors { + // Re-parse the current source + let parse_result = aether_parser::parse(&source, Default::default()); + let root = parse_result.tree(); + let new_line_index = LineIndex::new(&source); + + // Find the anchor node at the target line + // We need to search the re-parsed tree for the node at actual_line + let Some(new_anchor) = + find_node_at_line(root.syntax(), anchor_info.actual_line, &new_line_index) + else { + continue; + }; + + // Get the parent list and find the anchor's index + let Some(parent) = new_anchor.parent() else { + continue; + }; + + let parent_children: Vec<_> = parent.children().collect(); + let Some(index) = parent_children + .iter() + .position(|child| child == &new_anchor) + else { + continue; + }; + + // Create the breakpoint call and modified anchor + let breakpoint_call = create_breakpoint_call(anchor_info.breakpoint_id); + let modified_anchor = add_line_directive_to_node(&new_anchor, anchor_info.actual_line, uri); + + // Inject the breakpoint by splicing + let modified_parent = parent.clone().splice_slots(index..=index, [ + Some(SyntaxElement::Node(breakpoint_call)), + Some(SyntaxElement::Node(modified_anchor)), + ]); + + // Propagate the change to the root + let new_root = propagate_change_to_root(&parent, modified_parent); + + // Update source for next iteration + source = new_root.to_string(); + } + + source +} + +/// Find a node at the specified line in the AST. +/// Returns the first direct child of a list (program or brace list) that starts at or after the target line. 
+fn find_node_at_line( + root: &RSyntaxNode, + target_line: u32, + line_index: &LineIndex, +) -> Option { + // We need to find expression lists and check their children + for event in root.preorder() { + let node = match event { + WalkEvent::Enter(n) => n, + WalkEvent::Leave(_) => continue, + }; + + // Check if this is a root or brace expression list + let expr_list = if let Some(r) = RRoot::cast(node.clone()) { + r.expressions() + } else if let Some(braced) = RBracedExpressions::cast(node.clone()) { + braced.expressions() + } else { + continue; + }; + + // Check each child of this list + for expr in expr_list.into_iter() { + let child_line = get_start_line(expr.syntax(), line_index); + if child_line == target_line { + return Some(expr.into_syntax()); + } + } + } + + None +} + +/// Propagate a node replacement up to the root of the tree. +fn propagate_change_to_root(original: &RSyntaxNode, replacement: RSyntaxNode) -> RSyntaxNode { + let mut current_original = original.clone(); + let mut current_replacement = replacement; + + while let Some(parent) = current_original.parent() { + let new_parent = parent + .clone() + .replace_child( + current_original.clone().into(), + current_replacement.clone().into(), + ) + .expect("Failed to replace child"); + + current_original = parent; + current_replacement = new_parent; + } + + current_replacement +} + +fn get_start_line(node: &RSyntaxNode, line_index: &LineIndex) -> u32 { + let text_range: TextRange = node.text_trimmed_range(); + let offset: TextSize = text_range.start(); + line_index.line_col(offset).map(|lc| lc.line).unwrap_or(0) +} + +fn create_breakpoint_call(breakpoint_id: i64) -> RSyntaxNode { + let code = format!("\n.ark_breakpoint(browser(), {breakpoint_id})\n"); + aether_parser::parse(&code, Default::default()).syntax() +} + +fn add_line_directive_to_node(node: &RSyntaxNode, line: u32, uri: &Url) -> RSyntaxNode { + let Some(first_token) = node.first_token() else { + return node.clone(); + }; + + let line_directive 
= format!("#line {line} \"{uri}\"", line = line + 1); + + // Collect existing leading trivia, but skip only the first newline to avoid double blank lines + // Preserve any additional newlines (blank lines) that may follow + let existing_trivia: Vec<_> = first_token + .leading_trivia() + .pieces() + .enumerate() + .filter_map(|(i, piece)| { + // Skip only the very first newline + if i == 0 && piece.kind() == biome_rowan::TriviaPieceKind::Newline { + None + } else { + Some((piece.kind(), piece.text().to_string())) + } + }) + .collect(); + + // Create new trivia with #line directive prepended, followed by a newline + let new_trivia: Vec<_> = vec![ + ( + biome_rowan::TriviaPieceKind::SingleLineComment, + line_directive, + ), + (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), + ] + .into_iter() + .chain(existing_trivia.into_iter()) + .collect(); + + let new_first_token = + first_token.with_leading_trivia(new_trivia.iter().map(|(k, t)| (*k, t.as_str()))); + + node.clone() + .replace_child(first_token.into(), new_first_token.into()) + .unwrap_or_else(|| node.clone()) } #[cfg(test)] @@ -80,7 +457,7 @@ mod tests { fn test_annotate_input_basic() { let code = "x <- 1\ny <- 2"; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); insta::assert_snapshot!(result); } @@ -88,7 +465,7 @@ mod tests { fn test_annotate_input_shifted_line() { let code = "x <- 1\ny <- 2"; let location = make_location(10, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); insta::assert_snapshot!(result); } @@ -96,7 +473,7 @@ mod tests { fn test_annotate_input_shifted_character() { let code = "x <- 1\ny <- 2"; let location = make_location(0, 5); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); insta::assert_snapshot!(result); } @@ -104,7 +481,7 @@ mod tests { fn test_annotate_input_shifted_line_and_character() { 
let code = "x <- 1\ny <- 2"; let location = make_location(10, 5); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); insta::assert_snapshot!(result); } @@ -112,7 +489,7 @@ mod tests { fn test_annotate_input_with_existing_whitespace() { let code = " x <- 1\n y <- 2"; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); insta::assert_snapshot!(result); } @@ -120,7 +497,7 @@ mod tests { fn test_annotate_input_with_existing_whitespace_shifted() { let code = " x <- 1\n y <- 2"; let location = make_location(0, 2); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); insta::assert_snapshot!(result); } @@ -128,7 +505,7 @@ mod tests { fn test_annotate_input_with_existing_comment() { let code = "# comment\nx <- 1"; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); insta::assert_snapshot!(result); } @@ -136,7 +513,320 @@ mod tests { fn test_annotate_input_empty_code() { let code = ""; let location = make_location(0, 0); - let result = annotate_input(code, location); + let result = annotate_input(code, location, None); + insta::assert_snapshot!(result); + } + + #[test] + fn test_inject_breakpoints_single_line() { + let code = "x <- 1\ny <- 2\nz <- 3"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 2, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 1, + verified: false, + invalid: false, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + assert!(!breakpoints[0].verified); + } + + #[test] + fn test_inject_breakpoints_multiple() { + let code = "x <- 1\ny <- 2\nz 
<- 3\nw <- 4"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 1, + verified: false, + invalid: false, + }, + Breakpoint { + id: 2, + line: 3, + verified: false, + invalid: false, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + assert!(!breakpoints[0].verified); + assert!(!breakpoints[1].verified); + assert!(!breakpoints[1].invalid); // Valid location + } + + #[test] + fn test_inject_breakpoints_in_brace_list() { + let code = "f <- function() {\n x <- 1\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 2, + verified: false, + invalid: false, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + assert!(!breakpoints[0].verified); + } + + #[test] + fn test_inject_breakpoints_out_of_range() { + let code = "x <- 1\ny <- 2"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 1, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 10, + verified: false, + invalid: false, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + // Should return unchanged code + assert_eq!(result, code); + assert!(!breakpoints[0].verified); + } + + #[test] + fn test_inject_breakpoints_multiple_lists() { + // This test has breakpoints in different 
parent lists: + // - One in the root list + // - One in a nested brace list + // This may expose issues with the current propagate_change_to_root approach + let code = "x <- 1\nf <- function() {\n y <- 2\n z <- 3\n}\nw <- 4"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 2, + verified: false, + invalid: false, + }, + Breakpoint { + id: 2, + line: 5, + verified: false, + invalid: false, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + assert!(!breakpoints[0].verified); + assert!(!breakpoints[1].verified); + assert!(!breakpoints[1].invalid); // Valid location + } + + #[test] + fn test_inject_breakpoints_with_blank_line() { + // Test that blank lines before an anchor are preserved + let code = "x <- 1\n\n\ny <- 2"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 3, + verified: false, + invalid: false, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + assert!(!breakpoints[0].verified); + } + + #[test] + fn test_inject_breakpoints_on_closing_brace() { + // Breakpoint on a line with only `}` should be left unverified + // (no executable code there) + let code = "f <- function() {\n x <- 1\n}\ny <- 2"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints 
= vec![Breakpoint { + id: 1, + line: 2, // The `}` line + verified: false, + invalid: false, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + // Should return unchanged code since breakpoint is invalid + assert_eq!(result, code); + assert!(!breakpoints[0].verified); + + // Marked as invalid + assert!(breakpoints[0].invalid); + } + + #[test] + fn test_inject_breakpoints_on_closing_brace_with_valid_breakpoint() { + // One breakpoint on `}` (invalid) and one on valid code + let code = "f <- function() {\n x <- 1\n}\ny <- 2"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 2, // The `}` line - invalid + verified: false, + invalid: false, + }, + Breakpoint { + id: 2, + line: 3, // `y <- 2` - valid + verified: false, + invalid: false, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + + // Invalid location + assert!(!breakpoints[0].verified); + assert!(breakpoints[0].invalid); + + assert!(!breakpoints[1].verified); + assert!(!breakpoints[1].invalid); + } + + #[test] + fn test_inject_breakpoints_before_within_after_nested() { + // Comprehensive test with breakpoints: + // - Before nested list (line 0: `x <- 1`) + // - Within nested list (line 2: `y <- 2`) + // - After nested list (line 5: `w <- 4`) + let code = "x <- 1\nf <- function() {\n y <- 2\n z <- 3\n}\nw <- 4"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 0, // `x <- 1` - before nested + verified: false, + invalid: 
false, + }, + Breakpoint { + id: 2, + line: 2, // `y <- 2` - within nested + verified: false, + invalid: false, + }, + Breakpoint { + id: 3, + line: 5, // `w <- 4` - after nested + verified: false, + invalid: false, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); + assert!(!breakpoints[0].verified); + assert!(!breakpoints[0].invalid); + assert!(!breakpoints[1].verified); + assert!(!breakpoints[1].invalid); + assert!(!breakpoints[2].verified); + assert!(!breakpoints[2].invalid); } } diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 25adfa54e..044d9ed6a 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -29,6 +29,7 @@ pub struct Breakpoint { pub id: i64, pub line: u32, pub verified: bool, + pub invalid: bool, } #[derive(Debug, Copy, Clone)] @@ -269,7 +270,7 @@ impl Dap { }; for bp in bp_list.iter_mut() { - if bp.verified { + if bp.verified || bp.invalid { continue; } diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index cc3d4c18b..a602f559d 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -330,12 +330,14 @@ impl DapServer { let mut state = self.state.lock().unwrap(); + // Positron sends 1-based line offsets, but this is configurable by client let breakpoints: Vec = source_breakpoints .iter() .map(|bp| Breakpoint { id: state.next_breakpoint_id(), - line: bp.line as u32, + line: (bp.line - 1) as u32, verified: false, + invalid: false, }) .collect(); diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index a5b50568e..2ac26e45c 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -99,6 +99,7 @@ use uuid::Uuid; use crate::console_annotate::annotate_input; use crate::console_debug::FrameInfoId; +use crate::dap::dap::Breakpoint; use crate::dap::dap::DapBackendEvent; use crate::dap::Dap; use crate::errors; @@ -337,11 +338,12 @@ impl 
PendingInputs { pub(crate) fn read( code: &str, location: Option, + breakpoints: Option<&mut [Breakpoint]>, ) -> anyhow::Result> { let mut _srcfile = None; let input = if let Some(location) = location { - let annotated_code = annotate_input(code, location); + let annotated_code = annotate_input(code, location, breakpoints); _srcfile = Some(SrcFile::new_virtual_empty_filename(annotated_code.into())); harp::ParseInput::SrcFile(&_srcfile.unwrap()) } else if harp::get_option_bool("keep.source") { @@ -1345,19 +1347,28 @@ impl RMain { match input { ConsoleInput::Input(code, loc) => { // Parse input into pending expressions - match PendingInputs::read(&code, loc) { + + // Keep the DAP lock while we are updating breakpoints + let mut dap_guard = self.debug_dap.lock().unwrap(); + let breakpoints = loc + .as_ref() + .and_then(|loc| dap_guard.breakpoints.get_mut(&loc.uri)) + .map(|v| v.as_mut_slice()); + + match PendingInputs::read(&code, loc, breakpoints) { Ok(ParseResult::Success(inputs)) => { self.pending_inputs = inputs; }, Ok(ParseResult::SyntaxError(message)) => { - return Some(ConsoleResult::Error(message)) + return Some(ConsoleResult::Error(message)); }, Err(err) => { return Some(ConsoleResult::Error(format!( "Error while parsing input: {err:?}" - ))) + ))); }, } + drop(dap_guard); // Evaluate first expression if there is one if let Some(input) = self.pop_pending() { diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap new file mode 100644 index 000000000..f6a0eaa6b --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap @@ -0,0 +1,16 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +.ark_breakpoint(browser(), 1) +#line 1 "file:///test.R" +x <- 1 +f <- function() { +.ark_breakpoint(browser(), 2) 
+#line 3 "file:///test.R" + y <- 2 + z <- 3 +} +.ark_breakpoint(browser(), 3) +#line 6 "file:///test.R" +w <- 4 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap new file mode 100644 index 000000000..0c3b123a8 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap @@ -0,0 +1,10 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +f <- function() { + x <- 1 +.ark_breakpoint(browser(), 1) +#line 3 "file:///test.R" + y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap new file mode 100644 index 000000000..aa8a97f6c --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap @@ -0,0 +1,12 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +x <- 1 +.ark_breakpoint(browser(), 1) +#line 2 "file:///test.R" +y <- 2 +z <- 3 +.ark_breakpoint(browser(), 2) +#line 4 "file:///test.R" +w <- 4 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap new file mode 100644 index 000000000..65786edc6 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +x <- 1 +f <- function() { +.ark_breakpoint(browser(), 1) +#line 3 "file:///test.R" + y <- 2 + z <- 3 +} +.ark_breakpoint(browser(), 2) +#line 6 "file:///test.R" +w <- 4 diff --git 
a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap new file mode 100644 index 000000000..01e5a75f4 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap @@ -0,0 +1,10 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +f <- function() { + x <- 1 +} +.ark_breakpoint(browser(), 2) +#line 4 "file:///test.R" +y <- 2 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap new file mode 100644 index 000000000..3bf773e55 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap @@ -0,0 +1,9 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +x <- 1 +.ark_breakpoint(browser(), 1) +#line 2 "file:///test.R" +y <- 2 +z <- 3 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap new file mode 100644 index 000000000..84a2f4180 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap @@ -0,0 +1,10 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +x <- 1 +.ark_breakpoint(browser(), 1) +#line 4 "file:///test.R" + + +y <- 2 From ba3b376db0fdbc7a0a0747153f6cec6b3b37174d Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 11 Dec 2025 15:12:26 +0100 Subject: [PATCH 04/42] Verify breakpoints after evaluation --- crates/ark/src/dap/dap.rs | 4 +-- crates/ark/src/interface.rs | 24 +++++++++++++ crates/ark/src/modules/positron/debug.R | 5 +++ 
crates/harp/src/environment.rs | 4 +-- crates/harp/src/parse.rs | 2 +- crates/harp/src/parser/parse_data.rs | 2 +- crates/harp/src/parser/srcref.rs | 46 +++++++++++++++++++++--- crates/harp/src/vector/integer_vector.rs | 2 +- 8 files changed, 78 insertions(+), 11 deletions(-) diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 044d9ed6a..a92d64855 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -264,7 +264,7 @@ impl Dap { /// Loops over all breakpoints for the URI and verifies any unverified /// breakpoints that fall within the range [start_line, end_line]. /// Sends a `BreakpointVerified` event for each newly verified breakpoint. - pub fn verify_range(&mut self, uri: &Url, start_line: usize, end_line: usize) { + pub fn verify_breakpoints(&mut self, uri: &Url, start_line: u32, end_line: u32) { let Some(bp_list) = self.breakpoints.get_mut(uri) else { return; }; @@ -274,7 +274,7 @@ impl Dap { continue; } - let line = bp.line as usize; + let line = bp.line; if line >= start_line && line <= end_line { bp.verified = true; diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 2ac26e45c..8b0e1964d 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -78,6 +78,7 @@ use harp::session::r_traceback; use harp::srcref::get_srcref_list; use harp::srcref::srcref_list_get; use harp::srcref::SrcFile; +use harp::srcref::SrcRef; use harp::utils::r_is_data_frame; use harp::utils::r_typeof; use harp::R_MAIN_THREAD_ID; @@ -95,6 +96,7 @@ use serde_json::json; use stdext::result::ResultExt; use stdext::*; use tokio::sync::mpsc::UnboundedReceiver as AsyncUnboundedReceiver; +use url::Url; use uuid::Uuid; use crate::console_annotate::annotate_input; @@ -2388,6 +2390,24 @@ impl RMain { } } + pub(crate) fn verify_breakpoints(&self, srcref: RObject) { + let Some(srcref) = SrcRef::try_from(srcref).log_err() else { + return; + }; + + let Some(uri) = srcref + .srcfile() + .and_then(|srcfile| 
srcfile.filename()) + .and_then(|filename| Url::parse(&filename).anyhow()) + .log_err() + else { + return; + }; + + let mut dap = self.debug_dap.lock().unwrap(); + dap.verify_breakpoints(&uri, srcref.line_virtual.start, srcref.line_virtual.end); + } + #[cfg(not(test))] // Avoid warnings in unit test pub(crate) fn read_console_frame(&self) -> RObject { self.read_console_frame.borrow().clone() @@ -2573,6 +2593,10 @@ fn r_read_console_impl( main.read_console_nested_return.set(false); } + // We verify breakpoints _after_ evaluation is complete. An + // error will prevent verification. + main.verify_breakpoints(RObject::from(srcref)); + libr::Rf_unprotect(2); return 1; } diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index a0306fd41..84daacd3e 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -700,3 +700,8 @@ non_parseable_pattern_info <- function(pattern, replacement) { non_parseable_fixed_info <- function(pattern, replacement) { list(pattern = pattern, replacement = replacement, fixed = TRUE) } + +#' @export +.ark.breakpoint <- function(expr, id) { + expr +} diff --git a/crates/harp/src/environment.rs b/crates/harp/src/environment.rs index e65705d62..55db89849 100644 --- a/crates/harp/src/environment.rs +++ b/crates/harp/src/environment.rs @@ -17,13 +17,13 @@ use crate::symbol::RSymbol; const FRAME_LOCK_MASK: std::ffi::c_int = 1 << 14; -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Environment { pub inner: RObject, filter: EnvironmentFilter, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum EnvironmentFilter { None, ExcludeHidden, diff --git a/crates/harp/src/parse.rs b/crates/harp/src/parse.rs index 43a141afc..5e7d887de 100644 --- a/crates/harp/src/parse.rs +++ b/crates/harp/src/parse.rs @@ -105,7 +105,7 @@ pub fn parse_status<'a>(input: &ParseInput<'a>) -> crate::Result { let (text, srcfile) = match input { ParseInput::Text(text) => (as_parse_text(text), 
RObject::null()), - ParseInput::SrcFile(srcfile) => (srcfile.lines()?, srcfile.inner.clone()), + ParseInput::SrcFile(srcfile) => (srcfile.lines()?, srcfile.inner.inner.clone()), }; let result = diff --git a/crates/harp/src/parser/parse_data.rs b/crates/harp/src/parser/parse_data.rs index 2b4e07c7a..4543c739c 100644 --- a/crates/harp/src/parser/parse_data.rs +++ b/crates/harp/src/parser/parse_data.rs @@ -35,7 +35,7 @@ pub enum ParseDataKind { impl ParseData { pub fn from_srcfile(srcfile: &harp::srcref::SrcFile) -> harp::Result { let data = RFunction::new("utils", "getParseData") - .add(srcfile.inner.sexp) + .add(srcfile.inner.inner.sexp) .call()?; if data.sexp == harp::RObject::null().sexp { diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index d7b76782d..7eac60084 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -8,18 +8,22 @@ use core::f64; use anyhow::anyhow; +use stdext::result::ResultExt; use stdext::unwrap; use crate::exec::RFunction; use crate::exec::RFunctionExt; use crate::vector::IntegerVector; use crate::vector::Vector; +use crate::Environment; use crate::RObject; /// Structured representation of `srcref` integer vectors /// 0-based offsets. #[derive(Debug)] pub struct SrcRef { + pub inner: IntegerVector, + /// Lines and virtual lines may differ if a `#line` directive is used in code: /// the former just counts actual lines, the latter respects the directive. /// `line` corresponds to `line_parsed` in the original base R srcref vector. @@ -33,7 +37,16 @@ pub struct SrcRef { #[derive(Clone, Debug)] pub struct SrcFile { - pub inner: RObject, + pub inner: Environment, +} + +impl SrcRef { + pub fn srcfile(&self) -> anyhow::Result { + let Some(srcfile) = self.inner.object.get_attribute("srcfile") else { + return Err(anyhow!("Can't find `srcfile` attribute")); + }; + SrcFile::wrap(srcfile) + } } // Takes user-facing object as input. 
The srcrefs are retrieved from @@ -111,6 +124,7 @@ impl TryFrom for SrcRef { line_virtual: line, column, column_byte, + inner: value, }) } } @@ -118,6 +132,19 @@ impl TryFrom for SrcRef { /// Creates the same sort of srcfile object as with `parse(text = )`. /// Takes code as an R string containing newlines, or as a R vector of lines. impl SrcFile { + pub fn wrap(value: RObject) -> anyhow::Result { + if value.kind() != libr::ENVSXP { + return Err(anyhow!("Expected an environment, got {:?}", value.kind())); + } + if !value.inherits("srcfile") { + return Err(anyhow!("Expected an srcfile, got {:?}", value.class())); + } + + Ok(Self { + inner: Environment::new(value), + }) + } + // Created by the R function `parse()` pub fn new_virtual(text: RObject) -> Self { let inner = RFunction::new("base", "srcfilecopy") @@ -128,7 +155,9 @@ impl SrcFile { // Unwrap safety: Should never fail, unless something is seriously wrong let inner = inner.unwrap(); - Self { inner } + Self { + inner: Environment::new(inner), + } } // Created by the C-level parser @@ -144,16 +173,25 @@ impl SrcFile { } CLASS.with(|c| inner.set_attribute("class", c.sexp)); - Self { inner } + Self { + inner: Environment::new(inner), + } } pub fn lines(&self) -> harp::Result { RFunction::new("base", "getSrcLines") - .add(self.inner.sexp) + .add(self.inner.inner.sexp) .param("first", 1) .param("last", f64::INFINITY) .call() } + + pub fn filename(&self) -> anyhow::Result { + // In theory we should check if `filename` is relative, and prefix it + // with `wd` in that case, if `wd` is set. For now we only use this + // method to fetch our own URIs. 
+ self.inner.get("filename")?.try_into().anyhow() + } } impl From<&str> for SrcFile { diff --git a/crates/harp/src/vector/integer_vector.rs b/crates/harp/src/vector/integer_vector.rs index 549385f71..f5bee507e 100644 --- a/crates/harp/src/vector/integer_vector.rs +++ b/crates/harp/src/vector/integer_vector.rs @@ -19,7 +19,7 @@ use crate::vector::Vector; #[harp_macros::vector] pub struct IntegerVector { - object: RObject, + pub object: RObject, } impl Vector for IntegerVector { From 468138c8bd07e6513aa9fcaabc38f2a7b76032ff Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 11 Dec 2025 15:45:58 +0100 Subject: [PATCH 05/42] Implement breakpoint detection in `ReadConsole` --- crates/ark/src/interface.rs | 20 ++++++++++++++++++++ crates/ark/src/modules/positron/debug.R | 10 +++++++--- crates/harp/src/session.rs | 11 +++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 8b0e1964d..30c19706a 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -658,6 +658,16 @@ impl RMain { // Initialise Ark's last value libr::SETCDR(r_symbol!(".ark_last_value"), harp::r_null()); + + // Store `.ark_breakpoint` in base namespace so it's maximally reachable + libr::SETCDR( + r_symbol!(".ark_breakpoint"), + // Originally defined in Positron namespace, get it from there + Environment::view(ARK_ENVS.positron_ns) + .get(".ark_breakpoint") + .unwrap() + .sexp, + ); } // Now that R has started (emitting any startup messages that we capture in the @@ -996,6 +1006,16 @@ impl RMain { self.handle_active_request(&info, ConsoleValue::Success(result)); } + // If debugger is active, get current function and check whether it + // inherits from `ark_breakpoint`. If it does, send `n` automatically. 
+ if harp::r_current_function().inherits("ark_breakpoint") { + self.debug_preserve_focus = false; + self.debug_send_dap(DapBackendEvent::Continued); + + Self::on_console_input(buf, buflen, String::from("n")).unwrap(); + return ConsoleResult::NewInput; + } + // In the future we'll also send browser information, see // https://github.com/posit-dev/positron/issues/3001. Currently this is // a push model where we send the console inputs at each round. In the diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index 84daacd3e..2277552dc 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -702,6 +702,10 @@ non_parseable_fixed_info <- function(pattern, replacement) { } #' @export -.ark.breakpoint <- function(expr, id) { - expr -} +.ark_breakpoint <- structure( + function(expr, id) { + # TODO: Don't force `expr` if breakpoint is disabled + expr + }, + class = "ark_breakpoint" +) diff --git a/crates/harp/src/session.rs b/crates/harp/src/session.rs index 261ab0164..48dde118c 100644 --- a/crates/harp/src/session.rs +++ b/crates/harp/src/session.rs @@ -28,6 +28,7 @@ static mut NFRAME_CALL: Option = None; static mut SYS_CALLS_CALL: Option = None; static mut SYS_FRAMES_CALL: Option = None; static mut CURRENT_ENV_CALL: Option = None; +static mut CURRENT_FUNCTION_CALL: Option = None; pub fn r_n_frame() -> crate::Result { SESSION_INIT.call_once(init_interface); @@ -66,6 +67,11 @@ pub fn r_current_frame() -> RObject { unsafe { libr::Rf_eval(CURRENT_ENV_CALL.unwrap_unchecked(), R_BaseEnv) }.into() } +pub fn r_current_function() -> RObject { + SESSION_INIT.call_once(init_interface); + unsafe { libr::Rf_eval(CURRENT_FUNCTION_CALL.unwrap_unchecked(), R_BaseEnv) }.into() +} + pub fn r_sys_functions() -> crate::Result { unsafe { let mut protect = RProtect::new(); @@ -167,5 +173,10 @@ fn init_interface() { let current_env_call = r_lang!(closure.sexp); R_PreserveObject(current_env_call); 
CURRENT_ENV_CALL = Some(current_env_call); + + let closure = harp::parse_eval_base("function() sys.function(-1)").unwrap(); + let current_function_call = r_lang!(closure.sexp); + R_PreserveObject(current_function_call); + CURRENT_FUNCTION_CALL = Some(current_function_call); } } From 089916af9f396f4c9be7679ff85cbc2fb30fb5ca Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 11 Dec 2025 17:13:46 +0100 Subject: [PATCH 06/42] Correctly handle code location offsets --- crates/ark/src/console_annotate.rs | 206 +++++++++++++++--- crates/ark/src/dap/dap_server.rs | 2 +- ...tests__annotate_input_with_breakpoint.snap | 10 + ...__inject_breakpoints_with_line_offset.snap | 9 + ...t_breakpoints_with_line_offset_nested.snap | 10 + 5 files changed, 204 insertions(+), 33 deletions(-) create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 39e247eef..6d5724c20 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -25,9 +25,19 @@ pub(crate) fn annotate_input( location: CodeLocation, breakpoints: Option<&mut [Breakpoint]>, ) -> String { - let node = aether_parser::parse(code, Default::default()).tree(); + // First, inject breakpoints into the original code (before adding line directive). + // This ensures AST line numbers match the code coordinates we expect. 
+ let code_with_breakpoints = if let Some(breakpoints) = breakpoints { + let line_index = LineIndex::new(code); + inject_breakpoints(code, location.clone(), breakpoints, &line_index) + } else { + code.to_string() + }; + + // Now add the line directive to the (possibly modified) code + let node = aether_parser::parse(&code_with_breakpoints, Default::default()).tree(); let Some(first_token) = node.syntax().first_token() else { - return code.into(); + return code_with_breakpoints; }; let line_directive = format!( @@ -71,17 +81,10 @@ pub(crate) fn annotate_input( .clone() .replace_child(first_token.into(), new_first_token.into()) else { - return code.into(); + return code_with_breakpoints; }; - let out = new_node.to_string(); - - if let Some(breakpoints) = breakpoints { - let line_index = LineIndex::new(&out); - inject_breakpoints(&out, location, breakpoints, &line_index) - } else { - out - } + new_node.to_string() } #[allow(dead_code)] @@ -93,6 +96,10 @@ pub(crate) fn inject_breakpoints( ) -> String { let root = aether_parser::parse(code, Default::default()).tree(); + // The offset between document coordinates and code coordinates. + // Breakpoints are in document coordinates, but AST nodes are in code coordinates. 
+ let line_offset = location.start.line; + // Filter breakpoints to only those within the source's valid range let breakpoints: Vec<_> = breakpoints .iter_mut() @@ -104,19 +111,26 @@ pub(crate) fn inject_breakpoints( } // Phase 1: Find breakpoint anchors - let anchors = find_breakpoint_anchors(root.syntax(), breakpoints, &location.uri, line_index); + let anchors = find_breakpoint_anchors( + root.syntax(), + breakpoints, + &location.uri, + line_index, + line_offset, + ); if anchors.is_empty() { return code.into(); } // Phase 2: Inject breakpoints - inject_breakpoint_calls(root.syntax(), anchors, &location.uri) + inject_breakpoint_calls(root.syntax(), anchors, &location.uri, line_offset) } struct BreakpointAnchor { breakpoint_id: i64, - actual_line: u32, + /// The line in code coordinates (0-based within parsed code) + code_line: u32, } fn find_breakpoint_anchors( @@ -124,6 +138,7 @@ fn find_breakpoint_anchors( mut breakpoints: Vec<&mut Breakpoint>, uri: &Url, line_index: &LineIndex, + line_offset: u32, ) -> Vec { // Sort breakpoints by line ascending breakpoints.sort_by_key(|bp| bp.line); @@ -143,6 +158,7 @@ fn find_breakpoint_anchors( &mut anchors, uri, line_index, + line_offset, true, ); @@ -155,6 +171,7 @@ fn find_anchors_in_list<'a>( anchors: &mut Vec, uri: &Url, line_index: &LineIndex, + line_offset: u32, is_root: bool, ) { let elements: Vec<_> = list.into_iter().collect(); @@ -169,40 +186,49 @@ fn find_anchors_in_list<'a>( return; }; - let target_line = bp.line; + // Convert breakpoint line from document coordinates to code coordinates + let target_code_line = bp.line - line_offset; let current = &elements[i]; - let current_start_line = get_start_line(current.syntax(), line_index); + let current_code_line = get_start_line(current.syntax(), line_index); // Base case: target line is at or before current element's start - if target_line <= current_start_line { + if target_code_line <= current_code_line { let bp = breakpoints.next().unwrap(); - bp.line = 
current_start_line; + // Update bp.line to the actual document line where the breakpoint is placed + bp.line = current_code_line + line_offset; anchors.push(BreakpointAnchor { breakpoint_id: bp.id, - actual_line: current_start_line, + code_line: current_code_line, }); continue; } // Check if target is beyond current element - let next_start_line = if i + 1 < elements.len() { + let next_code_line = if i + 1 < elements.len() { Some(get_start_line(elements[i + 1].syntax(), line_index)) } else { None }; // Recursion case: target must be within current element - if next_start_line.map_or(true, |next| target_line < next) { + if next_code_line.map_or(true, |next| target_code_line < next) { // If we're at the last element of a nested list and there's no next element, // the target might be beyond this list. Pop back up to let the parent handle it. - if !is_root && next_start_line.is_none() { + if !is_root && next_code_line.is_none() { return; } // Search within current element for brace lists let anchors_before = anchors.len(); - if find_anchor_in_element(current.syntax(), breakpoints, anchors, uri, line_index) - .is_some() + if find_anchor_in_element( + current.syntax(), + breakpoints, + anchors, + uri, + line_index, + line_offset, + ) + .is_some() { // A nested brace list was found and processed. 
if anchors.len() > anchors_before { @@ -221,10 +247,11 @@ fn find_anchors_in_list<'a>( } else { // No brace list found, use current element as fallback let bp = breakpoints.next().unwrap(); - bp.line = current_start_line; + // Update bp.line to the actual document line where the breakpoint is placed + bp.line = current_code_line + line_offset; anchors.push(BreakpointAnchor { breakpoint_id: bp.id, - actual_line: current_start_line, + code_line: current_code_line, }); continue; } @@ -241,6 +268,7 @@ fn find_anchor_in_element<'a>( anchors: &mut Vec, uri: &Url, line_index: &LineIndex, + line_offset: u32, ) -> Option<()> { use biome_rowan::WalkEvent; @@ -255,7 +283,15 @@ fn find_anchor_in_element<'a>( let expr_list = braced.expressions(); if !expr_list.is_empty() { // Found a non-empty brace list, recurse into it - find_anchors_in_list(&expr_list, breakpoints, anchors, uri, line_index, false); + find_anchors_in_list( + &expr_list, + breakpoints, + anchors, + uri, + line_index, + line_offset, + false, + ); return Some(()); } } @@ -268,6 +304,7 @@ fn inject_breakpoint_calls( root: &RSyntaxNode, mut anchors: Vec, uri: &Url, + line_offset: u32, ) -> String { if anchors.is_empty() { return root.to_string(); @@ -275,7 +312,7 @@ fn inject_breakpoint_calls( // Sort anchors by line DESCENDING so we modify from bottom to top. // This preserves line numbers for earlier breakpoints. 
- anchors.sort_by_key(|a| std::cmp::Reverse(a.actual_line)); + anchors.sort_by_key(|a| std::cmp::Reverse(a.code_line)); let mut source = root.to_string(); @@ -286,10 +323,9 @@ fn inject_breakpoint_calls( let root = parse_result.tree(); let new_line_index = LineIndex::new(&source); - // Find the anchor node at the target line - // We need to search the re-parsed tree for the node at actual_line + // Find the anchor node at the target line (using code coordinates) let Some(new_anchor) = - find_node_at_line(root.syntax(), anchor_info.actual_line, &new_line_index) + find_node_at_line(root.syntax(), anchor_info.code_line, &new_line_index) else { continue; }; @@ -308,8 +344,10 @@ fn inject_breakpoint_calls( }; // Create the breakpoint call and modified anchor + // Line directive uses document coordinates (code_line + line_offset) let breakpoint_call = create_breakpoint_call(anchor_info.breakpoint_id); - let modified_anchor = add_line_directive_to_node(&new_anchor, anchor_info.actual_line, uri); + let doc_line = anchor_info.code_line + line_offset; + let modified_anchor = add_line_directive_to_node(&new_anchor, doc_line, uri); // Inject the breakpoint by splicing let modified_parent = parent.clone().splice_slots(index..=index, [ @@ -517,6 +555,38 @@ mod tests { insta::assert_snapshot!(result); } + #[test] + fn test_annotate_input_with_breakpoint() { + // Test the full annotate_input path with breakpoints. + // This ensures breakpoints are injected correctly before the line directive is added. 
+ let code = "0\n1\n2"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 3, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + // Breakpoint at document line 4 (code line 1, i.e., `1`) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 4, + verified: false, + invalid: false, + }]; + + let result = annotate_input(code, location, Some(&mut breakpoints)); + insta::assert_snapshot!(result); + + // Breakpoint line should remain in document coordinates + assert_eq!(breakpoints[0].line, 4); + assert!(!breakpoints[0].invalid); + } + #[test] fn test_inject_breakpoints_single_line() { let code = "x <- 1\ny <- 2\nz <- 3"; @@ -829,4 +899,76 @@ mod tests { assert!(!breakpoints[2].verified); assert!(!breakpoints[2].invalid); } + + #[test] + fn test_inject_breakpoints_with_line_offset() { + // Test that breakpoints work correctly when the code starts at a non-zero line + // in the document. This simulates executing a selection from the middle of a file. 
+ // + // The code represents lines 10-12 of the original document: + // Line 10: x <- 1 + // Line 11: y <- 2 + // Line 12: z <- 3 + let code = "x <- 1\ny <- 2\nz <- 3"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 10, + character: 0, + }, + end: Position { + line: 12, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + + // Breakpoint at document line 11 (which is code line 1, i.e., `y <- 2`) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 11, + verified: false, + invalid: false, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + + // The breakpoint line should remain in document coordinates + assert_eq!(breakpoints[0].line, 11); + assert!(!breakpoints[0].invalid); + } + + #[test] + fn test_inject_breakpoints_with_line_offset_nested() { + // Test with line offset and nested braces + let code = "f <- function() {\n x <- 1\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 20, + character: 0, + }, + end: Position { + line: 23, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + + // Breakpoint at document line 22 (code line 2, i.e., `y <- 2`) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 22, + verified: false, + invalid: false, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + + // The breakpoint line should remain in document coordinates + assert_eq!(breakpoints[0].line, 22); + assert!(!breakpoints[0].invalid); + } } diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index a602f559d..dda713aaa 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -355,7 +355,7 @@ impl DapServer { .map(|bp| dap::types::Breakpoint { id: Some(bp.id), verified: bp.verified, - line: 
Some(bp.line as i64), + line: Some((bp.line + 1) as i64), ..Default::default() }) .collect(); diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap new file mode 100644 index 000000000..27699a911 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap @@ -0,0 +1,10 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 4 "file:///test.R" +0 +.ark_breakpoint(browser(), 1) +#line 5 "file:///test.R" +1 +2 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap new file mode 100644 index 000000000..6eccac8a4 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap @@ -0,0 +1,9 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +x <- 1 +.ark_breakpoint(browser(), 1) +#line 12 "file:///test.R" +y <- 2 +z <- 3 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap new file mode 100644 index 000000000..e585c5a5e --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap @@ -0,0 +1,10 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +f <- function() { + x <- 1 +.ark_breakpoint(browser(), 1) +#line 23 "file:///test.R" + y <- 2 +} From 4bb2befa909efd082e0695cc134a02e3c5ffa8a1 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 11 Dec 2025 17:34:45 +0100 Subject: [PATCH 07/42] Insert line directives after newlines --- crates/ark/src/console_annotate.rs | 42 
+++++++++++++------ ...s__inject_breakpoints_with_blank_line.snap | 2 +- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 6d5724c20..b032233ec 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -440,7 +440,6 @@ fn add_line_directive_to_node(node: &RSyntaxNode, line: u32, uri: &Url) -> RSynt let line_directive = format!("#line {line} \"{uri}\"", line = line + 1); // Collect existing leading trivia, but skip only the first newline to avoid double blank lines - // Preserve any additional newlines (blank lines) that may follow let existing_trivia: Vec<_> = first_token .leading_trivia() .pieces() @@ -455,17 +454,36 @@ fn add_line_directive_to_node(node: &RSyntaxNode, line: u32, uri: &Url) -> RSynt }) .collect(); - // Create new trivia with #line directive prepended, followed by a newline - let new_trivia: Vec<_> = vec![ - ( - biome_rowan::TriviaPieceKind::SingleLineComment, - line_directive, - ), - (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), - ] - .into_iter() - .chain(existing_trivia.into_iter()) - .collect(); + // Insert line directive before the final whitespace (indentation) if present. + // This preserves indentation: `[\n, \n, ws]` becomes `[\n, \n, directive, \n, ws]` + // rather than `[\n, \n, ws, directive, \n]` which would break indentation. 
+ let new_trivia: Vec<_> = if existing_trivia.last().map_or(false, |(k, _)| { + *k == biome_rowan::TriviaPieceKind::Whitespace + }) { + let (init, last) = existing_trivia.split_at(existing_trivia.len() - 1); + init.iter() + .cloned() + .chain(vec![ + ( + biome_rowan::TriviaPieceKind::SingleLineComment, + line_directive, + ), + (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), + ]) + .chain(last.iter().cloned()) + .collect() + } else { + existing_trivia + .into_iter() + .chain(vec![ + ( + biome_rowan::TriviaPieceKind::SingleLineComment, + line_directive, + ), + (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), + ]) + .collect() + }; let new_first_token = first_token.with_leading_trivia(new_trivia.iter().map(|(k, t)| (*k, t.as_str()))); diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap index 84a2f4180..6b7368a1b 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap @@ -4,7 +4,7 @@ expression: result --- x <- 1 .ark_breakpoint(browser(), 1) -#line 4 "file:///test.R" +#line 4 "file:///test.R" y <- 2 From 78ff7c2bb45f9965f352d3eff10865d79135f2bd Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 11 Dec 2025 18:22:29 +0100 Subject: [PATCH 08/42] Step over injected breakpoint --- crates/ark/src/console_debug.rs | 26 ++++++++++++++------ crates/ark/src/interface.rs | 42 ++++++++++++++++++++++++++------- 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index da5f8b46f..f4c876fb8 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -20,6 +20,7 @@ use stdext::result::ResultExt; use crate::dap::dap::DapBackendEvent; use 
crate::interface::DebugCallText; +use crate::interface::DebugCallTextKind; use crate::interface::RMain; use crate::modules::ARK_ENVS; use crate::srcref::ark_uri; @@ -126,16 +127,16 @@ impl RMain { // If not debugging, nothing to do. DebugCallText::None => (), // If already finalized, keep what we have. - DebugCallText::Finalized(_) => (), + DebugCallText::Finalized(_, _) => (), // If capturing, transition to finalized. - DebugCallText::Capturing(call_text) => { - self.debug_call_text = DebugCallText::Finalized(call_text.clone()) + DebugCallText::Capturing(call_text, kind) => { + self.debug_call_text = DebugCallText::Finalized(call_text.clone(), *kind) }, } } pub(crate) fn debug_handle_write_console(&mut self, content: &str) { - if let DebugCallText::Capturing(ref mut call_text) = self.debug_call_text { + if let DebugCallText::Capturing(ref mut call_text, _) = self.debug_call_text { // Append to current expression if we are currently capturing stdout call_text.push_str(content); return; @@ -145,7 +146,17 @@ impl RMain { // the current expression we are debugging, so we use that as a signal to begin // capturing. if content == "debug: " { - self.debug_call_text = DebugCallText::Capturing(String::new()); + self.debug_call_text = + DebugCallText::Capturing(String::new(), DebugCallTextKind::Debug); + return; + } + + // `debug at *PATH*: *EXPR*` is emitted by R when stepping through + // blocks that have srcrefs. We use this to detect that we've just + // stepped to an injected breakpoint and need to move on automatically. + if content.starts_with("debug at ") { + self.debug_call_text = + DebugCallText::Capturing(String::new(), DebugCallTextKind::DebugAt); return; } @@ -164,13 +175,14 @@ impl RMain { // recreate the debugger state after their code execution. 
let call_text = match self.debug_call_text.clone() { DebugCallText::None => None, - DebugCallText::Capturing(call_text) => { + DebugCallText::Capturing(call_text, _) => { log::error!( "Call text is in `Capturing` state, but should be `Finalized`: '{call_text}'." ); None }, - DebugCallText::Finalized(call_text) => Some(call_text), + DebugCallText::Finalized(call_text, DebugCallTextKind::Debug) => Some(call_text), + DebugCallText::Finalized(_, DebugCallTextKind::DebugAt) => None, }; let last_start_line = self.debug_last_line; diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 30c19706a..d8c4990c4 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -158,8 +158,14 @@ pub enum SessionMode { #[derive(Clone, Debug)] pub enum DebugCallText { None, - Capturing(String), - Finalized(String), + Capturing(String, DebugCallTextKind), + Finalized(String, DebugCallTextKind), +} + +#[derive(Clone, Copy, Debug)] +pub enum DebugCallTextKind { + Debug, + DebugAt, } // --- Globals --- @@ -1006,14 +1012,32 @@ impl RMain { self.handle_active_request(&info, ConsoleValue::Success(result)); } - // If debugger is active, get current function and check whether it - // inherits from `ark_breakpoint`. If it does, send `n` automatically. - if harp::r_current_function().inherits("ark_breakpoint") { - self.debug_preserve_focus = false; - self.debug_send_dap(DapBackendEvent::Continued); + // If debugger is active, to prevent injected expressions from + // interfering with debug-stepping, we might need to automatically step + // over to the next statement by returning `n` to R. Two cases: + // - We've just stopped due to an injected breakpoint. In this case + // we're in the `.ark_breakpoint()` function and can look at the current + // `sys.function()` to detect this. + // - We've just stepped to another injected breakpoint. In this case we + // look at what function R emitted as part of the `Debug at` output. 
+ if self.debug_is_debugging { + // Did we just step onto an injected breakpoint + let at_injected_breakpoint = matches!( + &self.debug_call_text, + DebugCallText::Finalized(text, DebugCallTextKind::DebugAt) + if text.contains(".ark_breakpoint") + ); - Self::on_console_input(buf, buflen, String::from("n")).unwrap(); - return ConsoleResult::NewInput; + // Are we stopped by an injected breakpoint + let in_injected_breakpoint = harp::r_current_function().inherits("ark_breakpoint"); + + if at_injected_breakpoint || in_injected_breakpoint { + self.debug_preserve_focus = false; + self.debug_send_dap(DapBackendEvent::Continued); + + Self::on_console_input(buf, buflen, String::from("n")).unwrap(); + return ConsoleResult::NewInput; + } } // In the future we'll also send browser information, see From cc6281cf4019b47bf54c02511fd2826b7e90f364 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 12 Dec 2025 10:47:27 +0100 Subject: [PATCH 09/42] Model breakpoint state with an enum --- crates/ark/src/console_annotate.rs | 104 ++++++++++++----------------- crates/ark/src/dap/dap.rs | 17 +++-- crates/ark/src/dap/dap_server.rs | 6 +- 3 files changed, 59 insertions(+), 68 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index b032233ec..1f38545ab 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -19,6 +19,7 @@ use biome_rowan::WalkEvent; use url::Url; use crate::dap::dap::Breakpoint; +use crate::dap::dap::BreakpointState; pub(crate) fn annotate_input( code: &str, @@ -242,7 +243,7 @@ fn find_anchors_in_list<'a>( // expressions in the nested list (e.g., on a closing `}` line with // no executable code). Mark this breakpoint as invalid. 
let bp = breakpoints.next().unwrap(); - bp.invalid = true; + bp.state = BreakpointState::Invalid; continue; } else { // No brace list found, use current element as fallback @@ -593,8 +594,7 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 4, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = annotate_input(code, location, Some(&mut breakpoints)); @@ -602,7 +602,7 @@ mod tests { // Breakpoint line should remain in document coordinates assert_eq!(breakpoints[0].line, 4); - assert!(!breakpoints[0].invalid); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } #[test] @@ -623,13 +623,12 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 1, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!breakpoints[0].verified); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); } #[test] @@ -651,22 +650,20 @@ mod tests { Breakpoint { id: 1, line: 1, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, Breakpoint { id: 2, line: 3, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, ]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!breakpoints[0].verified); - assert!(!breakpoints[1].verified); - assert!(!breakpoints[1].invalid); // Valid location + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); // Valid location } #[test] @@ -687,13 +684,12 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 2, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, 
&mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!breakpoints[0].verified); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); } #[test] @@ -714,14 +710,13 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 10, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); // Should return unchanged code assert_eq!(result, code); - assert!(!breakpoints[0].verified); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); } #[test] @@ -747,22 +742,20 @@ mod tests { Breakpoint { id: 1, line: 2, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, Breakpoint { id: 2, line: 5, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, ]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!breakpoints[0].verified); - assert!(!breakpoints[1].verified); - assert!(!breakpoints[1].invalid); // Valid location + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); // Valid location } #[test] @@ -784,13 +777,12 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 3, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!breakpoints[0].verified); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); } #[test] @@ -813,17 +805,14 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 2, // The `}` line - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, 
&line_index); // Should return unchanged code since breakpoint is invalid assert_eq!(result, code); - assert!(!breakpoints[0].verified); - // Marked as invalid - assert!(breakpoints[0].invalid); + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); } #[test] @@ -846,14 +835,12 @@ mod tests { Breakpoint { id: 1, line: 2, // The `}` line - invalid - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, Breakpoint { id: 2, line: 3, // `y <- 2` - valid - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, ]; @@ -861,11 +848,11 @@ mod tests { insta::assert_snapshot!(result); // Invalid location - assert!(!breakpoints[0].verified); - assert!(breakpoints[0].invalid); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); - assert!(!breakpoints[1].verified); - assert!(!breakpoints[1].invalid); + assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); } #[test] @@ -891,31 +878,28 @@ mod tests { Breakpoint { id: 1, line: 0, // `x <- 1` - before nested - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, Breakpoint { id: 2, line: 2, // `y <- 2` - within nested - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, Breakpoint { id: 3, line: 5, // `w <- 4` - after nested - verified: false, - invalid: false, + state: BreakpointState::Unverified, }, ]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!breakpoints[0].verified); - assert!(!breakpoints[0].invalid); - assert!(!breakpoints[1].verified); - assert!(!breakpoints[1].invalid); - assert!(!breakpoints[2].verified); - assert!(!breakpoints[2].invalid); + assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[0].state, 
BreakpointState::Invalid)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[2].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[2].state, BreakpointState::Invalid)); } #[test] @@ -945,8 +929,7 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 11, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); @@ -954,7 +937,7 @@ mod tests { // The breakpoint line should remain in document coordinates assert_eq!(breakpoints[0].line, 11); - assert!(!breakpoints[0].invalid); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } #[test] @@ -978,8 +961,7 @@ mod tests { let mut breakpoints = vec![Breakpoint { id: 1, line: 22, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); @@ -987,6 +969,6 @@ mod tests { // The breakpoint line should remain in document coordinates assert_eq!(breakpoints[0].line, 22); - assert!(!breakpoints[0].invalid); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } } diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index a92d64855..918e8544a 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -24,12 +24,18 @@ use crate::dap::dap_server; use crate::request::RRequest; use crate::thread::RThreadSafe; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BreakpointState { + Unverified, + Verified, + Invalid, +} + #[derive(Debug, Clone)] pub struct Breakpoint { pub id: i64, pub line: u32, - pub verified: bool, - pub invalid: bool, + pub state: BreakpointState, } #[derive(Debug, Copy, Clone)] @@ -270,13 +276,16 @@ impl Dap { }; for bp in bp_list.iter_mut() { - if bp.verified || bp.invalid { + if matches!( + 
bp.state, + BreakpointState::Verified | BreakpointState::Invalid + ) { continue; } let line = bp.line; if line >= start_line && line <= end_line { - bp.verified = true; + bp.state = BreakpointState::Verified; if let Some(tx) = &self.backend_events_tx { tx.send(DapBackendEvent::BreakpointVerified(bp.id)) diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index dda713aaa..9debd4a92 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -33,6 +33,7 @@ use stdext::spawn; use url::Url; use super::dap::Breakpoint; +use super::dap::BreakpointState; use super::dap::Dap; use super::dap::DapBackendEvent; use crate::console_debug::FrameInfo; @@ -336,8 +337,7 @@ impl DapServer { .map(|bp| Breakpoint { id: state.next_breakpoint_id(), line: (bp.line - 1) as u32, - verified: false, - invalid: false, + state: BreakpointState::Unverified, }) .collect(); @@ -354,7 +354,7 @@ impl DapServer { .iter() .map(|bp| dap::types::Breakpoint { id: Some(bp.id), - verified: bp.verified, + verified: matches!(bp.state, BreakpointState::Verified), line: Some((bp.line + 1) as i64), ..Default::default() }) From 020c52f545fdcb2a7f26e1e8cb8b79b3d9fcb0c6 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 12 Dec 2025 11:30:06 +0100 Subject: [PATCH 10/42] Add hash-based breakpoint state preservation This avoids unnecessary re-verification when users toggle breakpoints on/off in unchanged documents. 
Will also allow restoring breakpoint on session change in multi-session workflows --- Cargo.lock | 32 +++++++++ crates/ark/Cargo.toml | 1 + crates/ark/src/dap/dap.rs | 25 +++++-- crates/ark/src/dap/dap_server.rs | 118 +++++++++++++++++++++++++++---- crates/ark/src/interface.rs | 2 +- 5 files changed, 157 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b68ab7454..43d1efbbd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -357,6 +357,7 @@ dependencies = [ "base64 0.21.0", "biome_line_index", "biome_rowan", + "blake3", "bus", "cc", "cfg-if", @@ -410,6 +411,18 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert_matches" version = "1.5.0" @@ -605,6 +618,19 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +[[package]] +name = "blake3" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -739,6 +765,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "convert_case" version = "0.4.0" diff --git a/crates/ark/Cargo.toml b/crates/ark/Cargo.toml index b01ff0cba..e81549ff8 
100644 --- a/crates/ark/Cargo.toml +++ b/crates/ark/Cargo.toml @@ -18,6 +18,7 @@ async-trait = "0.1.66" base64 = "0.21.0" biome_line_index.workspace = true biome_rowan.workspace = true +blake3 = "1.8.2" bus = "2.3.0" cfg-if = "1.0.0" crossbeam = { version = "0.8.2", features = ["crossbeam-channel"] } diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 918e8544a..7f2707db8 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -29,15 +29,28 @@ pub enum BreakpointState { Unverified, Verified, Invalid, + Disabled, } #[derive(Debug, Clone)] pub struct Breakpoint { pub id: i64, - pub line: u32, + pub line: u32, // 0-based pub state: BreakpointState, } +impl Breakpoint { + /// Convert from DAP 1-based line to internal 0-based line + pub fn from_dap_line(line: i64) -> u32 { + (line - 1) as u32 + } + + /// Convert from internal 0-based line to DAP 1-based line + pub fn to_dap_line(line: u32) -> i64 { + (line + 1) as i64 + } +} + #[derive(Debug, Copy, Clone)] pub enum DapBackendEvent { /// Event sent when a normal (non-browser) prompt marks the end of a @@ -74,8 +87,8 @@ pub struct Dap { /// Current call stack pub stack: Option>, - /// Known breakpoints keyed by URI - pub breakpoints: HashMap>, + /// Known breakpoints keyed by URI, with document hash + pub breakpoints: HashMap)>, /// Map of `source` -> `source_reference` used for frames that don't have /// associated files (i.e. no `srcref` attribute). The `source` is the key to @@ -271,14 +284,16 @@ impl Dap { /// breakpoints that fall within the range [start_line, end_line]. /// Sends a `BreakpointVerified` event for each newly verified breakpoint. pub fn verify_breakpoints(&mut self, uri: &Url, start_line: u32, end_line: u32) { - let Some(bp_list) = self.breakpoints.get_mut(uri) else { + let Some((_, bp_list)) = self.breakpoints.get_mut(uri) else { return; }; for bp in bp_list.iter_mut() { + // Verified and Disabled breakpoints are both already verified. 
+ // Invalid breakpoints never get verified so we skip them too. if matches!( bp.state, - BreakpointState::Verified | BreakpointState::Invalid + BreakpointState::Verified | BreakpointState::Disabled | BreakpointState::Invalid ) { continue; } diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 9debd4a92..a0876f1f8 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -327,35 +327,123 @@ impl DapServer { }, }; - let source_breakpoints = args.breakpoints.unwrap_or_default(); + // Read document content to compute hash. We currently assume UTF-8 even + // though the frontend supports files with different encodings (but + // UTF-8 is the default). + let doc_content = match std::fs::read_to_string(&path) { + Ok(content) => content, + Err(err) => { + // TODO: What do we do with breakpoints in virtual documents? + log::error!("Failed to read file '{path}': {err}"); + let rsp = req.error(&format!("Failed to read file: {path}")); + self.respond(rsp); + return; + }, + }; + + let args_breakpoints = args.breakpoints.unwrap_or_default(); let mut state = self.state.lock().unwrap(); + let old_breakpoints = state.breakpoints.get(&uri).cloned(); + + // Breakpoints are associated with this hash. If the document has + // changed after a reconnection, the breakpoints are no longer valid. 
+ let doc_hash = blake3::hash(doc_content.as_bytes()); + let doc_changed = match &old_breakpoints { + Some((existing_hash, _)) => existing_hash != &doc_hash, + None => true, + }; - // Positron sends 1-based line offsets, but this is configurable by client - let breakpoints: Vec = source_breakpoints - .iter() - .map(|bp| Breakpoint { - id: state.next_breakpoint_id(), - line: (bp.line - 1) as u32, - state: BreakpointState::Unverified, - }) - .collect(); + let new_breakpoints = if doc_changed { + log::trace!("DAP: Document changed for {uri}, discarding old breakpoints"); + + // Replace all existing breakpoints by new, unverified ones + args_breakpoints + .iter() + .map(|bp| Breakpoint { + id: state.next_breakpoint_id(), + line: Breakpoint::from_dap_line(bp.line), + state: BreakpointState::Unverified, + }) + .collect() + } else { + log::trace!("DAP: Document unchanged for {uri}, preserving breakpoint states"); + + // Unwrap Safety: `doc_changed` is false, so `old_breakpoints` is Some + let (_, old_breakpoints) = old_breakpoints.unwrap(); + let mut old_by_line: HashMap = old_breakpoints + .into_iter() + .map(|bp| (bp.line, bp)) + .collect(); + + let mut breakpoints: Vec = Vec::new(); + + for bp in &args_breakpoints { + let line = Breakpoint::from_dap_line(bp.line); + + if let Some(old_bp) = old_by_line.remove(&line) { + // Breakpoint already exists at this line + let new_state = match old_bp.state { + // This breakpoint used to be verified, was disabled, and is now back online + BreakpointState::Disabled => BreakpointState::Verified, + // We preserve other states (verified or unverified) + other => other, + }; + + breakpoints.push(Breakpoint { + id: old_bp.id, + line, + state: new_state, + }); + } else { + // New breakpoints always start as Unverified, until they get evaluated once + breakpoints.push(Breakpoint { + id: state.next_breakpoint_id(), + line, + state: BreakpointState::Unverified, + }); + } + } + + // Remaining verified breakpoints need to be preserved
in memory + // when deleted. That's because when user unchecks a breakpoint on + // the frontend, the breakpoint is actually deleted (i.e. omitted) + // by a `SetBreakpoints()` request. When the user reenables the + // breakpoint, we have to restore the verification state. + // Unverified/Invalid breakpoints on the other hand are simply + // dropped since there's no verified state that needs to be + // preserved. + for (line, old_bp) in old_by_line { + if matches!(old_bp.state, BreakpointState::Verified) { + breakpoints.push(Breakpoint { + id: old_bp.id, + line, + state: BreakpointState::Disabled, + }); + } + } + + breakpoints + }; log::trace!( - "DAP: URI {uri} now has {} unverified breakpoints", - breakpoints.len() + "DAP: URI {uri} now has {} breakpoints", + new_breakpoints.len() ); - state.breakpoints.insert(uri, breakpoints.clone()); + state + .breakpoints + .insert(uri, (doc_hash, new_breakpoints.clone())); drop(state); - let response_breakpoints: Vec = breakpoints + let response_breakpoints: Vec = new_breakpoints .iter() + .filter(|bp| !matches!(bp.state, BreakpointState::Disabled)) .map(|bp| dap::types::Breakpoint { id: Some(bp.id), verified: matches!(bp.state, BreakpointState::Verified), - line: Some((bp.line + 1) as i64), + line: Some(Breakpoint::to_dap_line(bp.line)), ..Default::default() }) .collect(); diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index d8c4990c4..e2f66effe 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1399,7 +1399,7 @@ impl RMain { let breakpoints = loc .as_ref() .and_then(|loc| dap_guard.breakpoints.get_mut(&loc.uri)) - .map(|v| v.as_mut_slice()); + .map(|(_, v)| v.as_mut_slice()); match PendingInputs::read(&code, loc, breakpoints) { Ok(ParseResult::Success(inputs)) => { From c68ef871c2b91a2584089f06ccb2c310426a2f00 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 12 Dec 2025 13:04:17 +0100 Subject: [PATCH 11/42] Simplify code with lifetime extension --- 
crates/ark/src/interface.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index e2f66effe..b7a9e76d2 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -348,15 +348,11 @@ impl PendingInputs { location: Option, breakpoints: Option<&mut [Breakpoint]>, ) -> anyhow::Result> { - let mut _srcfile = None; - let input = if let Some(location) = location { let annotated_code = annotate_input(code, location, breakpoints); - _srcfile = Some(SrcFile::new_virtual_empty_filename(annotated_code.into())); - harp::ParseInput::SrcFile(&_srcfile.unwrap()) + harp::ParseInput::SrcFile(&SrcFile::new_virtual_empty_filename(annotated_code.into())) } else if harp::get_option_bool("keep.source") { - _srcfile = Some(SrcFile::new_virtual_empty_filename(code.into())); - harp::ParseInput::SrcFile(&_srcfile.unwrap()) + harp::ParseInput::SrcFile(&SrcFile::new_virtual_empty_filename(code.into())) } else { harp::ParseInput::Text(code) }; From 8d77bc1c3028e04fb0f0bff72deb07b24250f07a Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 12 Dec 2025 11:57:16 +0100 Subject: [PATCH 12/42] Make breakpoints conditional --- crates/ark/src/console_annotate.rs | 10 +++++--- crates/ark/src/console_debug.rs | 23 +++++++++++++++++++ crates/ark/src/dap/dap.rs | 10 ++++++++ crates/ark/src/interface.rs | 3 +++ crates/ark/src/modules/positron/debug.R | 18 +++++++++++++-- ...tests__annotate_input_with_breakpoint.snap | 2 +- ...reakpoints_before_within_after_nested.snap | 6 ++--- ...sts__inject_breakpoints_in_brace_list.snap | 2 +- ...e__tests__inject_breakpoints_multiple.snap | 4 ++-- ...ts__inject_breakpoints_multiple_lists.snap | 4 ++-- ...n_closing_brace_with_valid_breakpoint.snap | 2 +- ...tests__inject_breakpoints_single_line.snap | 2 +- ...s__inject_breakpoints_with_blank_line.snap | 2 +- ...__inject_breakpoints_with_line_offset.snap | 2 +- ...t_breakpoints_with_line_offset_nested.snap | 2 +- 15 
files changed, 73 insertions(+), 19 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 1f38545ab..e3236c758 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -346,7 +346,7 @@ fn inject_breakpoint_calls( // Create the breakpoint call and modified anchor // Line directive uses document coordinates (code_line + line_offset) - let breakpoint_call = create_breakpoint_call(anchor_info.breakpoint_id); + let breakpoint_call = create_breakpoint_call(uri, anchor_info.breakpoint_id); let doc_line = anchor_info.code_line + line_offset; let modified_anchor = add_line_directive_to_node(&new_anchor, doc_line, uri); @@ -428,8 +428,12 @@ fn get_start_line(node: &RSyntaxNode, line_index: &LineIndex) -> u32 { line_index.line_col(offset).map(|lc| lc.line).unwrap_or(0) } -fn create_breakpoint_call(breakpoint_id: i64) -> RSyntaxNode { - let code = format!("\n.ark_breakpoint(browser(), {breakpoint_id})\n"); +fn create_breakpoint_call(uri: &Url, id: i64) -> RSyntaxNode { + // NOTE: If you use `base::browser()` here in an attempt to prevent masking + // issues in case someone redefined `browser()`, you'll cause the function + // in which the breakpoint is injected to be bytecode-compiled. 
This is a + // limitation/bug of https://github.com/r-devel/r-svn/blob/e2aae817/src/library/compiler/R/cmp.R#L1273-L1290 + let code = format!("\nbase::.ark_breakpoint(browser(), \"{uri}\", \"{id}\")\n"); aether_parser::parse(&code, Default::default()).syntax() } diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index f4c876fb8..fe221245b 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -15,8 +15,10 @@ use harp::session::r_sys_calls; use harp::session::r_sys_frames; use harp::session::r_sys_functions; use harp::utils::r_is_null; +use libr::SEXP; use regex::Regex; use stdext::result::ResultExt; +use url::Url; use crate::dap::dap::DapBackendEvent; use crate::interface::DebugCallText; @@ -295,6 +297,13 @@ impl RMain { let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); re.is_match(uri) } + + pub(crate) fn is_breakpoint_enabled(&self, uri: &Url, id: String) -> bool { + self.debug_dap + .lock() + .unwrap() + .is_breakpoint_enabled(uri, id) + } } fn as_frame_info(info: libr::SEXP, id: i64) -> Result { @@ -370,3 +379,17 @@ fn as_frame_info(info: libr::SEXP, id: i64) -> Result { }) } } + +#[harp::register] +pub unsafe extern "C-unwind" fn ps_is_breakpoint_enabled( + uri: SEXP, + id: SEXP, +) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let uri = Url::parse(&uri)?; + + let id: String = RObject::view(id).try_into()?; + + let enabled: RObject = RMain::get().is_breakpoint_enabled(&uri, id).into(); + Ok(enabled.sexp) +} diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 7f2707db8..ab226745d 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -309,6 +309,16 @@ impl Dap { } } } + + pub(crate) fn is_breakpoint_enabled(&self, uri: &Url, id: String) -> bool { + let Some((_, breakpoints)) = self.breakpoints.get(uri) else { + return false; + }; + + breakpoints + .iter() + .any(|bp| bp.id.to_string() == id && 
matches!(bp.state, BreakpointState::Verified)) + } } // Handler for Amalthea socket threads diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index b7a9e76d2..38519ef45 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1028,6 +1028,9 @@ impl RMain { let in_injected_breakpoint = harp::r_current_function().inherits("ark_breakpoint"); if at_injected_breakpoint || in_injected_breakpoint { + let kind = if at_injected_breakpoint { "at" } else { "in" }; + log::trace!("Injected breakpoint reached ({kind}), moving to next expression"); + self.debug_preserve_focus = false; self.debug_send_dap(DapBackendEvent::Continued); diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index 2277552dc..af1b041cd 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -701,10 +701,24 @@ non_parseable_fixed_info <- function(pattern, replacement) { list(pattern = pattern, replacement = replacement, fixed = TRUE) } +is_breakpoint_enabled <- function(uri, id) { + .ps.Call("ps_is_breakpoint_enabled", uri, id) +} + +# Injected breakpoint. This receives a `browser()` call in the `expr` argument. +# The argument is forced only if the breakpoint is enabled. Since `expr` is promised +# in the calling frame environment, that environment is marked by R as being +# debugged (with `SET_RDEBUG`), allowing to step through it. We're stopped in +# the wrong frame (`.ark_breakpoint()`'s) but the console automatically steps to +# the next expression whenever it detects that the current function (retrieved +# with `sys.function()`) inherits from `ark_breakpoint`.
#' @export .ark_breakpoint <- structure( - function(expr, id) { - # TODO: Don't force `expr` if breakpoint is disabled + function(expr, uri, id) { + # Force `browser()` call only if breakpoint is enabled + if (!is_breakpoint_enabled(uri, id)) { + return() + } expr }, class = "ark_breakpoint" diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap index 27699a911..ce21114f9 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap @@ -4,7 +4,7 @@ expression: result --- #line 4 "file:///test.R" 0 -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 5 "file:///test.R" 1 2 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap index f6a0eaa6b..5c26c3f85 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap @@ -2,15 +2,15 @@ source: crates/ark/src/console_annotate.rs expression: result --- -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 1 "file:///test.R" x <- 1 f <- function() { -.ark_breakpoint(browser(), 2) +base::.ark_breakpoint(browser(), "file:///test.R", "2") #line 3 "file:///test.R" y <- 2 z <- 3 } -.ark_breakpoint(browser(), 3) +base::.ark_breakpoint(browser(), "file:///test.R", "3") #line 6 "file:///test.R" w <- 4 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap 
b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap index 0c3b123a8..39a9731c9 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap @@ -4,7 +4,7 @@ expression: result --- f <- function() { x <- 1 -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 3 "file:///test.R" y <- 2 } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap index aa8a97f6c..5cd66097d 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap @@ -3,10 +3,10 @@ source: crates/ark/src/console_annotate.rs expression: result --- x <- 1 -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 2 "file:///test.R" y <- 2 z <- 3 -.ark_breakpoint(browser(), 2) +base::.ark_breakpoint(browser(), "file:///test.R", "2") #line 4 "file:///test.R" w <- 4 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap index 65786edc6..6126b2976 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap @@ -4,11 +4,11 @@ expression: result --- x <- 1 f <- function() { -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 3 "file:///test.R" y <- 2 z <- 3 } -.ark_breakpoint(browser(), 2) +base::.ark_breakpoint(browser(), "file:///test.R", "2") #line 6 
"file:///test.R" w <- 4 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap index 01e5a75f4..c517d6d38 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap @@ -5,6 +5,6 @@ expression: result f <- function() { x <- 1 } -.ark_breakpoint(browser(), 2) +base::.ark_breakpoint(browser(), "file:///test.R", "2") #line 4 "file:///test.R" y <- 2 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap index 3bf773e55..42f0366c9 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap @@ -3,7 +3,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- x <- 1 -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 2 "file:///test.R" y <- 2 z <- 3 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap index 6b7368a1b..39357c195 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap @@ -3,7 +3,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- x <- 1 -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") 
#line 4 "file:///test.R" diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap index 6eccac8a4..057e53037 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap @@ -3,7 +3,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- x <- 1 -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 12 "file:///test.R" y <- 2 z <- 3 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap index e585c5a5e..d71ce7871 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap @@ -4,7 +4,7 @@ expression: result --- f <- function() { x <- 1 -.ark_breakpoint(browser(), 1) +base::.ark_breakpoint(browser(), "file:///test.R", "1") #line 23 "file:///test.R" y <- 2 } From 1d6c7e54a2cfbdbb46e51202874134ecab293f3c Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Sat, 13 Dec 2025 10:12:19 +0100 Subject: [PATCH 13/42] Invalidate breakpoints when LSP document changes --- crates/ark/src/dap/dap.rs | 30 +++++++++++++++++++++---- crates/ark/src/dap/dap_server.rs | 4 ++-- crates/ark/src/interface.rs | 33 ++++++++++++++++++++++++++++ crates/ark/src/lsp/backend.rs | 5 ++++- crates/ark/src/lsp/handler.rs | 17 ++++++++++++-- crates/ark/src/lsp/main_loop.rs | 18 ++++++++++++--- crates/ark/src/lsp/state_handlers.rs | 8 +++++++ crates/ark/src/shell.rs | 6 +++++ crates/ark/src/start.rs | 12 +++++++++- 9 files changed, 120 
insertions(+), 13 deletions(-) diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index ab226745d..16142b331 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -64,8 +64,8 @@ pub enum DapBackendEvent { /// debugging session Stopped(DapStoppedEvent), - /// Event sent when a breakpoint has been verified - BreakpointVerified(i64), + /// Event sent when a breakpoint state changes (verified or unverified) + BreakpointState { id: i64, verified: bool }, } #[derive(Debug, Copy, Clone)] @@ -303,13 +303,35 @@ impl Dap { bp.state = BreakpointState::Verified; if let Some(tx) = &self.backend_events_tx { - tx.send(DapBackendEvent::BreakpointVerified(bp.id)) - .log_err(); + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + verified: true, + }) + .log_err(); } } } } + /// Called when a document changes. Removes all breakpoints for the URI + /// and sends unverified events for each one. + pub fn did_change_document(&mut self, uri: &Url) { + let Some((_, breakpoints)) = self.breakpoints.remove(uri) else { + return; + }; + let Some(tx) = &self.backend_events_tx else { + return; + }; + + for bp in breakpoints { + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + verified: false, + }) + .log_err(); + } + } + pub(crate) fn is_breakpoint_enabled(&self, uri: &Url, id: String) -> bool { let Some((_, breakpoints)) = self.breakpoints.get(uri) else { return false; diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index a0876f1f8..02119f593 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -176,12 +176,12 @@ fn listen_dap_events( Event::Terminated(None) }, - DapBackendEvent::BreakpointVerified(id) => { + DapBackendEvent::BreakpointState { id, verified } => { Event::Breakpoint(BreakpointEventBody { reason: BreakpointEventReason::Changed, breakpoint: dap::types::Breakpoint { id: Some(id), - verified: true, + verified, ..Default::default() }, }) diff --git 
a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 38519ef45..87ec3d6fd 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -168,6 +168,13 @@ pub enum DebugCallTextKind { DebugAt, } +/// Notifications from other components (e.g., LSP) to the Console +#[derive(Debug)] +pub enum ConsoleNotification { + /// Notification that a document has changed, requiring breakpoint invalidation. + DidChangeDocument(Url), +} + // --- Globals --- // These values must be global in order for them to be accessible from R // callbacks, which do not have a facility for passing or returning context. @@ -523,6 +530,7 @@ impl RMain { session_mode: SessionMode, default_repos: DefaultRepos, graphics_device_rx: AsyncUnboundedReceiver, + console_notification_rx: AsyncUnboundedReceiver, ) { // Set the main thread ID. // Must happen before doing anything that checks `RMain::on_main_thread()`, @@ -551,6 +559,14 @@ impl RMain { let main = RMain::get_mut(); + // Spawn handler loop for async messages + r_task::spawn_interrupt({ + let dap_clone = main.debug_dap.clone(); + || async move { + RMain::process_console_notifications(console_notification_rx, dap_clone).await + } + }); + let mut r_args = r_args.clone(); // Record if the user has requested that we don't load the site/user level R profiles @@ -901,6 +917,23 @@ impl RMain { &self.iopub_tx } + // Async messages for the Console. Processed at interrupt time. 
+ async fn process_console_notifications( + mut console_notification_rx: AsyncUnboundedReceiver, + dap: Arc>, + ) { + loop { + while let Some(notification) = console_notification_rx.recv().await { + match notification { + ConsoleNotification::DidChangeDocument(uri) => { + let mut dap = dap.lock().unwrap(); + dap.did_change_document(&uri); + }, + } + } + } + } + fn init_execute_request(&mut self, req: &ExecuteRequest) -> (ConsoleInput, u32) { // Reset the autoprint buffer self.autoprint_output = String::new(); diff --git a/crates/ark/src/lsp/backend.rs b/crates/ark/src/lsp/backend.rs index fa2dda02d..6c52d1c5f 100644 --- a/crates/ark/src/lsp/backend.rs +++ b/crates/ark/src/lsp/backend.rs @@ -21,6 +21,7 @@ use stdext::result::ResultExt; use tokio::net::TcpListener; use tokio::runtime::Runtime; use tokio::sync::mpsc::unbounded_channel as tokio_unbounded_channel; +use tokio::sync::mpsc::UnboundedSender as AsyncUnboundedSender; use tower_lsp::jsonrpc; use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::request::GotoImplementationParams; @@ -34,6 +35,7 @@ use tower_lsp::LspService; use tower_lsp::Server; use super::main_loop::LSP_HAS_CRASHED; +use crate::interface::ConsoleNotification; use crate::interface::RMain; use crate::lsp::handlers::VirtualDocumentParams; use crate::lsp::handlers::VirtualDocumentResponse; @@ -479,6 +481,7 @@ pub fn start_lsp( runtime: Arc, server_start: ServerStartMessage, server_started_tx: Sender, + console_notification_tx: AsyncUnboundedSender, ) { runtime.block_on(async { let ip_address = server_start.ip_address(); @@ -513,7 +516,7 @@ pub fn start_lsp( let (shutdown_tx, mut shutdown_rx) = tokio::sync::mpsc::channel::<()>(1); let init = |client: Client| { - let state = GlobalState::new(client); + let state = GlobalState::new(client, console_notification_tx.clone()); let events_tx = state.events_tx(); // Start main loop and hold onto the handle that keeps it alive diff --git a/crates/ark/src/lsp/handler.rs b/crates/ark/src/lsp/handler.rs 
index 6bd1a61cb..1c59616cf 100644 --- a/crates/ark/src/lsp/handler.rs +++ b/crates/ark/src/lsp/handler.rs @@ -16,18 +16,24 @@ use crossbeam::channel::Sender; use stdext::spawn; use tokio::runtime::Builder; use tokio::runtime::Runtime; +use tokio::sync::mpsc::UnboundedSender as AsyncUnboundedSender; use super::backend; +use crate::interface::ConsoleNotification; use crate::interface::KernelInfo; pub struct Lsp { runtime: Arc, kernel_init_rx: BusReader, kernel_initialized: bool, + console_notification_tx: AsyncUnboundedSender, } impl Lsp { - pub fn new(kernel_init_rx: BusReader) -> Self { + pub fn new( + kernel_init_rx: BusReader, + console_notification_tx: AsyncUnboundedSender, + ) -> Self { let rt = Builder::new_multi_thread() .enable_all() // One for the main loop and one spare @@ -41,6 +47,7 @@ impl Lsp { runtime: Arc::new(rt), kernel_init_rx, kernel_initialized: false, + console_notification_tx, } } } @@ -68,8 +75,14 @@ impl ServerHandler for Lsp { // account for potential reconnects let runtime = self.runtime.clone(); + let console_notification_tx = self.console_notification_tx.clone(); spawn!("ark-lsp", move || { - backend::start_lsp(runtime, server_start, server_started_tx) + backend::start_lsp( + runtime, + server_start, + server_started_tx, + console_notification_tx, + ) }); return Ok(()); } diff --git a/crates/ark/src/lsp/main_loop.rs b/crates/ark/src/lsp/main_loop.rs index 4347c1013..fc24ab15d 100644 --- a/crates/ark/src/lsp/main_loop.rs +++ b/crates/ark/src/lsp/main_loop.rs @@ -29,6 +29,7 @@ use tower_lsp::Client; use url::Url; use super::backend::RequestResponse; +use crate::interface::ConsoleNotification; use crate::lsp; use crate::lsp::backend::LspMessage; use crate::lsp::backend::LspNotification; @@ -146,13 +147,15 @@ pub(crate) struct GlobalState { /// Unlike `WorldState`, `ParserState` cannot be cloned and is only accessed by /// exclusive handlers. 
-#[derive(Default)] pub(crate) struct LspState { /// The set of tree-sitter document parsers managed by the `GlobalState`. pub(crate) parsers: HashMap, /// Capabilities negotiated with the client pub(crate) capabilities: Capabilities, + + /// Channel for sending notifications to Console (e.g., document changes for DAP) + pub(crate) console_notification_tx: TokioUnboundedSender, } /// State for the auxiliary loop @@ -177,14 +180,23 @@ impl GlobalState { /// /// * `client`: The tower-lsp client shared with the tower-lsp backend /// and auxiliary loop. - pub(crate) fn new(client: Client) -> Self { + pub(crate) fn new( + client: Client, + console_notification_tx: TokioUnboundedSender, + ) -> Self { // Transmission channel for the main loop events. Shared with the // tower-lsp backend and the Jupyter kernel. let (events_tx, events_rx) = tokio_unbounded_channel::(); + let lsp_state = LspState { + parsers: HashMap::new(), + capabilities: Capabilities::default(), + console_notification_tx, + }; + let mut state = Self { world: WorldState::default(), - lsp_state: LspState::default(), + lsp_state, client, events_tx, events_rx, diff --git a/crates/ark/src/lsp/state_handlers.rs b/crates/ark/src/lsp/state_handlers.rs index ec5c41318..3aef51729 100644 --- a/crates/ark/src/lsp/state_handlers.rs +++ b/crates/ark/src/lsp/state_handlers.rs @@ -6,6 +6,7 @@ // use anyhow::anyhow; +use stdext::result::ResultExt; use tower_lsp::lsp_types; use tower_lsp::lsp_types::CompletionOptions; use tower_lsp::lsp_types::CompletionOptionsCompletionItem; @@ -42,6 +43,7 @@ use tracing::Instrument; use tree_sitter::Parser; use url::Url; +use crate::interface::ConsoleNotification; use crate::lsp; use crate::lsp::capabilities::Capabilities; use crate::lsp::config::indent_style_from_lsp; @@ -252,6 +254,12 @@ pub(crate) fn did_change( lsp::main_loop::index_update(vec![uri.clone()], state.clone()); + // Notify console about document change to invalidate breakpoints + lsp_state + .console_notification_tx + 
.send(ConsoleNotification::DidChangeDocument(uri.clone())) + .log_err(); + Ok(()) } diff --git a/crates/ark/src/shell.rs b/crates/ark/src/shell.rs index 2d8b6e069..d9e9c4c94 100644 --- a/crates/ark/src/shell.rs +++ b/crates/ark/src/shell.rs @@ -42,6 +42,7 @@ use tokio::sync::mpsc::UnboundedSender as AsyncUnboundedSender; use crate::ark_comm::ArkComm; use crate::help::r_help::RHelp; use crate::help_proxy; +use crate::interface::ConsoleNotification; use crate::interface::KernelInfo; use crate::interface::RMain; use crate::plots::graphics_device::GraphicsDeviceNotification; @@ -59,6 +60,7 @@ pub struct Shell { kernel_init_rx: BusReader, kernel_info: Option, graphics_device_tx: AsyncUnboundedSender, + console_notification_tx: AsyncUnboundedSender, } #[derive(Debug)] @@ -75,6 +77,7 @@ impl Shell { kernel_init_rx: BusReader, kernel_request_tx: Sender, graphics_device_tx: AsyncUnboundedSender, + console_notification_tx: AsyncUnboundedSender, ) -> Self { Self { comm_manager_tx, @@ -84,6 +87,7 @@ impl Shell { kernel_init_rx, kernel_info: None, graphics_device_tx, + console_notification_tx, } } @@ -234,6 +238,7 @@ impl ShellHandler for Shell { self.stdin_request_tx.clone(), self.kernel_request_tx.clone(), self.graphics_device_tx.clone(), + self.console_notification_tx.clone(), ), Comm::Help => handle_comm_open_help(comm), Comm::Other(target_name) if target_name == "ark" => ArkComm::handle_comm_open(comm), @@ -258,6 +263,7 @@ fn handle_comm_open_ui( stdin_request_tx: Sender, kernel_request_tx: Sender, graphics_device_tx: AsyncUnboundedSender, + _console_notification_tx: AsyncUnboundedSender, ) -> amalthea::Result { // Create a frontend to wrap the comm channel we were just given. This starts // a thread that proxies messages to the frontend. 
diff --git a/crates/ark/src/start.rs b/crates/ark/src/start.rs index 19be561fa..fcee6e625 100644 --- a/crates/ark/src/start.rs +++ b/crates/ark/src/start.rs @@ -22,6 +22,7 @@ use crossbeam::channel::unbounded; use crate::control::Control; use crate::dap; +use crate::interface::ConsoleNotification; use crate::interface::SessionMode; use crate::lsp; use crate::plots::graphics_device::GraphicsDeviceNotification; @@ -58,10 +59,17 @@ pub fn start_kernel( let (r_request_tx, r_request_rx) = bounded::(1); let (kernel_request_tx, kernel_request_rx) = bounded::(1); + // Async communication channel with the R thread (Console) + let (console_notification_tx, console_notification_rx) = + tokio::sync::mpsc::unbounded_channel::(); + // Create the LSP and DAP clients. // Not all Amalthea kernels provide these, but ark does. // They must be able to deliver messages to the shell channel directly. - let lsp = Arc::new(Mutex::new(lsp::handler::Lsp::new(kernel_init_tx.add_rx()))); + let lsp = Arc::new(Mutex::new(lsp::handler::Lsp::new( + kernel_init_tx.add_rx(), + console_notification_tx.clone(), + ))); // DAP needs the `RRequest` channel to communicate with // `read_console()` and send commands to the debug interpreter @@ -85,6 +93,7 @@ pub fn start_kernel( kernel_init_rx, kernel_request_tx, graphics_device_tx, + console_notification_tx, )); // Create the control handler; this is used to handle shutdown/interrupt and @@ -146,5 +155,6 @@ pub fn start_kernel( session_mode, default_repos, graphics_device_rx, + console_notification_rx, ) } From b516bcbf4f19c5ea69cc5effb4a2a66397788e54 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Sat, 13 Dec 2025 17:29:58 +0100 Subject: [PATCH 14/42] Reattach immediately after disconnection --- crates/ark/src/dap/dap.rs | 1 + crates/ark/src/dap/dap_server.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 16142b331..4debd2841 100644 --- a/crates/ark/src/dap/dap.rs +++ 
b/crates/ark/src/dap/dap.rs @@ -166,6 +166,7 @@ impl Dap { preserve_focus: bool, fallback_sources: HashMap, ) { + self.is_debugging = true; self.fallback_sources.extend(fallback_sources); self.load_variables_references(&mut stack); diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 02119f593..2faca134e 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -479,6 +479,11 @@ impl DapServer { let rsp = req.success(ResponseBody::Disconnect); self.respond(rsp); + + // Instruct client to reattach immediately + if let Some(tx) = &self.comm_tx { + tx.send(amalthea::comm_rpc_message!("attach")).log_err(); + } } fn handle_restart(&mut self, req: Request, _args: T) { From c988dedf5a11e40864683d20e3e9278a2936f560 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Sun, 14 Dec 2025 10:41:56 +0100 Subject: [PATCH 15/42] Start in running state --- crates/ark/src/dap/dap_server.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 2faca134e..002090439 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -459,15 +459,10 @@ impl DapServer { let rsp = req.success(ResponseBody::Attach); self.respond(rsp); - self.send_event(Event::Stopped(StoppedEventBody { - reason: StoppedEventReason::Step, - description: Some(String::from("Execution paused")), - thread_id: Some(THREAD_ID), - preserve_focus_hint: Some(false), - text: None, - all_threads_stopped: None, - hit_breakpoint_ids: None, - })) + self.send_event(Event::Thread(ThreadEventBody { + reason: ThreadEventReason::Started, + thread_id: THREAD_ID, + })); } fn handle_disconnect(&mut self, req: Request, _args: DisconnectArguments) { @@ -504,7 +499,7 @@ impl DapServer { let rsp = req.success(ResponseBody::Threads(ThreadsResponse { threads: vec![Thread { id: THREAD_ID, - name: String::from("Main thread"), + name: String::from("R 
console"), }], })); self.respond(rsp); From 26e037104bab9dcbf46e26b036cb42ea5c3d8ac4 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 15 Dec 2025 12:25:28 +0100 Subject: [PATCH 16/42] Use rlang-like `as_label()` to create stack frame calls --- crates/ark/src/modules/positron/calls.R | 8 + .../ark/src/modules/positron/calls_deparse.R | 307 ++++++++++++++++++ crates/ark/src/modules/positron/debug.R | 38 +-- crates/ark/src/modules/positron/utils.R | 4 + 4 files changed, 328 insertions(+), 29 deletions(-) create mode 100644 crates/ark/src/modules/positron/calls_deparse.R diff --git a/crates/ark/src/modules/positron/calls.R b/crates/ark/src/modules/positron/calls.R index 661a0c0fe..3bc52555c 100644 --- a/crates/ark/src/modules/positron/calls.R +++ b/crates/ark/src/modules/positron/calls.R @@ -19,6 +19,14 @@ call_name <- function(x) { ) } +simple_call_name <- function(x) { + if (is_simple_call(x)) { + call_name(x) + } else { + NULL + } +} + call_type <- function(x) { stopifnot(typeof(x) == "language") diff --git a/crates/ark/src/modules/positron/calls_deparse.R b/crates/ark/src/modules/positron/calls_deparse.R new file mode 100644 index 000000000..7929ea689 --- /dev/null +++ b/crates/ark/src/modules/positron/calls_deparse.R @@ -0,0 +1,307 @@ +# +# calls_deparse.R +# +# Copyright (C) 2025 Posit Software, PBC. All rights reserved. 
+# +# + +call_deparse <- function(x) { + deparse(x, width.cutoff = 500L) +} + +as_label <- function(x) { + # Remove arguments of call expressions + if (call_print_type(x) == "prefix") { + x <- x[1] + } + + # Retain only first line + out <- call_deparse(x)[[1]] + + # And first 20 characters + if (nchar(out) > 20) { + out <- substr(out, 1, 20) + out <- paste0(out, "...") + } + + out +} + +is_simple_call <- function(x) { + call_print_type(x) == "call" +} + +# From https://github.com/r-lib/rlang/blob/main/R/call.R +call_print_type <- function(call) { + stopifnot(is.call(call)) + + type <- call_print_fine_type(call) + switch( + type, + call = "prefix", + control = , + delim = , + subset = "special", + type + ) +} + +call_print_fine_type <- function(call) { + stopifnot(is.call(call)) + + op <- call_parse_type(call) + if (op == "") { + return("call") + } + + switch( + op, + `+unary` = , + `-unary` = , + `~unary` = , + `?unary` = , + `!` = , + `!!` = , + `!!!` = "prefix", + `function` = , + `while` = , + `for` = , + `repeat` = , + `if` = "control", + `(` = , + `{{` = , + `{` = "delim", + `[` = , + `[[` = "subset", + # These operators always print in infix form even if they have + # more arguments + `<-` = , + `<<-` = , + `=` = , + `::` = , + `:::` = , + `$` = , + `@` = "infix", + `+` = , + `-` = , + `?` = , + `~` = , + `:=` = , + `|` = , + `||` = , + `&` = , + `&&` = , + `>` = , + `>=` = , + `<` = , + `<=` = , + `==` = , + `!=` = , + `*` = , + `/` = , + `%%` = , + `special` = , + `:` = , + `^` = if (length(call) == 3) { + "infix" + } else { + "call" + } + ) +} + +# Extracted from C implementation in src/internal/parse.c +call_parse_type <- function(call) { + if (!is.call(call)) { + return("") + } + + head <- call[[1]] + if (!is.symbol(head)) { + return("") + } + + # Check if unary by examining if there's only one argument after the head + is_unary <- length(call) == 2 + + # Control flow keywords + if (identical(head, quote(`break`))) { + return("break") + } + if 
(identical(head, quote(`next`))) { + return("next") + } + if (identical(head, quote(`for`))) { + return("for") + } + if (identical(head, quote(`while`))) { + return("while") + } + if (identical(head, quote(`repeat`))) { + return("repeat") + } + if (identical(head, quote(`if`))) { + return("if") + } + if (identical(head, quote(`function`))) { + return("function") + } + + # Question mark (help operator) + if (identical(head, quote(`?`))) { + if (is_unary) { + return("?unary") + } + return("?") + } + + # Assignment operators + if (identical(head, quote(`<-`))) { + return("<-") + } + if (identical(head, quote(`<<-`))) { + return("<<-") + } + if (identical(head, quote(`=`))) { + return("=") + } + if (identical(head, quote(`:=`))) { + return(":=") + } + + # Comparison operators + if (identical(head, quote(`<`))) { + return("<") + } + if (identical(head, quote(`<=`))) { + return("<=") + } + if (identical(head, quote(`>`))) { + return(">") + } + if (identical(head, quote(`>=`))) { + return(">=") + } + if (identical(head, quote(`==`))) { + return("==") + } + if (identical(head, quote(`!=`))) { + return("!=") + } + + # Tilde (formula operator) + if (identical(head, quote(`~`))) { + if (is_unary) { + return("~unary") + } + return("~") + } + + # Logical operators + if (identical(head, quote(`|`))) { + return("|") + } + if (identical(head, quote(`||`))) { + return("||") + } + if (identical(head, quote(`&`))) { + return("&") + } + if (identical(head, quote(`&&`))) { + return("&&") + } + + # Bang operators (for negation, unquoting is unsupported) + if (identical(head, quote(`!`))) { + return("!") + } + + # Arithmetic operators + if (identical(head, quote(`+`))) { + if (is_unary) { + return("+unary") + } + return("+") + } + if (identical(head, quote(`-`))) { + if (is_unary) { + return("-unary") + } + return("-") + } + if (identical(head, quote(`*`))) { + return("*") + } + if (identical(head, quote(`/`))) { + return("/") + } + if (identical(head, quote(`^`))) { + return("^") + } + 
+ # Modulo and special operators + if (identical(head, quote(`%%`))) { + return("%%") + } + + # Check for special operators like %in%, %*%, etc. + name <- as.character(head) + if ( + substr(name, 1, 1) == "%" && + nchar(name) > 2 && + substr(name, nchar(name), nchar(name)) == "%" + ) { + return("special") + } + + # Colon operators + if (identical(head, quote(`:`))) { + return(":") + } + if (identical(head, quote(`::`))) { + return("::") + } + if (identical(head, quote(`:::`))) { + return(":::") + } + + # Access operators + if (identical(head, quote(`$`))) { + return("$") + } + if (identical(head, quote(`@`))) { + return("@") + } + + # Subsetting operators + if (identical(head, quote(`[`))) { + return("[") + } + if (identical(head, quote(`[[`))) { + return("[[") + } + + # Parentheses + if (identical(head, quote(`(`))) { + return("(") + } + + # Braces and embrace + if (identical(head, quote(`{`))) { + # Check for embrace operator: {{x}} + if (length(call) == 2) { + cadr <- call[[2]] + if ( + is.call(cadr) && + length(cadr) == 2 && + identical(cadr[[1]], quote(`{`)) && + is.symbol(cadr[[2]]) + ) { + return("{{") + } + } + return("{") + } + + "" +} diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index af1b041cd..75dbf816e 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -85,7 +85,7 @@ debugger_stack_info <- function( top_level_call_frame_info <- function(x) { x <- call_deparse(x) - x <- lines_join(x) + x <- paste_line(x) # We return `0`s to avoid highlighting anything in the top level call. # We just want to show it in the editor, and that's really it. 
@@ -110,18 +110,8 @@ context_frame_info <- function( frame_call, last_start_line ) { - frame_call_name <- call_name(frame_call) - if (!is.null(frame_call_name)) { - # Figure out the frame function's name and use that as a simpler - # `frame_name` and `source_name` - frame_name <- paste0(frame_call_name, "()") - source_name <- frame_name - } else { - # Otherwise fall back to standard deparsing of `frame_call` - frame_lines <- call_deparse(frame_call) - frame_name <- lines_join(frame_lines) - source_name <- frame_name - } + frame_name <- as_label(frame_call) + source_name <- frame_name frame_info( source_name, @@ -140,14 +130,10 @@ intermediate_frame_infos <- function(n, calls, fns, environments, frame_calls) { }) call_texts <- lapply(calls, function(call) { call_lines <- call_deparse(call) - call_text <- lines_join(call_lines) - call_text - }) - frame_names <- lapply(frame_calls, function(call) { - call_lines <- call_deparse(call) - call_text <- lines_join(call_lines) + call_text <- paste_line(call_lines) call_text }) + frame_names <- lapply(frame_calls, function(call) as_label(call)) # Currently only tracked for the context frame, as that is where it is most useful, # since that is where the user is actively stepping. @@ -163,7 +149,7 @@ intermediate_frame_infos <- function(n, calls, fns, environments, frame_calls) { frame_name <- frame_names[[i]] out[[i]] <- frame_info( - source_name = call_text, + source_name = frame_name, frame_name = frame_name, srcref = srcref, fn = fn, @@ -201,7 +187,7 @@ frame_info <- function( # Only deparse if `srcref` failed! 
fn_lines <- call_deparse(fn) - fn_text <- lines_join(fn_lines) + fn_text <- paste_line(fn_lines) # Reparse early on, so even if we fail to find `call_text` or fail to reparse, # we pass a `fn_text` to `frame_info_unknown_range()` where we've consistently removed @@ -252,6 +238,8 @@ frame_info_from_srcref <- function( return(NULL) } + source_name <- basename(info$file) + new_frame_info( source_name = source_name, frame_name = frame_name, @@ -368,14 +356,6 @@ new_frame_info <- function( ) } -call_deparse <- function(x) { - deparse(x, width.cutoff = 500L) -} - -lines_join <- function(x) { - paste0(x, collapse = "\n") -} - #' @param fn_expr A function expression returned from `parse_function_text()`, which #' reparsed the function text while keeping source references. #' @param call_text A single string containing the text of a call to look for diff --git a/crates/ark/src/modules/positron/utils.R b/crates/ark/src/modules/positron/utils.R index c29462564..5411f1997 100644 --- a/crates/ark/src/modules/positron/utils.R +++ b/crates/ark/src/modules/positron/utils.R @@ -219,3 +219,7 @@ log_error <- function(msg) { stopifnot(is_string(msg)) .Call("ark_log_error", msg) } + +paste_line <- function(x) { + paste0(x, collapse = "\n") +} From fadfefcb2d38d0a0c1c297e881100a5c8f35fef8 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 15 Dec 2025 18:48:10 +0100 Subject: [PATCH 17/42] Use srcrefs for top-level frame too --- crates/ark/src/modules/positron/debug.R | 27 +++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index 75dbf816e..5714d8031 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -84,13 +84,26 @@ debugger_stack_info <- function( } top_level_call_frame_info <- function(x) { - x <- call_deparse(x) - x <- paste_line(x) + source_name <- paste0(as_label(x), ".R") + + srcref <- attr(x, "srcref", exact = 
TRUE) + if (!is.null(srcref)) { + out <- frame_info_from_srcref( + source_name = source_name, + frame_name = "", + srcref = srcref, + environment = NULL + ) + + if (!is.null(out)) { + return(out) + } + } # We return `0`s to avoid highlighting anything in the top level call. # We just want to show it in the editor, and that's really it. new_frame_info( - source_name = x, + source_name = source_name, frame_name = "", file = NULL, contents = x, @@ -111,7 +124,7 @@ context_frame_info <- function( last_start_line ) { frame_name <- as_label(frame_call) - source_name <- frame_name + source_name <- paste0(frame_name, ".R") frame_info( source_name, @@ -149,7 +162,7 @@ intermediate_frame_infos <- function(n, calls, fns, environments, frame_calls) { frame_name <- frame_names[[i]] out[[i]] <- frame_info( - source_name = frame_name, + source_name = paste0(frame_name, ".R"), frame_name = frame_name, srcref = srcref, fn = fn, @@ -238,7 +251,9 @@ frame_info_from_srcref <- function( return(NULL) } - source_name <- basename(info$file) + if (is_string(info$file)) { + source_name <- basename(info$file) + } new_frame_info( source_name = source_name, From 1f66b8b6a35974104292247671da216733013cad Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 16 Dec 2025 13:16:40 +0100 Subject: [PATCH 18/42] Handle disconnections from the frontend side --- crates/ark/src/dap/dap_server.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 002090439..ba7bde0f9 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -474,11 +474,6 @@ impl DapServer { let rsp = req.success(ResponseBody::Disconnect); self.respond(rsp); - - // Instruct client to reattach immediately - if let Some(tx) = &self.comm_tx { - tx.send(amalthea::comm_rpc_message!("attach")).log_err(); - } } fn handle_restart(&mut self, req: Request, _args: T) { From 86d48caff7db3edc9ba9f1c208b9e2c70126643b Mon Sep 17 00:00:00 2001 
From: Lionel Henry Date: Wed, 17 Dec 2025 09:00:20 +0100 Subject: [PATCH 19/42] Consolidate `is_breakpoint_enabled()` method --- crates/ark/src/console_debug.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index fe221245b..90f9f7c52 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -297,13 +297,6 @@ impl RMain { let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); re.is_match(uri) } - - pub(crate) fn is_breakpoint_enabled(&self, uri: &Url, id: String) -> bool { - self.debug_dap - .lock() - .unwrap() - .is_breakpoint_enabled(uri, id) - } } fn as_frame_info(info: libr::SEXP, id: i64) -> Result { @@ -390,6 +383,9 @@ pub unsafe extern "C-unwind" fn ps_is_breakpoint_enabled( let id: String = RObject::view(id).try_into()?; - let enabled: RObject = RMain::get().is_breakpoint_enabled(&uri, id).into(); + let console = RMain::get_mut(); + let dap = console.debug_dap.lock().unwrap(); + + let enabled: RObject = dap.is_breakpoint_enabled(&uri, id).into(); Ok(enabled.sexp) } From 552b30980062c37cc417de660419056ff1527f8b Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 17 Dec 2025 09:41:25 +0100 Subject: [PATCH 20/42] Unverified breakpoints are enabled --- crates/ark/src/dap/dap.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 4debd2841..3f9952f59 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -338,9 +338,16 @@ impl Dap { return false; }; - breakpoints - .iter() - .any(|bp| bp.id.to_string() == id && matches!(bp.state, BreakpointState::Verified)) + // Unverified breakpoints are enabled. This happens when we hit a + // breakpoint in an expression that hasn't been evaluated yet (or hasn't + // finished). 
+ breakpoints.iter().any(|bp| { + bp.id.to_string() == id && + matches!( + bp.state, + BreakpointState::Verified | BreakpointState::Unverified + ) + }) } } From 906555e1f86dfe7048c37c07344f0ae1aa855718 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 19 Dec 2025 09:25:41 +0100 Subject: [PATCH 21/42] Log structure "Got request" message in DAP --- crates/ark/src/dap/dap_server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index ba7bde0f9..4c98bfa8a 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -231,7 +231,7 @@ impl DapServer { Some(req) => req, None => return false, }; - log::trace!("DAP: Got request: {:?}", req); + log::trace!("DAP: Got request: {:#?}", req); let cmd = req.command.clone(); From a7ad00314309e51a70b0557ab2132913a6df6f92 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 16 Dec 2025 11:17:40 +0100 Subject: [PATCH 22/42] Add `source()` hook for breakpoint injection --- crates/ark/src/console_annotate.rs | 580 +++++++++++++++--- crates/ark/src/console_debug.rs | 64 +- crates/ark/src/interface.rs | 64 +- crates/ark/src/modules/positron/debug.R | 43 +- crates/ark/src/modules/positron/hooks.R | 1 + .../ark/src/modules/positron/hooks_source.R | 129 ++++ crates/ark/src/modules/positron/init.R | 4 + crates/ark/src/modules/positron/utils.R | 21 + ...tests__annotate_input_with_breakpoint.snap | 6 +- ...nnotate__tests__annotate_source_basic.snap | 12 + ..._annotate_source_multiline_expression.snap | 14 + ..._annotate_source_multiple_expressions.snap | 15 + ...ests__annotate_source_with_breakpoint.snap | 17 + ...reakpoints_before_within_after_nested.snap | 14 +- ...ject_breakpoints_doubly_nested_braces.snap | 12 + ...sts__inject_breakpoints_in_brace_list.snap | 2 +- ...e__tests__inject_breakpoints_multiple.snap | 10 +- ...ts__inject_breakpoints_multiple_lists.snap | 10 +- ...n_closing_brace_with_valid_breakpoint.snap | 6 +- 
...tests__inject_breakpoints_single_line.snap | 6 +- ...ject_breakpoints_triply_nested_braces.snap | 13 + ...s__inject_breakpoints_with_blank_line.snap | 6 +- ...__inject_breakpoints_with_line_offset.snap | 6 +- ...t_breakpoints_with_line_offset_nested.snap | 2 +- 24 files changed, 901 insertions(+), 156 deletions(-) create mode 100644 crates/ark/src/modules/positron/hooks_source.R create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index e3236c758..0293fac4e 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -9,6 +9,7 @@ use aether_syntax::RExpressionList; use aether_syntax::RRoot; use aether_syntax::RSyntaxNode; use amalthea::wire::execute_request::CodeLocation; +use amalthea::wire::execute_request::Position; use biome_line_index::LineIndex; use biome_rowan::AstNode; use biome_rowan::AstNodeList; @@ -16,10 +17,13 @@ use biome_rowan::SyntaxElement; use biome_rowan::TextRange; use biome_rowan::TextSize; use biome_rowan::WalkEvent; +use harp::object::RObject; +use libr::SEXP; use url::Url; use crate::dap::dap::Breakpoint; use crate::dap::dap::BreakpointState; +use crate::interface::RMain; pub(crate) fn annotate_input( code: &str, @@ -192,8 +196,32 @@ fn find_anchors_in_list<'a>( let current = &elements[i]; let 
current_code_line = get_start_line(current.syntax(), line_index); - // Base case: target line is at or before current element's start + // Base case: target line is at or before current element's start. + // At root level, we can't place breakpoints (R can't step at top-level), + // so we must try to find a nested brace list first. if target_code_line <= current_code_line { + if is_root { + // At root level, try to find a nested brace list in this element + let anchors_before = anchors.len(); + if find_anchor_in_element( + current.syntax(), + breakpoints, + anchors, + uri, + line_index, + line_offset, + ) + .is_some() && + anchors.len() > anchors_before + { + // Successfully placed in nested list + continue; + } + // No nested brace list found, mark as invalid + let bp = breakpoints.next().unwrap(); + bp.state = BreakpointState::Invalid; + continue; + } let bp = breakpoints.next().unwrap(); // Update bp.line to the actual document line where the breakpoint is placed bp.line = current_code_line + line_offset; @@ -213,12 +241,6 @@ fn find_anchors_in_list<'a>( // Recursion case: target must be within current element if next_code_line.map_or(true, |next| target_code_line < next) { - // If we're at the last element of a nested list and there's no next element, - // the target might be beyond this list. Pop back up to let the parent handle it. - if !is_root && next_code_line.is_none() { - return; - } - // Search within current element for brace lists let anchors_before = anchors.len(); if find_anchor_in_element( @@ -241,12 +263,30 @@ fn find_anchors_in_list<'a>( // The nested list was exhausted without placing an anchor for the // current breakpoint. This means the target line is beyond all // expressions in the nested list (e.g., on a closing `}` line with - // no executable code). Mark this breakpoint as invalid. + // no executable code). 
+ if !is_root && next_code_line.is_none() { + // Pop back up to let the parent handle it - the target might + // still be reachable via a sibling element in an outer list. + return; + } + // At root level or with more elements, mark as invalid. let bp = breakpoints.next().unwrap(); bp.state = BreakpointState::Invalid; continue; } else { - // No brace list found, use current element as fallback + // No brace list found in this element. + if !is_root && next_code_line.is_none() { + // Pop back up to let the parent handle it - the target might + // still be reachable via a sibling element in an outer list. + return; + } + if is_root { + // At root level, can't place breakpoints without a nested brace list + let bp = breakpoints.next().unwrap(); + bp.state = BreakpointState::Invalid; + continue; + } + // Use current element as fallback (only in nested lists) let bp = breakpoints.next().unwrap(); // Update bp.line to the actual document line where the breakpoint is placed bp.line = current_code_line + line_offset; @@ -428,12 +468,20 @@ fn get_start_line(node: &RSyntaxNode, line_index: &LineIndex) -> u32 { line_index.line_col(offset).map(|lc| lc.line).unwrap_or(0) } +fn get_end_line(node: &RSyntaxNode, line_index: &LineIndex) -> u32 { + let text_range: TextRange = node.text_trimmed_range(); + let offset: TextSize = text_range.end(); + line_index.line_col(offset).map(|lc| lc.line).unwrap_or(0) +} + fn create_breakpoint_call(uri: &Url, id: i64) -> RSyntaxNode { // NOTE: If you use `base::browser()` here in an attempt to prevent masking // issues in case someone redefined `browser()`, you'll cause the function // in which the breakpoint is injected to be bytecode-compiled. This is a // limitation/bug of https://github.com/r-devel/r-svn/blob/e2aae817/src/library/compiler/R/cmp.R#L1273-L1290 - let code = format!("\nbase::.ark_breakpoint(browser(), \"{uri}\", \"{id}\")\n"); + // Wrapped in .ark_auto_step() so the debugger automatically steps over it. 
+ let code = + format!("\nbase::.ark_auto_step(base::.ark_breakpoint(browser(), \"{uri}\", \"{id}\"))\n"); aether_parser::parse(&code, Default::default()).syntax() } @@ -498,6 +546,133 @@ fn add_line_directive_to_node(node: &RSyntaxNode, line: u32, uri: &Url) -> RSynt .unwrap_or_else(|| node.clone()) } +/// Annotate source code for `source()` and `pkgload::load_all()`. +/// +/// - Wraps the whole source in a `{}` block. This allows R to step through the +/// top-level expressions. +/// - Injects breakpoint calls (`.ark_auto_step(.ark_breakpoint(...))`) at +/// breakpoint locations. +/// - Injects verification calls (`.ark_auto_step(.ark_verify_breakpoints_range(...))`) +/// after each top-level expression. Verifying expression by expression allows +/// marking breakpoints as verified even when an expression fails mid-script. +/// - `#line` directives before each original expression so the debugger knows +/// where to step in the original file. +pub(crate) fn annotate_source(code: &str, uri: &Url, breakpoints: &mut [Breakpoint]) -> String { + let line_index = LineIndex::new(code); + + // Parse the original code to get line ranges for each top-level expression + let original_root = aether_parser::parse(code, Default::default()).tree(); + let Some(original_r) = RRoot::cast(original_root.syntax().clone()) else { + return code.to_string(); + }; + + // Collect original line ranges before any modifications + let original_ranges: Vec<(u32, u32)> = original_r + .expressions() + .into_iter() + .map(|expr| { + let start = get_start_line(expr.syntax(), &line_index); + let end = get_end_line(expr.syntax(), &line_index); + (start, end) + }) + .collect(); + + if original_ranges.is_empty() { + return code.to_string(); + } + + // Now inject breakpoints into the code + let location = CodeLocation { + uri: uri.clone(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: code.lines().count().saturating_sub(1) as u32, + character: code.lines().last().map(|l| 
l.len()).unwrap_or(0), + }, + }; + let code_with_breakpoints = inject_breakpoints(code, location, breakpoints, &line_index); + + // Re-parse the code with breakpoints to get the updated structure + let root = aether_parser::parse(&code_with_breakpoints, Default::default()).tree(); + + let Some(r) = RRoot::cast(root.syntax().clone()) else { + return code_with_breakpoints; + }; + + let exprs: Vec<_> = r.expressions().into_iter().collect(); + + // Build the output with wrapping braces and verify calls + let mut output = String::from("{\n"); + + // Track which original expression we're on + let mut original_expr_idx = 0; + + for expr in exprs.iter() { + let expr_str = expr.syntax().to_string(); + + // Check if this is an injected breakpoint call (starts with base::.ark_auto_step) + let is_injected = expr_str + .trim_start() + .starts_with("base::.ark_auto_step(base::.ark_breakpoint"); + + if is_injected { + // Just output the breakpoint call without #line or verify + output.push_str(expr_str.trim_start()); + output.push('\n'); + } else { + // This is an original expression - use the tracked original line range + if let Some(&(start_line, end_line)) = original_ranges.get(original_expr_idx) { + // Add #line directive (R uses 1-based lines) + output.push_str(&format!("#line {} \"{}\"\n", start_line + 1, uri)); + + // Add the expression, stripping leading whitespace since we added our own newline + output.push_str(expr_str.trim_start()); + output.push('\n'); + + // Add verify call after the expression + // Use L suffix for integer literals in R + output.push_str(&format!( + "base::.ark_auto_step(base::.ark_verify_breakpoints_range(\"{}\", {}L, {}L))\n", + uri, + start_line + 1, + end_line + 1 + )); + + original_expr_idx += 1; + } + } + } + + output.push_str("}\n"); + output +} + +#[harp::register] +pub unsafe extern "C-unwind" fn ps_annotate_source(uri: SEXP, code: SEXP) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let code: String = 
RObject::view(code).try_into()?; + + let uri = Url::parse(&uri)?; + + let main = RMain::get(); + let mut dap_guard = main.debug_dap.lock().unwrap(); + + // If there are no breakpoints for this file, return NULL to signal no + // annotation needed + let Some((_, breakpoints)) = dap_guard.breakpoints.get_mut(&uri) else { + return Ok(harp::r_null()); + }; + if breakpoints.is_empty() { + return Ok(harp::r_null()); + } + + let annotated = annotate_source(&code, &uri, breakpoints.as_mut_slice()); + Ok(RObject::try_from(annotated)?.sexp) +} + #[cfg(test)] mod tests { use amalthea::wire::execute_request::CodeLocation; @@ -581,8 +756,8 @@ mod tests { #[test] fn test_annotate_input_with_breakpoint() { // Test the full annotate_input path with breakpoints. - // This ensures breakpoints are injected correctly before the line directive is added. - let code = "0\n1\n2"; + // Wrap in braces so breakpoints are valid. + let code = "{\n0\n1\n2\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -590,14 +765,14 @@ mod tests { character: 0, }, end: Position { - line: 5, + line: 7, character: 1, }, }; - // Breakpoint at document line 4 (code line 1, i.e., `1`) + // Breakpoint at document line 5 (code line 2, i.e., `1`) let mut breakpoints = vec![Breakpoint { id: 1, - line: 4, + line: 5, state: BreakpointState::Unverified, }]; @@ -605,13 +780,14 @@ mod tests { insta::assert_snapshot!(result); // Breakpoint line should remain in document coordinates - assert_eq!(breakpoints[0].line, 4); + assert_eq!(breakpoints[0].line, 5); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } #[test] fn test_inject_breakpoints_single_line() { - let code = "x <- 1\ny <- 2\nz <- 3"; + // Wrap in braces so breakpoints are valid (inside a brace list) + let code = "{\nx <- 1\ny <- 2\nz <- 3\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -619,25 +795,26 @@ mod tests { character: 0, }, end: 
Position { - line: 2, - character: 6, + line: 4, + character: 1, }, }; let line_index = LineIndex::new(code); let mut breakpoints = vec![Breakpoint { id: 1, - line: 1, + line: 2, // `y <- 2` state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } #[test] fn test_inject_breakpoints_multiple() { - let code = "x <- 1\ny <- 2\nz <- 3\nw <- 4"; + // Wrap in braces so breakpoints are valid (inside a brace list) + let code = "{\nx <- 1\ny <- 2\nz <- 3\nw <- 4\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -645,29 +822,28 @@ mod tests { character: 0, }, end: Position { - line: 3, - character: 6, + line: 5, + character: 1, }, }; let line_index = LineIndex::new(code); let mut breakpoints = vec![ Breakpoint { id: 1, - line: 1, + line: 2, // `y <- 2` state: BreakpointState::Unverified, }, Breakpoint { id: 2, - line: 3, + line: 4, // `w <- 4` state: BreakpointState::Unverified, }, ]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); // Valid location + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); } #[test] @@ -726,10 +902,10 @@ mod tests { #[test] fn test_inject_breakpoints_multiple_lists() { // This test has breakpoints in different parent lists: - // - One in the root list - // - One in a nested brace list - // This may expose issues with the current propagate_change_to_root approach - let code = "x <- 1\nf <- function() {\n y 
<- 2\n z <- 3\n}\nw <- 4"; + // - One in the outer brace list + // - One in a nested brace list (inside function) + // Wrap in braces so both breakpoints are valid + let code = "{\nx <- 1\nf <- function() {\n y <- 2\n z <- 3\n}\nw <- 4\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -737,35 +913,36 @@ mod tests { character: 0, }, end: Position { - line: 5, - character: 6, + line: 7, + character: 1, }, }; let line_index = LineIndex::new(code); let mut breakpoints = vec![ Breakpoint { id: 1, - line: 2, + line: 3, // Inside function - `y <- 2` state: BreakpointState::Unverified, }, Breakpoint { id: 2, - line: 5, + line: 6, // In outer braces - `w <- 4` state: BreakpointState::Unverified, }, ]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); // Valid location + // Both breakpoints are valid (inside brace lists) + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); } #[test] fn test_inject_breakpoints_with_blank_line() { // Test that blank lines before an anchor are preserved - let code = "x <- 1\n\n\ny <- 2"; + // Wrap in braces so breakpoints are valid + let code = "{\nx <- 1\n\n\ny <- 2\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -773,20 +950,20 @@ mod tests { character: 0, }, end: Position { - line: 3, - character: 6, + line: 5, + character: 1, }, }; let line_index = LineIndex::new(code); let mut breakpoints = vec![Breakpoint { id: 1, - line: 3, + line: 4, // `y <- 2` state: BreakpointState::Unverified, }]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); 
insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } #[test] @@ -821,8 +998,9 @@ mod tests { #[test] fn test_inject_breakpoints_on_closing_brace_with_valid_breakpoint() { - // One breakpoint on `}` (invalid) and one on valid code - let code = "f <- function() {\n x <- 1\n}\ny <- 2"; + // One breakpoint on `}` (invalid) and one on valid code in outer braces + // Wrap in braces so the second breakpoint is valid + let code = "{\nf <- function() {\n x <- 1\n}\ny <- 2\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -830,20 +1008,20 @@ mod tests { character: 0, }, end: Position { - line: 3, - character: 6, + line: 5, + character: 1, }, }; let line_index = LineIndex::new(code); let mut breakpoints = vec![ Breakpoint { id: 1, - line: 2, // The `}` line - invalid + line: 3, // The `}` line of the function - invalid state: BreakpointState::Unverified, }, Breakpoint { id: 2, - line: 3, // `y <- 2` - valid + line: 4, // `y <- 2` - in outer braces, valid state: BreakpointState::Unverified, }, ]; @@ -851,21 +1029,20 @@ mod tests { let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - // Invalid location - assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + // First breakpoint is invalid (on closing brace) assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); - - assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); + // Second breakpoint is valid (in outer brace list) assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); } #[test] fn test_inject_breakpoints_before_within_after_nested() { // Comprehensive test with breakpoints: - // - Before nested list (line 0: `x <- 1`) - // - Within nested list (line 2: `y <- 2`) - // - After nested list (line 5: `w <- 4`) - let code = "x <- 1\nf <- 
function() {\n y <- 2\n z <- 3\n}\nw <- 4"; + // - Before nested list (line 1: `x <- 1`) - in outer braces + // - Within nested list (line 3: `y <- 2`) - inside function + // - After nested list (line 6: `w <- 4`) - in outer braces + // Wrap in braces so all breakpoints are valid + let code = "{\nx <- 1\nf <- function() {\n y <- 2\n z <- 3\n}\nw <- 4\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -873,36 +1050,34 @@ mod tests { character: 0, }, end: Position { - line: 5, - character: 6, + line: 7, + character: 1, }, }; let line_index = LineIndex::new(code); let mut breakpoints = vec![ Breakpoint { id: 1, - line: 0, // `x <- 1` - before nested + line: 1, // `x <- 1` - in outer braces state: BreakpointState::Unverified, }, Breakpoint { id: 2, - line: 2, // `y <- 2` - within nested + line: 3, // `y <- 2` - within nested function state: BreakpointState::Unverified, }, Breakpoint { id: 3, - line: 5, // `w <- 4` - after nested + line: 6, // `w <- 4` - in outer braces state: BreakpointState::Unverified, }, ]; let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); + // All breakpoints are valid (inside brace lists) assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Verified)); assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); - assert!(!matches!(breakpoints[2].state, BreakpointState::Verified)); assert!(!matches!(breakpoints[2].state, BreakpointState::Invalid)); } @@ -910,12 +1085,15 @@ mod tests { fn test_inject_breakpoints_with_line_offset() { // Test that breakpoints work correctly when the code starts at a non-zero line // in the document. This simulates executing a selection from the middle of a file. + // Wrap in braces so breakpoints are valid. 
// - // The code represents lines 10-12 of the original document: - // Line 10: x <- 1 - // Line 11: y <- 2 - // Line 12: z <- 3 - let code = "x <- 1\ny <- 2\nz <- 3"; + // The code represents lines 10-14 of the original document: + // Line 10: { + // Line 11: x <- 1 + // Line 12: y <- 2 + // Line 13: z <- 3 + // Line 14: } + let code = "{\nx <- 1\ny <- 2\nz <- 3\n}"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), start: Position { @@ -923,16 +1101,16 @@ mod tests { character: 0, }, end: Position { - line: 12, - character: 6, + line: 14, + character: 1, }, }; let line_index = LineIndex::new(code); - // Breakpoint at document line 11 (which is code line 1, i.e., `y <- 2`) + // Breakpoint at document line 12 (which is code line 2, i.e., `y <- 2`) let mut breakpoints = vec![Breakpoint { id: 1, - line: 11, + line: 12, state: BreakpointState::Unverified, }]; @@ -940,7 +1118,7 @@ mod tests { insta::assert_snapshot!(result); // The breakpoint line should remain in document coordinates - assert_eq!(breakpoints[0].line, 11); + assert_eq!(breakpoints[0].line, 12); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ -975,4 +1153,252 @@ mod tests { assert_eq!(breakpoints[0].line, 22); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } + + #[test] + fn test_inject_breakpoints_doubly_nested_braces() { + // Test with doubly nested braces: { { 1\n 2 } } + // The inner expressions should be reachable for breakpoints + let code = "{\n {\n 1\n 2\n }\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + + // Breakpoint at line 2 (the `1` expression inside the inner braces) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 2, + state: BreakpointState::Unverified, + }]; + + let result = inject_breakpoints(code, location, &mut 
breakpoints, &line_index); + insta::assert_snapshot!(result); + + // The breakpoint should be placed at line 2 + assert_eq!(breakpoints[0].line, 2); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + } + + #[test] + fn test_inject_breakpoints_triply_nested_braces() { + // Test with triply nested braces: { { { 1 } } } + let code = "{\n {\n {\n 1\n }\n }\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 6, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + + // Breakpoint at line 3 (the `1` expression inside the innermost braces) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 3, + state: BreakpointState::Unverified, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + insta::assert_snapshot!(result); + + // The breakpoint should be placed at line 3 + assert_eq!(breakpoints[0].line, 3); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + } + + #[test] + fn test_inject_breakpoints_nested_closing_brace_invalid() { + // Breakpoint on inner closing brace should be invalid + let code = "{\n {\n 1\n }\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + + // Breakpoint at line 3 (the inner `}` line) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 3, + state: BreakpointState::Unverified, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + // Should return unchanged code since breakpoint is invalid + assert_eq!(result, code); + // Marked as invalid + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + } + + #[test] + fn test_top_level_breakpoint_single_invalid() { + // Top-level breakpoints are invalid (R can't step 
at top-level) + let code = "x <- 1\ny <- 2\nz <- 3"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 2, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 1, + state: BreakpointState::Unverified, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + // Code unchanged since breakpoint is invalid + assert_eq!(result, code); + // Breakpoint marked as invalid + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + } + + #[test] + fn test_top_level_breakpoint_multiple_invalid() { + // Multiple top-level breakpoints are all invalid + let code = "x <- 1\ny <- 2\nz <- 3\nw <- 4"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 6, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 0, + state: BreakpointState::Unverified, + }, + Breakpoint { + id: 2, + line: 2, + state: BreakpointState::Unverified, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + // Code unchanged since all breakpoints are invalid + assert_eq!(result, code); + // Both breakpoints marked as invalid + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(matches!(breakpoints[1].state, BreakpointState::Invalid)); + } + + #[test] + fn test_top_level_breakpoint_mixed_invalid_and_nested() { + // Top-level breakpoints are invalid even when mixed with nested ones + let code = "x <- 1\nf <- function() {\n y <- 2\n}\nz <- 3"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 6, + }, + }; + let line_index = 
LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 0, // `x <- 1` - top-level, invalid + state: BreakpointState::Unverified, + }, + Breakpoint { + id: 2, + line: 2, // `y <- 2` - inside function, valid + state: BreakpointState::Unverified, + }, + Breakpoint { + id: 3, + line: 4, // `z <- 3` - top-level, invalid + state: BreakpointState::Unverified, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + // Code should contain breakpoint for nested expression only + assert!(result.contains("base::.ark_breakpoint")); + // Top-level breakpoints are invalid + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + // Nested breakpoint is valid + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); + // Top-level breakpoint is invalid + assert!(matches!(breakpoints[2].state, BreakpointState::Invalid)); + } + + #[test] + fn test_annotate_source_basic() { + let code = "x <- 1\ny <- 2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![]; + let result = annotate_source(code, &uri, &mut breakpoints); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_with_breakpoint() { + let code = "foo <- function() {\n x <- 1\n y <- 2\n}\nbar <- 3"; + let uri = Url::parse("file:///test.R").unwrap(); + // Breakpoint at line 2 (inside the function, 0-indexed) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 1, + state: BreakpointState::Unverified, + }]; + let result = annotate_source(code, &uri, &mut breakpoints); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_multiple_expressions() { + let code = "a <- 1\nb <- 2\nc <- 3"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![]; + let result = annotate_source(code, &uri, &mut breakpoints); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_multiline_expression() { + let code = "foo <- 
function(x) {\n x + 1\n}\nbar <- 2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![]; + let result = annotate_source(code, &uri, &mut breakpoints); + insta::assert_snapshot!(result); + } } diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index 90f9f7c52..7ed53c152 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -14,6 +14,7 @@ use harp::r_string; use harp::session::r_sys_calls; use harp::session::r_sys_frames; use harp::session::r_sys_functions; +use harp::srcref::SrcRef; use harp::utils::r_is_null; use libr::SEXP; use regex::Regex; @@ -255,6 +256,8 @@ impl RMain { out.push(as_frame_info(frame, id)?); } + log::trace!("DAP: Current call stack:\n{out:#?}"); + Ok(out) } } @@ -286,7 +289,7 @@ impl RMain { let hash = format!("{:x}", hasher.finish()); ark_uri(&format!( - "debug/session{i}/{hash}/{source_name}.R", + "debug/session{i}/{hash}/{source_name}", i = debug_session_index, )) } @@ -297,6 +300,33 @@ impl RMain { let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); re.is_match(uri) } + + pub(crate) fn verify_breakpoints(&self, srcref: RObject) { + let Some(srcref) = SrcRef::try_from(srcref).warn_on_err() else { + return; + }; + + let Some(filename) = srcref + .srcfile() + .and_then(|srcfile| srcfile.filename()) + .log_err() + else { + return; + }; + + // Only process file:// URIs (from our #line directives). + // Plain file paths or empty filenames are skipped silently. 
+ if !filename.starts_with("file://") { + return; + } + + let Some(uri) = Url::parse(&filename).warn_on_err() else { + return; + }; + + let mut dap = self.debug_dap.lock().unwrap(); + dap.verify_breakpoints(&uri, srcref.line_virtual.start, srcref.line_virtual.end); + } } fn as_frame_info(info: libr::SEXP, id: i64) -> Result { @@ -389,3 +419,35 @@ pub unsafe extern "C-unwind" fn ps_is_breakpoint_enabled( let enabled: RObject = dap.is_breakpoint_enabled(&uri, id).into(); Ok(enabled.sexp) } + +/// Verify breakpoints in the line range covered by a srcref. +/// Called after each expression is successfully evaluated in source(). +#[harp::register] +pub unsafe extern "C-unwind" fn ps_verify_breakpoints(srcref: SEXP) -> anyhow::Result { + let srcref = RObject::view(srcref); + RMain::get().verify_breakpoints(srcref.clone()); + Ok(libr::R_NilValue) +} + +/// Verify breakpoints in an explicit line range. +/// Called after each top-level expression in source() when using the source hook. +#[harp::register] +pub unsafe extern "C-unwind" fn ps_verify_breakpoints_range( + uri: SEXP, + start_line: SEXP, + end_line: SEXP, +) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let start_line: i32 = RObject::view(start_line).try_into()?; + let end_line: i32 = RObject::view(end_line).try_into()?; + + let Ok(uri) = Url::parse(&uri) else { + return Ok(libr::R_NilValue); + }; + + let main = RMain::get(); + let mut dap = main.debug_dap.lock().unwrap(); + dap.verify_breakpoints(&uri, start_line as u32, end_line as u32); + + Ok(libr::R_NilValue) +} diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 87ec3d6fd..fcf67695a 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -78,7 +78,6 @@ use harp::session::r_traceback; use harp::srcref::get_srcref_list; use harp::srcref::srcref_list_get; use harp::srcref::SrcFile; -use harp::srcref::SrcRef; use harp::utils::r_is_data_frame; use harp::utils::r_typeof; use 
harp::R_MAIN_THREAD_ID; @@ -357,6 +356,7 @@ impl PendingInputs { ) -> anyhow::Result> { let input = if let Some(location) = location { let annotated_code = annotate_input(code, location, breakpoints); + log::trace!("Annotated code: \n```\n{annotated_code}\n```"); harp::ParseInput::SrcFile(&SrcFile::new_virtual_empty_filename(annotated_code.into())) } else if harp::get_option_bool("keep.source") { harp::ParseInput::SrcFile(&SrcFile::new_virtual_empty_filename(code.into())) @@ -652,10 +652,9 @@ impl RMain { } // Register all hooks once all modules have been imported - let hook_result = RFunction::from("register_hooks").call_in(ARK_ENVS.positron_ns); - if let Err(err) = hook_result { - log::error!("Error registering some hooks: {err:?}"); - } + RFunction::from("register_hooks") + .call_in(ARK_ENVS.positron_ns) + .log_err(); // Populate srcrefs for namespaces already loaded in the session. // Namespaces of future loaded packages will be populated on load. @@ -674,18 +673,13 @@ impl RMain { log::error!("Error setting default repositories: {err:?}"); } + // Finish initialization of modules + RFunction::from("initialize") + .call_in(ARK_ENVS.positron_ns) + .log_err(); + // Initialise Ark's last value libr::SETCDR(r_symbol!(".ark_last_value"), harp::r_null()); - - // Store `.ark_breakpoint` in base namespace so it's maximally reachable - libr::SETCDR( - r_symbol!(".ark_breakpoint"), - // Originally defined in Positron namespace, get it from there - Environment::view(ARK_ENVS.positron_ns) - .get(".ark_breakpoint") - .unwrap() - .sexp, - ); } // Now that R has started (emitting any startup messages that we capture in the @@ -1048,21 +1042,22 @@ impl RMain { // we're in the `.ark_breakpoint()` function and can look at the current // `sys.function()` to detect this. // - We've just stepped to another injected breakpoint. In this case we - // look at what function R emitted as part of the `Debug at` output.
+ // look whether our sentinel `.ark_auto_step()` was emitted by R as part + // of the `Debug at` output. if self.debug_is_debugging { - // Did we just step onto an injected breakpoint - let at_injected_breakpoint = matches!( + // Did we just step onto an injected breakpoint or verification call + let at_auto_step = matches!( &self.debug_call_text, DebugCallText::Finalized(text, DebugCallTextKind::DebugAt) - if text.contains(".ark_breakpoint") + if text.contains(".ark_auto_step") ); // Are we stopped by an injected breakpoint let in_injected_breakpoint = harp::r_current_function().inherits("ark_breakpoint"); - if at_injected_breakpoint || in_injected_breakpoint { - let kind = if at_injected_breakpoint { "at" } else { "in" }; - log::trace!("Injected breakpoint reached ({kind}), moving to next expression"); + if in_injected_breakpoint || at_auto_step { + let kind = if in_injected_breakpoint { "in" } else { "at" }; + log::trace!("Auto-step expression reached ({kind}), moving to next expression"); self.debug_preserve_focus = false; self.debug_send_dap(DapBackendEvent::Continued); @@ -1372,7 +1367,7 @@ impl RMain { // Let frontend know the last request is complete. This turns us // back to Idle. 
- Self::reply_execute_request(&self.iopub_tx, req, &info, value); + Self::reply_execute_request(&self.iopub_tx, req, value); } else { log::info!("No active request to handle, discarding: {value:?}"); } @@ -2004,12 +1999,9 @@ impl RMain { fn reply_execute_request( iopub_tx: &Sender, req: ActiveReadConsoleRequest, - prompt_info: &PromptInfo, value: ConsoleValue, ) { - let prompt = &prompt_info.input_prompt; - - log::trace!("Got R prompt '{}', completing execution", prompt); + log::trace!("Completing execution after receiving prompt"); let exec_count = req.exec_count; @@ -2466,24 +2458,6 @@ impl RMain { } } - pub(crate) fn verify_breakpoints(&self, srcref: RObject) { - let Some(srcref) = SrcRef::try_from(srcref).log_err() else { - return; - }; - - let Some(uri) = srcref - .srcfile() - .and_then(|srcfile| srcfile.filename()) - .and_then(|filename| Url::parse(&filename).anyhow()) - .log_err() - else { - return; - }; - - let mut dap = self.debug_dap.lock().unwrap(); - dap.verify_breakpoints(&uri, srcref.line_virtual.start, srcref.line_virtual.end); - } - #[cfg(not(test))] // Avoid warnings in unit test pub(crate) fn read_console_frame(&self) -> RObject { self.read_console_frame.borrow().clone() diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index 5714d8031..fe293a0fe 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -14,11 +14,6 @@ debugger_stack_info <- function( calls ) { n <- length(fns) - - if (n == 0L) { - # Must have at least 1 frame on the stack to proceed - return(list()) - } if (n != length(environments) || n != length(calls)) { message <- paste0( "`sys.function()`, `sys.frames()`, and `sys.calls()` didn't return consistent results. ", @@ -27,8 +22,15 @@ debugger_stack_info <- function( stop(sprintf(message, n, length(environments), length(calls))) } - # Top level call never has source references. - # It's what comes through the console input. 
+ if (n == 0L) { + return(list(frame_info_from_srcref( + source_name = ".R", + frame_name = "", + srcref = context_srcref, + environment = NULL + ))) + } + top_level_loc <- 1L top_level_call <- calls[[top_level_loc]] @@ -718,3 +720,30 @@ is_breakpoint_enabled <- function(uri, id) { }, class = "ark_breakpoint" ) + +# Wrapper for expressions that should be auto-stepped over in the debugger. The +# debugger detects this by checking if R emitted a `debug at` line containing +# `.ark_auto_step` and automatically steps past it. +#' @export +.ark_auto_step <- function(expr) { + expr +} + +# Verify breakpoints in a line range. Called after each top-level expression in +# `source()`. +.ark_verify_breakpoints_range <- function(uri, start_line, end_line) { + .ps.Call("ps_verify_breakpoints_range", uri, start_line, end_line) +} + +debug_initialize <- function() { + # Store `.ark_breakpoint` and friends in base namespace so they're maximally + # reachable. We might want to do that for all symbols exported from the + # Ark/Positron namespace. 
+ node_poke_cdr(as.symbol(".ark_annotate_source"), .ark_annotate_source) + node_poke_cdr(as.symbol(".ark_auto_step"), .ark_auto_step) + node_poke_cdr(as.symbol(".ark_breakpoint"), .ark_breakpoint) + node_poke_cdr( + as.symbol(".ark_verify_breakpoints_range"), + .ark_verify_breakpoints_range + ) +} diff --git a/crates/ark/src/modules/positron/hooks.R b/crates/ark/src/modules/positron/hooks.R index d25ce85ca..f624cd43c 100644 --- a/crates/ark/src/modules/positron/hooks.R +++ b/crates/ark/src/modules/positron/hooks.R @@ -23,6 +23,7 @@ register_hooks <- function() { namespace = TRUE ) register_getHook_hook() + register_source_hook() } rebind <- function(pkg, name, value, namespace = FALSE) { diff --git a/crates/ark/src/modules/positron/hooks_source.R b/crates/ark/src/modules/positron/hooks_source.R new file mode 100644 index 000000000..e924e8e56 --- /dev/null +++ b/crates/ark/src/modules/positron/hooks_source.R @@ -0,0 +1,129 @@ +# Hook for `source()` to support breakpoints in sourced files. +# +# When: +# - The input path is a file that has breakpoints +# - No other arguments than `echo` (used by Positron) or `local` are provided +# We opt into a code path where breakpoints are injected and the whole source is +# wrapped in `{}` to allow stepping through it. +register_source_hook <- function() { + rebind("base", "source", make_ark_source(base::source), namespace = TRUE) +} + +make_ark_source <- function(original_source) { + force(original_source) + + # Take all original arguments for e.g. completions + function( + file, + local = FALSE, + echo = verbose, + print.eval = echo, + exprs, + spaced = use_file, + verbose = getOption("verbose"), + prompt.echo = getOption("prompt"), + max.deparse.length = 150, + width.cutoff = 60L, + deparseCtrl = "showAttributes", + chdir = FALSE, + catch.aborts = FALSE, + encoding = getOption("encoding"), + continue.echo = getOption("continue"), + skip.echo = 0, + keep.source = getOption("keep.source"), + ... 
+ ) { + # Compute default argument for `spaced`. Must be defined before the + # fallback calls. + use_file <- missing(exprs) + + # DRY: Promise for calling `original_source` with all arguments. + # Evaluated lazily only when needed for fallback paths. + delayedAssign( + "fall_back", + original_source( + file = file, + local = local, + echo = echo, + print.eval = print.eval, + exprs = exprs, + spaced = spaced, + verbose = verbose, + prompt.echo = prompt.echo, + max.deparse.length = max.deparse.length, + width.cutoff = width.cutoff, + deparseCtrl = deparseCtrl, + chdir = chdir, + catch.aborts = catch.aborts, + encoding = encoding, + continue.echo = continue.echo, + skip.echo = skip.echo, + keep.source = keep.source, + ... + ) + ) + + # Fall back if hook is disabled + if (!isTRUE(getOption("ark.source_hook", default = TRUE))) { + return(fall_back) + } + + call <- match.call() + + # Ignore `echo` and `local` arguments + call$echo <- NULL + call$local <- NULL + + # Fall back if `file` is not supplied or if any argument other than + # `echo` or `local` is supplied + if (is.null(call$file) || length(call[-1]) != 1) { + return(fall_back) + } + + uri <- path_to_file_uri(file) + if (is.null(uri)) { + return(fall_back) + } + + env <- if (isTRUE(local)) { + parent.frame() + } else if (isFALSE(local)) { + .GlobalEnv + } else if (is.environment(local)) { + local + } else { + return(fall_back) + } + + text <- paste( + readLines(uri, encoding = encoding, warn = FALSE), + collapse = "\n" + ) + annotated <- .ps.Call("ps_annotate_source", uri, text) + + # If NULL, no breakpoints exist for that URI, fall back + if (is.null(annotated)) { + return(fall_back) + } + + log_trace(sprintf( + "DAP: `source()` hook called with breakpoint injection for `uri`='%s'", + uri + )) + + parsed <- parse(text = annotated, keep.source = TRUE) + + for (expr in parsed) { + eval(expr, env) + } + } +} + +#' @export +.ark_annotate_source <- function(uri, source) { + stopifnot( + is_string(uri), +
is_string(source) + ) + .ps.Call("ps_annotate_source", uri, source) +} diff --git a/crates/ark/src/modules/positron/init.R b/crates/ark/src/modules/positron/init.R index 0831b5d5c..21a6786c2 100644 --- a/crates/ark/src/modules/positron/init.R +++ b/crates/ark/src/modules/positron/init.R @@ -166,3 +166,7 @@ if (!exists("the", inherits = FALSE)) { the$cli_version <- NULL } + +initialize <- function() { + debug_initialize() +} diff --git a/crates/ark/src/modules/positron/utils.R b/crates/ark/src/modules/positron/utils.R index 5411f1997..8b86b0b23 100644 --- a/crates/ark/src/modules/positron/utils.R +++ b/crates/ark/src/modules/positron/utils.R @@ -182,6 +182,27 @@ system_path <- function(pkg) { "" } +# Convert a file path to a file:// URI +path_to_file_uri <- function(path) { + # `winslash` takes care of Windows backslashes + path <- tryCatch( + normalizePath(path, winslash = "/", mustWork = TRUE), + error = function(e) NULL + ) + if (is.null(path)) { + return(NULL) + } + + # On Windows, paths like "C:/foo" need to become "file:///C:/foo" + # On Unix, paths like "/foo" need to become "file:///foo" + if (startsWith(path, "/")) { + paste0("file://", path) + } else { + paste0("file:///", path) + } +} + + # `NULL` if successful, otherwise an error condition try_load_namespace <- function(package) { tryCatch( diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap index ce21114f9..961572c6e 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap @@ -3,8 +3,10 @@ source: crates/ark/src/console_annotate.rs expression: result --- #line 4 "file:///test.R" +{ 0 -base::.ark_breakpoint(browser(), "file:///test.R", "1") -#line 5 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), 
"file:///test.R", "1")) +#line 6 "file:///test.R" 1 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap new file mode 100644 index 000000000..96ad4ffe9 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap @@ -0,0 +1,12 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +#line 1 "file:///test.R" +x <- 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 1L)) +#line 2 "file:///test.R" +y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 2L)) +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap new file mode 100644 index 000000000..68fc4c518 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +#line 1 "file:///test.R" +foo <- function(x) { + x + 1 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 3L)) +#line 4 "file:///test.R" +bar <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 4L)) +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap new file mode 100644 index 000000000..8f5f8d7b5 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap @@ -0,0 +1,15 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +#line 1 "file:///test.R" +a <- 1 
+base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 1L)) +#line 2 "file:///test.R" +b <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 2L)) +#line 3 "file:///test.R" +c <- 3 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 3L)) +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap new file mode 100644 index 000000000..c4e94bdc4 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap @@ -0,0 +1,17 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +#line 1 "file:///test.R" +foo <- function() { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + x <- 1 + y <- 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) +#line 5 "file:///test.R" +bar <- 3 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 5L, 5L)) +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap index 5c26c3f85..f3f24ab75 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap @@ -2,15 +2,17 @@ source: crates/ark/src/console_annotate.rs expression: result --- -base::.ark_breakpoint(browser(), "file:///test.R", "1") -#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" x <- 1 f <- function() { -base::.ark_breakpoint(browser(), "file:///test.R", 
"2") -#line 3 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 4 "file:///test.R" y <- 2 z <- 3 } -base::.ark_breakpoint(browser(), "file:///test.R", "3") -#line 6 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "3")) +#line 7 "file:///test.R" w <- 4 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap new file mode 100644 index 000000000..f9b6e0172 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap @@ -0,0 +1,12 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ + { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" + 1 + 2 + } +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap index 39a9731c9..c24e14073 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap @@ -4,7 +4,7 @@ expression: result --- f <- function() { x <- 1 -base::.ark_breakpoint(browser(), "file:///test.R", "1") +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 3 "file:///test.R" y <- 2 } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap index 5cd66097d..80e415701 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap +++ 
b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap @@ -2,11 +2,13 @@ source: crates/ark/src/console_annotate.rs expression: result --- +{ x <- 1 -base::.ark_breakpoint(browser(), "file:///test.R", "1") -#line 2 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" y <- 2 z <- 3 -base::.ark_breakpoint(browser(), "file:///test.R", "2") -#line 4 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 5 "file:///test.R" w <- 4 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap index 6126b2976..763ecf020 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap @@ -2,13 +2,15 @@ source: crates/ark/src/console_annotate.rs expression: result --- +{ x <- 1 f <- function() { -base::.ark_breakpoint(browser(), "file:///test.R", "1") -#line 3 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 4 "file:///test.R" y <- 2 z <- 3 } -base::.ark_breakpoint(browser(), "file:///test.R", "2") -#line 6 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 7 "file:///test.R" w <- 4 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap index c517d6d38..a4f965bc3 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap +++ 
b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap @@ -2,9 +2,11 @@ source: crates/ark/src/console_annotate.rs expression: result --- +{ f <- function() { x <- 1 } -base::.ark_breakpoint(browser(), "file:///test.R", "2") -#line 4 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 5 "file:///test.R" y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap index 42f0366c9..ea388b4b7 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap @@ -2,8 +2,10 @@ source: crates/ark/src/console_annotate.rs expression: result --- +{ x <- 1 -base::.ark_breakpoint(browser(), "file:///test.R", "1") -#line 2 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" y <- 2 z <- 3 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap new file mode 100644 index 000000000..b861a2f0d --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap @@ -0,0 +1,13 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ + { + { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 4 "file:///test.R" + 1 + } + } +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap index 39357c195..c992ee6c7 100644 
--- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap @@ -2,9 +2,11 @@ source: crates/ark/src/console_annotate.rs expression: result --- +{ x <- 1 -base::.ark_breakpoint(browser(), "file:///test.R", "1") +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) -#line 4 "file:///test.R" +#line 5 "file:///test.R" y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap index 057e53037..bdb3170ff 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap @@ -2,8 +2,10 @@ source: crates/ark/src/console_annotate.rs expression: result --- +{ x <- 1 -base::.ark_breakpoint(browser(), "file:///test.R", "1") -#line 12 "file:///test.R" +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 13 "file:///test.R" y <- 2 z <- 3 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap index d71ce7871..423387e00 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap @@ -4,7 +4,7 @@ expression: result --- f <- function() { x <- 1 -base::.ark_breakpoint(browser(), "file:///test.R", "1") +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 23 "file:///test.R" y <- 2 } From 26ab8b16dab317c870e19e5088310041f790498d Mon Sep 
17 00:00:00 2001 From: Lionel Henry Date: Fri, 19 Dec 2025 11:05:38 +0100 Subject: [PATCH 23/42] Consistently use half-open ranges --- crates/ark/src/console_annotate.rs | 53 ++++++++++--------- crates/ark/src/dap/dap.rs | 4 +- ...nnotate__tests__annotate_source_basic.snap | 4 +- ..._annotate_source_multiline_expression.snap | 4 +- ..._annotate_source_multiple_expressions.snap | 6 +-- ...ests__annotate_source_with_breakpoint.snap | 4 +- 6 files changed, 38 insertions(+), 37 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 0293fac4e..d4fcc770c 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -14,8 +14,6 @@ use biome_line_index::LineIndex; use biome_rowan::AstNode; use biome_rowan::AstNodeList; use biome_rowan::SyntaxElement; -use biome_rowan::TextRange; -use biome_rowan::TextSize; use biome_rowan::WalkEvent; use harp::object::RObject; use libr::SEXP; @@ -194,7 +192,7 @@ fn find_anchors_in_list<'a>( // Convert breakpoint line from document coordinates to code coordinates let target_code_line = bp.line - line_offset; let current = &elements[i]; - let current_code_line = get_start_line(current.syntax(), line_index); + let current_code_line = text_trimmed_line_range(current.syntax(), line_index).start; // Base case: target line is at or before current element's start. 
// At root level, we can't place breakpoints (R can't step at top-level), @@ -234,7 +232,7 @@ fn find_anchors_in_list<'a>( // Check if target is beyond current element let next_code_line = if i + 1 < elements.len() { - Some(get_start_line(elements[i + 1].syntax(), line_index)) + Some(text_trimmed_line_range(elements[i + 1].syntax(), line_index).start) } else { None }; @@ -431,7 +429,7 @@ fn find_node_at_line( // Check each child of this list for expr in expr_list.into_iter() { - let child_line = get_start_line(expr.syntax(), line_index); + let child_line = text_trimmed_line_range(expr.syntax(), line_index).start; if child_line == target_line { return Some(expr.into_syntax()); } @@ -462,16 +460,23 @@ fn propagate_change_to_root(original: &RSyntaxNode, replacement: RSyntaxNode) -> current_replacement } -fn get_start_line(node: &RSyntaxNode, line_index: &LineIndex) -> u32 { - let text_range: TextRange = node.text_trimmed_range(); - let offset: TextSize = text_range.start(); - line_index.line_col(offset).map(|lc| lc.line).unwrap_or(0) -} +/// Returns the line range [start, end) for the node's trimmed text. +/// TODO: Should move to an ext trait in aether_utils? Probably fine to have it +/// depend on the syntax crate (and can be made optional if needed). 
+fn text_trimmed_line_range(node: &RSyntaxNode, line_index: &LineIndex) -> std::ops::Range<u32> { + let text_range = node.text_trimmed_range(); + + let start = line_index + .line_col(text_range.start()) + .map(|lc| lc.line) + .unwrap_or(0); - -fn get_end_line(node: &RSyntaxNode, line_index: &LineIndex) -> u32 { - let text_range: TextRange = node.text_trimmed_range(); - let offset: TextSize = text_range.end(); - line_index.line_col(offset).map(|lc| lc.line).unwrap_or(0) + let end = line_index + .line_col(text_range.end()) + .map(|lc| lc.line + 1) // Close the range end + .unwrap_or(0); + + start..end } fn create_breakpoint_call(uri: &Url, id: i64) -> RSyntaxNode { @@ -567,14 +572,10 @@ pub(crate) fn annotate_source(code: &str, uri: &Url, breakpoints: &mut [Breakpoi }; // Collect original line ranges before any modifications - let original_ranges: Vec<(u32, u32)> = original_r + let original_ranges: Vec<_> = original_r .expressions() .into_iter() - .map(|expr| { - let start = get_start_line(expr.syntax(), &line_index); - let end = get_end_line(expr.syntax(), &line_index); - (start, end) - }) + .map(|expr| text_trimmed_line_range(expr.syntax(), &line_index)) .collect(); if original_ranges.is_empty() { @@ -624,21 +625,21 @@ pub(crate) fn annotate_source(code: &str, uri: &Url, breakpoints: &mut [Breakpoi output.push('\n'); } else { // This is an original expression - use the tracked original line range - if let Some(&(start_line, end_line)) = original_ranges.get(original_expr_idx) { + if let Some(line_range) = original_ranges.get(original_expr_idx) { // Add #line directive (R uses 1-based lines) - output.push_str(&format!("#line {} \"{}\"\n", start_line + 1, uri)); + output.push_str(&format!("#line {} \"{}\"\n", line_range.start + 1, uri)); // Add the expression, stripping leading whitespace since we added our own newline output.push_str(expr_str.trim_start()); output.push('\n'); - // Add verify call after the expression - // Use L suffix for integer literals in R + // Add 
verification call after the expression. + // Convert from 0-based to 1-based lines for R. output.push_str(&format!( "base::.ark_auto_step(base::.ark_verify_breakpoints_range(\"{}\", {}L, {}L))\n", uri, - start_line + 1, - end_line + 1 + line_range.start + 1, + line_range.end + 1 )); original_expr_idx += 1; diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 3f9952f59..8cdba9b07 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -282,7 +282,7 @@ impl Dap { /// Verify breakpoints within a line range for a given URI /// /// Loops over all breakpoints for the URI and verifies any unverified - /// breakpoints that fall within the range [start_line, end_line]. + /// breakpoints that fall within the range [start_line, end_line). /// Sends a `BreakpointVerified` event for each newly verified breakpoint. pub fn verify_breakpoints(&mut self, uri: &Url, start_line: u32, end_line: u32) { let Some((_, bp_list)) = self.breakpoints.get_mut(uri) else { @@ -300,7 +300,7 @@ impl Dap { } let line = bp.line; - if line >= start_line && line <= end_line { + if line >= start_line && line < end_line { bp.state = BreakpointState::Verified; if let Some(tx) = &self.backend_events_tx { diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap index 96ad4ffe9..67b94d6d0 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap @@ -5,8 +5,8 @@ expression: result { #line 1 "file:///test.R" x <- 1 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 1L)) +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) #line 2 "file:///test.R" y <- 2 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 2L)) 
+base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap index 68fc4c518..5a67cbc93 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap @@ -7,8 +7,8 @@ expression: result foo <- function(x) { x + 1 } -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 3L)) +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) #line 4 "file:///test.R" bar <- 2 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 4L)) +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 5L)) } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap index 8f5f8d7b5..feefc52bd 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap @@ -5,11 +5,11 @@ expression: result { #line 1 "file:///test.R" a <- 1 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 1L)) +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) #line 2 "file:///test.R" b <- 2 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 2L)) +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) #line 3 "file:///test.R" c <- 3 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 3L)) 
+base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap index c4e94bdc4..0e247ad4f 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap @@ -10,8 +10,8 @@ base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) x <- 1 y <- 2 } -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) #line 5 "file:///test.R" bar <- 3 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 5L, 5L)) +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 5L, 6L)) } From b81abc09bae9571238a50feb489f2ef28a4aa2bb Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 19 Dec 2025 15:24:18 +0100 Subject: [PATCH 24/42] Review annotation code --- Cargo.lock | 90 +- Cargo.toml | 12 +- crates/ark/src/console_annotate.rs | 1348 +++++++++++------ crates/ark/src/interface.rs | 2 +- ...nject_breakpoints_empty_brace_sibling.snap | 10 + ...ect_breakpoints_if_else_both_branches.snap | 13 + 6 files changed, 949 insertions(+), 526 deletions(-) create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap diff --git a/Cargo.lock b/Cargo.lock index 43d1efbbd..028620d4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -209,11 +209,10 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aether_lsp_utils" version = "0.0.0" -source = 
"git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" dependencies = [ "anyhow", - "biome_line_index", - "biome_text_size", + "biome_line_index 0.1.0 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "dissimilar", "itertools 0.13.0", "line_ending", @@ -246,21 +245,19 @@ dependencies = [ [[package]] name = "air_r_factory" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" dependencies = [ "air_r_syntax", - "biome_rowan", + "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", ] [[package]] name = "air_r_parser" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" dependencies = [ "air_r_factory", "air_r_syntax", "biome_parser", - "biome_rowan", + "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "biome_unicode_table", "serde", "tracing", @@ -271,9 +268,8 @@ dependencies = [ [[package]] name = "air_r_syntax" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" dependencies = [ - "biome_rowan", + "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "serde", ] @@ -355,8 +351,8 @@ dependencies = [ "assert_matches", "async-trait", "base64 0.21.0", - "biome_line_index", - "biome_rowan", + "biome_line_index 0.1.0", + "biome_rowan 0.5.7", "blake3", "bus", "cc", @@ -488,10 +484,10 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "biome_console" version = "0.5.7" -source = 
"git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_markup", - "biome_text_size", + "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "serde", "termcolor", "unicode-segmentation", @@ -501,15 +497,15 @@ dependencies = [ [[package]] name = "biome_diagnostics" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "backtrace", "biome_console", "biome_diagnostics_categories", "biome_diagnostics_macros", - "biome_rowan", - "biome_text_edit", - "biome_text_size", + "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_edit 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "enumflags2", "serde", "serde_json", @@ -521,7 +517,7 @@ dependencies = [ [[package]] name = "biome_diagnostics_categories" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "quote", "serde", @@ -530,7 +526,7 @@ dependencies = [ [[package]] name = "biome_diagnostics_macros" version = "0.5.7" -source = 
"git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "proc-macro-error2", "proc-macro2", @@ -541,16 +537,24 @@ dependencies = [ [[package]] name = "biome_line_index" version = "0.1.0" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" dependencies = [ - "biome_text_size", + "biome_text_size 0.5.7", + "rustc-hash", +] + +[[package]] +name = "biome_line_index" +version = "0.1.0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" +dependencies = [ + "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "rustc-hash", ] [[package]] name = "biome_markup" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "proc-macro-error2", "proc-macro2", @@ -560,11 +564,11 @@ dependencies = [ [[package]] name = "biome_parser" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_console", "biome_diagnostics", - "biome_rowan", + "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "biome_unicode_table", "drop_bomb", "enumflags2", @@ -574,10 +578,20 @@ dependencies = [ [[package]] name = 
"biome_rowan" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" dependencies = [ - "biome_text_edit", - "biome_text_size", + "biome_text_edit 0.5.7", + "biome_text_size 0.5.7", + "hashbrown 0.15.5", + "rustc-hash", +] + +[[package]] +name = "biome_rowan" +version = "0.5.7" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" +dependencies = [ + "biome_text_edit 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "hashbrown 0.15.5", "rustc-hash", "serde", @@ -586,9 +600,17 @@ dependencies = [ [[package]] name = "biome_text_edit" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" dependencies = [ - "biome_text_size", + "biome_text_size 0.5.7", + "similar", +] + +[[package]] +name = "biome_text_edit" +version = "0.5.7" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" +dependencies = [ + "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", "serde", "similar", ] @@ -596,7 +618,11 @@ dependencies = [ [[package]] name = "biome_text_size" version = "0.5.7" -source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" + +[[package]] +name = "biome_text_size" +version = "0.5.7" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "serde", ] @@ -604,7 +630,7 @@ dependencies = [ [[package]] name = "biome_unicode_table" version = "0.5.7" 
-source = "git+https://github.com/biomejs/biome?rev=c13fc60726883781e4530a4437724273b560c8e0#c13fc60726883781e4530a4437724273b560c8e0" +source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" [[package]] name = "bitflags" @@ -1967,7 +1993,6 @@ dependencies = [ [[package]] name = "line_ending" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" dependencies = [ "memchr", "settings", @@ -3062,7 +3087,6 @@ dependencies = [ [[package]] name = "settings" version = "0.0.0" -source = "git+https://github.com/posit-dev/air?rev=f959e32eee91#f959e32eee91654f04a44a32e97321ef5d510e93" [[package]] name = "sha1" diff --git a/Cargo.toml b/Cargo.toml index f0b2c7c12..fccff5d2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,11 +25,11 @@ license = "MIT" authors = ["Posit Software, PBC"] [workspace.dependencies] -biome_line_index = { git = "https://github.com/biomejs/biome", rev = "c13fc60726883781e4530a4437724273b560c8e0" } -biome_rowan = { git = "https://github.com/biomejs/biome", rev = "c13fc60726883781e4530a4437724273b560c8e0" } -aether_factory = { git = "https://github.com/posit-dev/air", package = "air_r_factory", rev = "f959e32eee91" } -aether_lsp_utils = { git = "https://github.com/posit-dev/air", rev = "f959e32eee91" } -aether_parser = { git = "https://github.com/posit-dev/air", package = "air_r_parser", rev = "f959e32eee91" } -aether_syntax = { git = "https://github.com/posit-dev/air", package = "air_r_syntax", rev = "f959e32eee91" } +biome_line_index = { path = "/Users/lionel/Sync/Projects/Positron/biome/crates/biome_line_index" } +biome_rowan = { path = "/Users/lionel/Sync/Projects/Positron/biome/crates/biome_rowan" } +aether_factory = { path = "/Users/lionel/Sync/Projects/Positron/air/crates/air_r_factory", package = "air_r_factory" } +aether_lsp_utils = { path = 
"/Users/lionel/Sync/Projects/Positron/air/crates/aether_lsp_utils" } +aether_parser = { path = "/Users/lionel/Sync/Projects/Positron/air/crates/air_r_parser", package = "air_r_parser" } +aether_syntax = { path = "/Users/lionel/Sync/Projects/Positron/air/crates/air_r_syntax", package = "air_r_syntax" } # For https://github.com/ebkalderon/tower-lsp/pull/428 tower-lsp = { branch = "bugfix/patches", git = "https://github.com/lionel-/tower-lsp" } diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index d4fcc770c..53ff6bc35 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -1,19 +1,24 @@ // // console_annotate.rs // -// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// Copyright (C) 2026 Posit Software, PBC. All rights reserved. // use aether_syntax::RBracedExpressions; use aether_syntax::RExpressionList; +use aether_syntax::RLanguage; use aether_syntax::RRoot; +use aether_syntax::RSyntaxKind; use aether_syntax::RSyntaxNode; use amalthea::wire::execute_request::CodeLocation; -use amalthea::wire::execute_request::Position; +use anyhow::anyhow; use biome_line_index::LineIndex; +use biome_rowan::syntax::SyntaxElementKey; use biome_rowan::AstNode; use biome_rowan::AstNodeList; -use biome_rowan::SyntaxElement; +use biome_rowan::SyntaxRewriter; +use biome_rowan::TriviaPieceKind; +use biome_rowan::VisitNodeSignal; use biome_rowan::WalkEvent; use harp::object::RObject; use libr::SEXP; @@ -23,632 +28,722 @@ use crate::dap::dap::Breakpoint; use crate::dap::dap::BreakpointState; use crate::interface::RMain; +/// Function name used for auto-stepping over injected calls such as breakpoints +const AUTO_STEP_FUNCTION: &str = ".ark_auto_step"; + +// Main responsibilities of code annotation: +// +// 1. Inject breakpoints in the code, along with line directives that map to +// original document lines. Breakpoints can only be injected in the top-level +// expression list or in a `{}` list. +// +// 2. 
Mark invalid breakpoints as such in the DAP state. This concerns +// breakpoints on a closing `}` or inside multi-line expressions. +// The breakpoints are marked invalid by mutation as soon as they are matched to +// an invalid location by our code walker. +// +// 3. When sourcing with `source()` or `load_all()`, wrap in `{}` to allow +// top-level stepping, inject breakpoints, and inject top-level verification +// calls to let Ark know a breakpoint is now active after evaluation. This is +// handled in a separate code path in `annotate_source()`. +// +// Breakpoint injection happens in two phases: +// +// - We first collect "anchors", i.e. the syntax node where a breakpoint should +// be injected, its line in document coordinates, and a unique identifier. +// +// - In a second pass we use a Biome `SyntaxRewriter` to go ahead and modify the +// code. This is a tree visitor that allows replacing the node on the way out +// (currently via an extension to the Biome API that will be submitted to Biome +// later on). This approach was chosen over `BatchMutation`, which collects +// changes and applies them from deepest to shallowest, because the latter: +// +// - Doesn't handle insertions in lists (though that could be contributed). +// Only replacements are currently supported. +// +// - Doesn't handle nested changes in a node that is later _replaced_. +// If the scheduled changes were pure insertions (if insertions were +// supported) then nested changes would compose correctly. However, nested +// changes wouldn't work well as soon as a replacement is involved, because +// BatchMutation can't express "edit a descendant" and "replace an ancestor" +// in one batch without risking the ancestor replacement overwriting the +// descendant edit. +// +// That limitation interacts badly with Biome's strict stance on mutation. +// For example, you can't add a comment to a node; you have to create a new one +// that features a comment. 
This issue arises when adding a line directive, e.g. in: +// +// ```r +// { # BP 1 +// 1 # BP 2 +// } +// ``` +// +// BP 2 causes changes inside the braces. Then BP 1 causes the whole brace +// expression to be replaced with a variant that has a line directive attached. +// But there is no way to express both these changes to BatchMutation because it +// takes modifications upfront. This is why we work instead with `SyntaxRewriter` +// which allows us to replace nodes from bottom to top as we go. +// +// Note that Rust-Analyzer's version of Rowan is much more flexible and allows you to +// create a mutable syntax tree that you can freely update (see `clone_for_update()` +// and the tree editor API). Unfortunately Biome has adopted a strict stance on +// immutable data structures so we don't have access to such affordances. + +// Called by ReadConsole to inject breakpoints (if any) and source reference +// mapping (via a line directive) pub(crate) fn annotate_input( code: &str, location: CodeLocation, breakpoints: Option<&mut [Breakpoint]>, -) -> String { - // First, inject breakpoints into the original code (before adding line directive). - // This ensures AST line numbers match the code coordinates we expect. +) -> anyhow::Result<String> { + // First, inject breakpoints into the original code. This must happen before + // we add the outer line directive, otherwise the coordinates of inner line + // directives are shifted by 1 line. let code_with_breakpoints = if let Some(breakpoints) = breakpoints { let line_index = LineIndex::new(code); - inject_breakpoints(code, location.clone(), breakpoints, &line_index) + inject_breakpoints(code, location.clone(), breakpoints, &line_index)? 
} else { code.to_string() }; - // Now add the line directive to the (possibly modified) code - let node = aether_parser::parse(&code_with_breakpoints, Default::default()).tree(); - let Some(first_token) = node.syntax().first_token() else { - return code_with_breakpoints; - }; - + // Now add the line directive to the (possibly modified) code. This maps the + // code to evaluate to a location in the original document. let line_directive = format!( "#line {line} \"{uri}\"", line = location.start.line + 1, uri = location.uri ); - // Leading whitespace to ensure that R starts parsing expressions from - // the expected `character` offset. + // Add leading whitespace to ensure that R starts parsing expressions from + // the expected `character` offset let leading_padding = " ".repeat(location.start.character as usize); - // Collect existing leading trivia as (kind, text) tuples - let existing_trivia: Vec<_> = first_token - .leading_trivia() - .pieces() - .map(|piece| (piece.kind(), piece.text().to_string())) - .collect(); - - // Create new trivia with line directive prepended - let new_trivia: Vec<_> = vec![ - ( - biome_rowan::TriviaPieceKind::SingleLineComment, - line_directive.to_string(), - ), - (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), - ( - biome_rowan::TriviaPieceKind::Whitespace, - leading_padding.to_string(), - ), - ] - .into_iter() - .chain(existing_trivia.into_iter()) - .collect(); - - let new_first_token = - first_token.with_leading_trivia(new_trivia.iter().map(|(k, t)| (*k, t.as_str()))); - - let Some(new_node) = node - .syntax() - .clone() - .replace_child(first_token.into(), new_first_token.into()) - else { - return code_with_breakpoints; - }; - - new_node.to_string() + Ok(format!( + "{line_directive}\n{leading_padding}{code_with_breakpoints}" + )) } -#[allow(dead_code)] pub(crate) fn inject_breakpoints( code: &str, location: CodeLocation, breakpoints: &mut [Breakpoint], line_index: &LineIndex, -) -> String { +) -> anyhow::Result<String> { let root 
= aether_parser::parse(code, Default::default()).tree(); - // The offset between document coordinates and code coordinates. - // Breakpoints are in document coordinates, but AST nodes are in code coordinates. + // The offset between document coordinates and code coordinates. Breakpoints + // are in document coordinates, but AST nodes are in code coordinates + // (starting at line 0). let line_offset = location.start.line; - // Filter breakpoints to only those within the source's valid range + // Filter breakpoints to only those within the source's valid range. We + // collect both for simplicity and because we need to sort the vector + // later on. let breakpoints: Vec<_> = breakpoints .iter_mut() .filter(|bp| bp.line >= location.start.line && bp.line <= location.end.line) .collect(); if breakpoints.is_empty() { - return code.into(); + return Ok(code.to_string()); } - // Phase 1: Find breakpoint anchors - let anchors = find_breakpoint_anchors( - root.syntax(), - breakpoints, - &location.uri, - line_index, - line_offset, - ); + // First collect all breakpoint anchors, then inject in a separate pass. + // This two-stage approach is not necessary but keeps the anchor-finding + // logic (with its edge cases like invalid breakpoints, nesting decisions, + // look-ahead) separate from the tree transformation with the + // `SyntaxRewriter`. + let anchors = find_breakpoint_anchors(root.syntax(), breakpoints, line_index, line_offset)?; if anchors.is_empty() { - return code.into(); + return Ok(code.to_string()); + } + + // Build map of anchor key -> (breakpoint_id, doc_line). + // Anchors already store document coordinates. + let breakpoint_map: std::collections::HashMap<_, _> = anchors + .into_iter() + .map(|a| (a.anchor.key(), (a.breakpoint_id, a.doc_line))) + .collect(); + + // Now inject breakpoints with a `SyntaxRewriter`. 
This is the most + // practical option we have with Biome's Rowan because `BatchMutation` does + // not support 1-to-2 splicing (insert breakpoint call before an expression, + // keeping the original). + let mut rewriter = BreakpointRewriter::new(&location.uri, breakpoint_map); + let transformed = rewriter.transform(root.syntax().clone()); + + if let Some(err) = rewriter.take_err() { + return Err(err); + } + + Ok(transformed.to_string()) +} + +/// Annotate source code for `source()` and `pkgload::load_all()`. +/// +/// - Wraps the whole source in a `{}` block. This allows R to step through the +/// top-level expressions. +/// - Injects breakpoint calls (`.ark_auto_step(.ark_breakpoint(...))`) at +/// breakpoint locations. +/// - Injects verification calls (`.ark_auto_step(.ark_verify_breakpoints_range(...))`) +/// after each top-level expression. Verifying expression by expression allows +/// marking breakpoints as verified even when an expression fails mid-script. +/// - `#line` directives before each original expression so the debugger knows +/// where to step in the original file. 
+pub(crate) fn annotate_source( + code: &str, + uri: &Url, + breakpoints: &mut [Breakpoint], +) -> anyhow::Result<String> { + let line_index = LineIndex::new(code); + + let root = aether_parser::parse(code, Default::default()).tree(); + let root_node = RRoot::cast(root.syntax().clone()) + .ok_or_else(|| anyhow!("Failed to cast parsed tree to RRoot"))?; + + // Collect line ranges for top-level expressions BEFORE any modifications + let top_level_ranges: Vec<std::ops::Range<u32>> = root_node + .expressions() + .into_iter() + .map(|expr| text_trimmed_line_range(expr.syntax(), &line_index)) + .collect::<anyhow::Result<Vec<_>>>()?; + + if top_level_ranges.is_empty() { + return Ok(code.to_string()); + } + + // Find breakpoint anchors (may be nested within top-level expressions) + let bp_vec: Vec<_> = breakpoints.iter_mut().collect(); + let anchors = find_breakpoint_anchors(root.syntax(), bp_vec, &line_index, 0)?; + + // Build map of anchor key -> (breakpoint_id, doc_line). + let breakpoint_map: std::collections::HashMap<_, _> = anchors + .into_iter() + .map(|a| (a.anchor.key(), (a.breakpoint_id, a.doc_line))) + .collect(); + + let mut rewriter = BreakpointRewriter::new(uri, breakpoint_map); + let transformed = rewriter.transform(root.syntax().clone()); + + if let Some(err) = rewriter.take_err() { + return Err(err); } - // Phase 2: Inject breakpoints - inject_breakpoint_calls(root.syntax(), anchors, &location.uri, line_offset) + let transformed_root = RRoot::cast(transformed) + .ok_or_else(|| anyhow!("Failed to cast transformed tree to RRoot"))?; + + // Rebuild root expression list with #line directives and verify calls + let annotated = annotate_root_list( + transformed_root.expressions().syntax().clone(), + &top_level_ranges, + uri, + )?; + + // Wrap in braces so R can step through expressions + Ok(format!("{{\n{annotated}}}\n")) } struct BreakpointAnchor { + /// Unique identifier for the breakpoint, injected as argument in the code breakpoint_id: i64, - /// The line in code coordinates (0-based within parsed code) - 
code_line: u32, + /// The line in document coordinates (0-based) + doc_line: u32, + /// The anchor node (expression to place breakpoint before) + anchor: RSyntaxNode, } fn find_breakpoint_anchors( root: &RSyntaxNode, mut breakpoints: Vec<&mut Breakpoint>, - uri: &Url, line_index: &LineIndex, line_offset: u32, -) -> Vec<BreakpointAnchor> { - // Sort breakpoints by line ascending +) -> anyhow::Result<Vec<BreakpointAnchor>> { + // Sort breakpoints by ascending line so we can walk the expression lists in + // DFS order, and match breakpoints to expressions by comparing lines. Both + // sequences proceed in roughly the same order (by line number), so we can + // consume breakpoints one by one as we find their anchors without needing + // to go backward in either sequence. breakpoints.sort_by_key(|bp| bp.line); - let mut anchors = Vec::new(); + // Peekable so we can inspect the next breakpoint's line without consuming it, + // deciding whether to place it at the current expression or continue to the + // next expression without consuming the current breakpoint. 
let mut bp_iter = breakpoints.into_iter().peekable(); + let mut anchors = Vec::new(); + // Start from the root's expression list - let Some(r) = RRoot::cast(root.clone()) else { - return anchors; - }; + let r = + RRoot::cast(root.clone()).ok_or_else(|| anyhow!("Failed to cast parsed tree to RRoot"))?; let root_list = r.expressions(); find_anchors_in_list( &root_list, &mut bp_iter, &mut anchors, - uri, line_index, line_offset, true, - ); + )?; - anchors + Ok(anchors) } +// Takes an expression list, either from the root node or a brace node fn find_anchors_in_list<'a>( list: &RExpressionList, breakpoints: &mut std::iter::Peekable>, anchors: &mut Vec, - uri: &Url, line_index: &LineIndex, line_offset: u32, is_root: bool, -) { +) -> anyhow::Result<()> { + // Collect to allow indexed look-ahead and re-checking the same element + // without consuming an iterator let elements: Vec<_> = list.into_iter().collect(); if elements.is_empty() { - return; + return Ok(()); } let mut i = 0; while i < elements.len() { let Some(bp) = breakpoints.peek() else { - return; + // No more breakpoints + return Ok(()); }; // Convert breakpoint line from document coordinates to code coordinates - let target_code_line = bp.line - line_offset; + let bp_code_line = bp.line - line_offset; + let current = &elements[i]; - let current_code_line = text_trimmed_line_range(current.syntax(), line_index).start; - - // Base case: target line is at or before current element's start. - // At root level, we can't place breakpoints (R can't step at top-level), - // so we must try to find a nested brace list first. 
- if target_code_line <= current_code_line { - if is_root { - // At root level, try to find a nested brace list in this element - let anchors_before = anchors.len(); - if find_anchor_in_element( - current.syntax(), - breakpoints, - anchors, - uri, - line_index, - line_offset, - ) - .is_some() && - anchors.len() > anchors_before - { - // Successfully placed in nested list - continue; - } - // No nested brace list found, mark as invalid - let bp = breakpoints.next().unwrap(); - bp.state = BreakpointState::Invalid; + let current_line = text_trimmed_line_range(current.syntax(), line_index)?.start; + + let next_line = if i + 1 < elements.len() { + let next = &elements[i + 1]; + let next_line = text_trimmed_line_range(next.syntax(), line_index)?.start; + + // If the breakpoint is at or past the next element, move on + if bp_code_line >= next_line { + i += 1; continue; } - let bp = breakpoints.next().unwrap(); - // Update bp.line to the actual document line where the breakpoint is placed - bp.line = current_code_line + line_offset; - anchors.push(BreakpointAnchor { - breakpoint_id: bp.id, - code_line: current_code_line, - }); - continue; - } - // Check if target is beyond current element - let next_code_line = if i + 1 < elements.len() { - Some(text_trimmed_line_range(elements[i + 1].syntax(), line_index).start) + // Otherwise the breakpoint is either at `current_line` or between + // `current_line` and `next_line` + Some(next_line) } else { + // There is no next element. The breakpoint either belongs to the + // current element or is past the current list and we need to + // backtrack and explore sibling trees. 
None }; - // Recursion case: target must be within current element - if next_code_line.map_or(true, |next| target_code_line < next) { - // Search within current element for brace lists - let anchors_before = anchors.len(); - if find_anchor_in_element( - current.syntax(), - breakpoints, - anchors, - uri, - line_index, - line_offset, - ) - .is_some() - { - // A nested brace list was found and processed. - if anchors.len() > anchors_before { - // Anchor(s) placed in nested list. Continue without incrementing - // `i` to re-check this element for any remaining breakpoints - // (handles multiple breakpoints in same block). - continue; - } - // The nested list was exhausted without placing an anchor for the - // current breakpoint. This means the target line is beyond all - // expressions in the nested list (e.g., on a closing `}` line with - // no executable code). - if !is_root && next_code_line.is_none() { - // Pop back up to let the parent handle it - the target might - // still be reachable via a sibling element in an outer list. - return; - } - // At root level or with more elements, mark as invalid. - let bp = breakpoints.next().unwrap(); - bp.state = BreakpointState::Invalid; - continue; - } else { - // No brace list found in this element. - if !is_root && next_code_line.is_none() { - // Pop back up to let the parent handle it - the target might - // still be reachable via a sibling element in an outer list. 
- return; - } - if is_root { - // At root level, can't place breakpoints without a nested brace list - let bp = breakpoints.next().unwrap(); - bp.state = BreakpointState::Invalid; - continue; - } - // Use current element as fallback (only in nested lists) - let bp = breakpoints.next().unwrap(); - // Update bp.line to the actual document line where the breakpoint is placed - bp.line = current_code_line + line_offset; - anchors.push(BreakpointAnchor { - breakpoint_id: bp.id, - code_line: current_code_line, - }); + // Try to place in a nested brace list first + let found_nested = find_anchors_in_nested_list( + current.syntax(), + breakpoints, + anchors, + line_index, + line_offset, + )?; + + if found_nested { + let Some(bp) = breakpoints.peek() else { + // No breakpoints left to process + return Ok(()); + }; + + let bp_code_line = bp.line - line_offset; + + // If next breakpoint is at or past next element, advance + if next_line.is_some_and(|next| bp_code_line >= next) { + i += 1; continue; } + + // Breakpoint is still within this element but wasn't placed. + // It means it's on a closing brace so consume it and mark invalid. + let bp = breakpoints.next().unwrap(); + bp.state = BreakpointState::Invalid; + + i += 1; + continue; + } + + if is_root { + // We never place breakpoints at top-level. R can only step through a `{` list. + let bp = breakpoints.next().unwrap(); + bp.state = BreakpointState::Invalid; + + i += 1; + continue; } - // Continue case: move to next element - i += 1; + if next_line.is_none() && bp_code_line > current_line { + // Breakpoint is past this scope entirely, in a sibling tree. Let + // parent handle it. 
+ return Ok(()); + } + + // Place breakpoint at current element of the `{` list + let bp = breakpoints.next().unwrap(); + let doc_line = current_line + line_offset; + bp.line = doc_line; + anchors.push(BreakpointAnchor { + breakpoint_id: bp.id, + doc_line, + anchor: current.syntax().clone(), + }); } + + Ok(()) } -fn find_anchor_in_element<'a>( +fn find_anchors_in_nested_list<'a>( element: &RSyntaxNode, breakpoints: &mut std::iter::Peekable>, anchors: &mut Vec, - uri: &Url, line_index: &LineIndex, line_offset: u32, -) -> Option<()> { - use biome_rowan::WalkEvent; +) -> anyhow::Result { + let mut found_any = false; + let mut skip_until: Option = None; // Search for brace lists in descendants for event in element.preorder() { - let node = match event { - WalkEvent::Enter(n) => n, - WalkEvent::Leave(_) => continue, - }; + match event { + WalkEvent::Leave(node) => { + // If we're leaving the node we're skipping, clear the skip flag + if skip_until.as_ref() == Some(&node) { + skip_until = None; + } + continue; + }, - if let Some(braced) = RBracedExpressions::cast(node) { - let expr_list = braced.expressions(); - if !expr_list.is_empty() { - // Found a non-empty brace list, recurse into it - find_anchors_in_list( - &expr_list, - breakpoints, - anchors, - uri, - line_index, - line_offset, - false, - ); - return Some(()); - } + WalkEvent::Enter(node) => { + // If we're currently skipping a subtree, continue + if skip_until.is_some() { + continue; + } + + if let Some(braced) = RBracedExpressions::cast(node.clone()) { + let expr_list = braced.expressions(); + if !expr_list.is_empty() { + // Found a non-empty brace list, recurse into it + find_anchors_in_list( + &expr_list, + breakpoints, + anchors, + line_index, + line_offset, + false, + )?; + found_any = true; + + // Skip this node's subtree to avoid double-processing + skip_until = Some(node); + } + } + }, } } - None + Ok(found_any) } -fn inject_breakpoint_calls( - root: &RSyntaxNode, - mut anchors: Vec, - uri: &Url, - 
line_offset: u32, -) -> String { - if anchors.is_empty() { - return root.to_string(); - } +/// Rewriter that injects breakpoint calls into expression lists. +/// +/// We use `SyntaxRewriter` rather than `BatchMutation` because we need 1-to-2 +/// splicing (insert breakpoint call before an expression, keeping the +/// original). `BatchMutation` only supports 1-to-1 or 1-to-0 replacements. +struct BreakpointRewriter<'a> { + uri: &'a Url, - // Sort anchors by line DESCENDING so we modify from bottom to top. - // This preserves line numbers for earlier breakpoints. - anchors.sort_by_key(|a| std::cmp::Reverse(a.code_line)); + /// Map from anchor key to (breakpoint_id, line_in_document_coords) + breakpoint_map: std::collections::HashMap, - let mut source = root.to_string(); + /// Stack of pending injections, one frame per expression list we're inside. + injection_stack: Vec>, - // Process each breakpoint independently by re-parsing after each injection - for anchor_info in anchors { - // Re-parse the current source - let parse_result = aether_parser::parse(&source, Default::default()); - let root = parse_result.tree(); - let new_line_index = LineIndex::new(&source); + /// First error encountered during transformation (if any) + err: Option, +} - // Find the anchor node at the target line (using code coordinates) - let Some(new_anchor) = - find_node_at_line(root.syntax(), anchor_info.code_line, &new_line_index) - else { - continue; - }; +/// Pending injection to be applied when visiting the parent expression list. 
+struct PendingInjection { + /// Slot index in the parent list + slot_index: usize, + /// Nodes to insert before this slot + insert_before: Vec, +} - // Get the parent list and find the anchor's index - let Some(parent) = new_anchor.parent() else { - continue; - }; +impl<'a> BreakpointRewriter<'a> { + fn new( + uri: &'a Url, + breakpoint_map: std::collections::HashMap, + ) -> Self { + Self { + uri, + breakpoint_map, + injection_stack: Vec::new(), + err: None, + } + } - let parent_children: Vec<_> = parent.children().collect(); - let Some(index) = parent_children - .iter() - .position(|child| child == &new_anchor) - else { - continue; - }; + /// Take the error (if any) out of the rewriter. + fn take_err(&mut self) -> Option { + self.err.take() + } - // Create the breakpoint call and modified anchor - // Line directive uses document coordinates (code_line + line_offset) - let breakpoint_call = create_breakpoint_call(uri, anchor_info.breakpoint_id); - let doc_line = anchor_info.code_line + line_offset; - let modified_anchor = add_line_directive_to_node(&new_anchor, doc_line, uri); + /// Record an error and return the original node unchanged. 
+ fn fail(&mut self, err: anyhow::Error, node: RSyntaxNode) -> RSyntaxNode { + if self.err.is_none() { + self.err = Some(err); + } + node + } +} - // Inject the breakpoint by splicing - let modified_parent = parent.clone().splice_slots(index..=index, [ - Some(SyntaxElement::Node(breakpoint_call)), - Some(SyntaxElement::Node(modified_anchor)), - ]); +impl SyntaxRewriter for BreakpointRewriter<'_> { + type Language = RLanguage; - // Propagate the change to the root - let new_root = propagate_change_to_root(&parent, modified_parent); + fn visit_node(&mut self, node: RSyntaxNode) -> VisitNodeSignal { + // Only push frames for expression lists, not other list types + if node.kind() == RSyntaxKind::R_EXPRESSION_LIST { + self.injection_stack.push(Vec::new()); + } - // Update source for next iteration - source = new_root.to_string(); + VisitNodeSignal::Traverse(node) } - source -} + fn visit_node_post(&mut self, node: RSyntaxNode) -> RSyntaxNode { + // If we already have an error, skip processing + if self.err.is_some() { + return node; + } -/// Find a node at the specified line in the AST. -/// Returns the first direct child of a list (program or brace list) that starts at or after the target line. 
-fn find_node_at_line( - root: &RSyntaxNode, - target_line: u32, - line_index: &LineIndex, -) -> Option { - // We need to find expression lists and check their children - for event in root.preorder() { - let node = match event { - WalkEvent::Enter(n) => n, - WalkEvent::Leave(_) => continue, + // If an expression list, apply any pending injections + if node.kind() == RSyntaxKind::R_EXPRESSION_LIST { + let injections = self.injection_stack.pop().unwrap_or_default(); + + if injections.is_empty() { + return node; + } else { + return Self::apply_injections(node, injections); + } + } + + let Some(&(breakpoint_id, line)) = self.breakpoint_map.get(&node.key()) else { + // Not a breakpoint anchor, nothing to inject + return node; }; - // Check if this is a root or brace expression list - let expr_list = if let Some(r) = RRoot::cast(node.clone()) { - r.expressions() - } else if let Some(braced) = RBracedExpressions::cast(node.clone()) { - braced.expressions() - } else { - continue; + // Anchors are always inside expression lists, so we must have a frame + let Some(frame) = self.injection_stack.last_mut() else { + return self.fail( + anyhow!("Breakpoint anchor found outside expression list"), + node, + ); }; - // Check each child of this list - for expr in expr_list.into_iter() { - let child_line = text_trimmed_line_range(expr.syntax(), line_index).start; - if child_line == target_line { - return Some(expr.into_syntax()); - } - } - } + // Add line directive to current node right away + let decorated_node = match add_line_directive_to_node(&node, line, self.uri) { + Ok(n) => n, + Err(err) => return self.fail(err, node), + }; - None -} + // Queue breakpoint injection for parent expression list + let breakpoint_call = create_breakpoint_call(self.uri, breakpoint_id); + frame.push(PendingInjection { + slot_index: node.index(), + insert_before: vec![breakpoint_call], + }); -/// Propagate a node replacement up to the root of the tree. 
-fn propagate_change_to_root(original: &RSyntaxNode, replacement: RSyntaxNode) -> RSyntaxNode { - let mut current_original = original.clone(); - let mut current_replacement = replacement; - - while let Some(parent) = current_original.parent() { - let new_parent = parent - .clone() - .replace_child( - current_original.clone().into(), - current_replacement.clone().into(), - ) - .expect("Failed to replace child"); - - current_original = parent; - current_replacement = new_parent; + decorated_node } +} - current_replacement +impl BreakpointRewriter<'_> { + /// Apply pending injections to an expression list node. + fn apply_injections( + mut node: RSyntaxNode, + mut injections: Vec, + ) -> RSyntaxNode { + // Sort by slot index descending so we can splice without invalidating indices + injections.sort_by(|a, b| b.slot_index.cmp(&a.slot_index)); + + for injection in injections { + // Insert before (at the slot index) + if !injection.insert_before.is_empty() { + node = node.splice_slots( + injection.slot_index..injection.slot_index, + injection.insert_before.into_iter().map(|n| Some(n.into())), + ); + } + } + + node + } } /// Returns the line range [start, end) for the node's trimmed text. -/// TODO: Should move to an ext trait in aether_utils? Probably fine to have it -/// depend on the syntax crate (and can be made optional if needed). -fn text_trimmed_line_range(node: &RSyntaxNode, line_index: &LineIndex) -> std::ops::Range { +fn text_trimmed_line_range( + node: &RSyntaxNode, + line_index: &LineIndex, +) -> anyhow::Result> { + // This gives a range in offset coordinates. We need to retrieve the line range + // using the line index. 
let text_range = node.text_trimmed_range(); let start = line_index .line_col(text_range.start()) .map(|lc| lc.line) - .unwrap_or(0); + .ok_or_else(|| anyhow!("Failed to get line for text range start offset"))?; let end = line_index .line_col(text_range.end()) .map(|lc| lc.line + 1) // Close the range end - .unwrap_or(0); + .ok_or_else(|| anyhow!("Failed to get line for text range end offset"))?; - start..end -} - -fn create_breakpoint_call(uri: &Url, id: i64) -> RSyntaxNode { - // NOTE: If you use `base::browser()` here in an attempt to prevent masking - // issues in case someone redefined `browser()`, you'll cause the function - // in which the breakpoint is injected to be bytecode-compiled. This is a - // limitation/bug of https://github.com/r-devel/r-svn/blob/e2aae817/src/library/compiler/R/cmp.R#L1273-L1290 - // Wrapped in .ark_auto_step() so the debugger automatically steps over it. - let code = - format!("\nbase::.ark_auto_step(base::.ark_breakpoint(browser(), \"{uri}\", \"{id}\"))\n"); - aether_parser::parse(&code, Default::default()).syntax() + Ok(start..end) } -fn add_line_directive_to_node(node: &RSyntaxNode, line: u32, uri: &Url) -> RSyntaxNode { - let Some(first_token) = node.first_token() else { - return node.clone(); - }; +type TriviaPieces = Vec<(TriviaPieceKind, String)>; - let line_directive = format!("#line {line} \"{uri}\"", line = line + 1); - - // Collect existing leading trivia, but skip only the first newline to avoid double blank lines - let existing_trivia: Vec<_> = first_token +/// Collects leading trivia from a token as (kind, text) tuples. 
+fn collect_leading_trivia(token: &aether_syntax::RSyntaxToken) -> TriviaPieces { + token .leading_trivia() .pieces() - .enumerate() - .filter_map(|(i, piece)| { - // Skip only the very first newline - if i == 0 && piece.kind() == biome_rowan::TriviaPieceKind::Newline { - None - } else { - Some((piece.kind(), piece.text().to_string())) - } - }) - .collect(); + .map(|piece| (piece.kind(), piece.text().to_string())) + .collect() +} - // Insert line directive before the final whitespace (indentation) if present. - // This preserves indentation: `[\n, \n, ws]` becomes `[\n, \n, directive, \n, ws]` - // rather than `[\n, \n, ws, directive, \n]` which would break indentation. - let new_trivia: Vec<_> = if existing_trivia.last().map_or(false, |(k, _)| { - *k == biome_rowan::TriviaPieceKind::Whitespace - }) { - let (init, last) = existing_trivia.split_at(existing_trivia.len() - 1); - init.iter() - .cloned() - .chain(vec![ - ( - biome_rowan::TriviaPieceKind::SingleLineComment, - line_directive, - ), - (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), - ]) - .chain(last.iter().cloned()) - .collect() +/// Creates trivia pieces for a line directive comment followed by a newline. +fn line_directive_trivia(line: u32, uri: &Url) -> TriviaPieces { + let directive = format!("#line {} \"{}\"", line + 1, uri); + vec![ + (TriviaPieceKind::SingleLineComment, directive), + (TriviaPieceKind::Newline, "\n".to_string()), + ] +} + +/// Inserts trivia pieces before trailing whitespace (indentation) if present. 
+/// This preserves indentation: `[\n, \n, ws]` becomes `[\n, \n, , ws]` +fn insert_before_trailing_whitespace( + mut trivia: TriviaPieces, + to_insert: TriviaPieces, +) -> TriviaPieces { + let has_trailing_whitespace = trivia + .last() + .is_some_and(|(k, _)| *k == TriviaPieceKind::Whitespace); + + if has_trailing_whitespace { + let Some(last) = trivia.pop() else { + trivia.extend(to_insert); + return trivia; + }; + trivia.extend(to_insert); + trivia.push(last); } else { - existing_trivia - .into_iter() - .chain(vec![ - ( - biome_rowan::TriviaPieceKind::SingleLineComment, - line_directive, - ), - (biome_rowan::TriviaPieceKind::Newline, "\n".to_string()), - ]) - .collect() - }; + trivia.extend(to_insert); + } + + trivia +} + +fn add_line_directive_to_node( + node: &RSyntaxNode, + line: u32, + uri: &Url, +) -> anyhow::Result { + let first_token = node + .first_token() + .ok_or_else(|| anyhow!("Node has no first token for line directive"))?; + + let mut existing_trivia = collect_leading_trivia(&first_token); + + // Skip leading newline as it belongs to the previous node + if existing_trivia + .first() + .is_some_and(|(kind, _)| *kind == TriviaPieceKind::Newline) + { + existing_trivia.remove(0); + } + + let directive_trivia = line_directive_trivia(line, uri); + let new_trivia = insert_before_trailing_whitespace(existing_trivia, directive_trivia); let new_first_token = first_token.with_leading_trivia(new_trivia.iter().map(|(k, t)| (*k, t.as_str()))); node.clone() .replace_child(first_token.into(), new_first_token.into()) - .unwrap_or_else(|| node.clone()) + .ok_or_else(|| anyhow!("Failed to replace first token with line directive")) } -/// Annotate source code for `source()` and `pkgload::load_all()`. -/// -/// - Wraps the whole source in a `{}` block. This allows R to step through the -/// top-level expressions. -/// - Injects breakpoint calls (`.ark_auto_step(.ark_breakpoint(...))`) at -/// breakpoint locations. 
-/// - Injects verification calls (`.ark_auto_step(.ark_verify_breakpoints_range(...))`) -/// after each top-level expression. Verifying expression by expression allows -/// marking breakpoints as verified even when an expression fails mid-script. -/// - `#line` directives before each original expression so the debugger knows -/// where to step in the original file. -pub(crate) fn annotate_source(code: &str, uri: &Url, breakpoints: &mut [Breakpoint]) -> String { - let line_index = LineIndex::new(code); - - // Parse the original code to get line ranges for each top-level expression - let original_root = aether_parser::parse(code, Default::default()).tree(); - let Some(original_r) = RRoot::cast(original_root.syntax().clone()) else { - return code.to_string(); - }; - - // Collect original line ranges before any modifications - let original_ranges: Vec<_> = original_r - .expressions() - .into_iter() - .map(|expr| text_trimmed_line_range(expr.syntax(), &line_index)) - .collect(); - - if original_ranges.is_empty() { - return code.to_string(); - } - - // Now inject breakpoints into the code - let location = CodeLocation { - uri: uri.clone(), - start: Position { - line: 0, - character: 0, - }, - end: Position { - line: code.lines().count().saturating_sub(1) as u32, - character: code.lines().last().map(|l| l.len()).unwrap_or(0), - }, - }; - let code_with_breakpoints = inject_breakpoints(code, location, breakpoints, &line_index); +/// Rebuild the root expression list with #line directives and verify calls for +/// each expression. 
+fn annotate_root_list( + list_node: RSyntaxNode, + ranges: &[std::ops::Range], + uri: &Url, +) -> anyhow::Result { + let mut result_slots: Vec>> = Vec::new(); - // Re-parse the code with breakpoints to get the updated structure - let root = aether_parser::parse(&code_with_breakpoints, Default::default()).tree(); + // Use pre-computed line ranges (from before any transformations) + let mut range_iter = ranges.iter(); - let Some(r) = RRoot::cast(root.syntax().clone()) else { - return code_with_breakpoints; - }; + for slot in list_node.slots() { + let biome_rowan::SyntaxSlot::Node(node) = slot else { + result_slots.push(None); + continue; + }; - let exprs: Vec<_> = r.expressions().into_iter().collect(); + // Get pre-computed line range for this expression + let Some(line_range) = range_iter.next() else { + result_slots.push(Some(node.into())); + continue; + }; - // Build the output with wrapping braces and verify calls - let mut output = String::from("{\n"); + // Add #line directive to expression + let decorated_node = add_line_directive_to_node(&node, line_range.start, uri)?; + result_slots.push(Some(decorated_node.into())); - // Track which original expression we're on - let mut original_expr_idx = 0; + let verify_call = create_verify_call(uri, line_range); + result_slots.push(Some(verify_call.into())); + } - for expr in exprs.iter() { - let expr_str = expr.syntax().to_string(); + // Replace all slots with the new list + let slot_count = list_node.slots().count(); + Ok(list_node.splice_slots(0..slot_count, result_slots)) +} - // Check if this is an injected breakpoint call (starts with base::.ark_auto_step) - let is_injected = expr_str - .trim_start() - .starts_with("base::.ark_auto_step(base::.ark_breakpoint"); +// We create new calls by parsing strings. Although less elegant, it's much less +// verbose and easier to see what's going on. 
- if is_injected { - // Just output the breakpoint call without #line or verify - output.push_str(expr_str.trim_start()); - output.push('\n'); - } else { - // This is an original expression - use the tracked original line range - if let Some(line_range) = original_ranges.get(original_expr_idx) { - // Add #line directive (R uses 1-based lines) - output.push_str(&format!("#line {} \"{}\"\n", line_range.start + 1, uri)); - - // Add the expression, stripping leading whitespace since we added our own newline - output.push_str(expr_str.trim_start()); - output.push('\n'); - - // Add verification call after the expression. - // Convert from 0-based to 1-based lines for R. - output.push_str(&format!( - "base::.ark_auto_step(base::.ark_verify_breakpoints_range(\"{}\", {}L, {}L))\n", - uri, - line_range.start + 1, - line_range.end + 1 - )); - - original_expr_idx += 1; - } - } - } +fn create_breakpoint_call(uri: &Url, id: i64) -> RSyntaxNode { + // NOTE: If you use `base::browser()` here in an attempt to prevent masking + // issues in case someone redefined `browser()`, you'll cause the function + // in which the breakpoint is injected to be bytecode-compiled. 
This is a + // limitation/bug of https://github.com/r-devel/r-svn/blob/e2aae817/src/library/compiler/R/cmp.R#L1273-L1290 + let code = format!( + "\nbase::{AUTO_STEP_FUNCTION}(base::.ark_breakpoint(browser(), \"{uri}\", \"{id}\"))\n" + ); + aether_parser::parse(&code, Default::default()).syntax() +} - output.push_str("}\n"); - output +fn create_verify_call(uri: &Url, line_range: &std::ops::Range) -> RSyntaxNode { + let code = format!( + "\nbase::{AUTO_STEP_FUNCTION}(base::.ark_verify_breakpoints_range(\"{}\", {}L, {}L))\n", + uri, + line_range.start + 1, + line_range.end + 1 + ); + aether_parser::parse(&code, Default::default()).syntax() } #[harp::register] @@ -670,7 +765,7 @@ pub unsafe extern "C-unwind" fn ps_annotate_source(uri: SEXP, code: SEXP) -> any return Ok(harp::r_null()); } - let annotated = annotate_source(&code, &uri, breakpoints.as_mut_slice()); + let annotated = annotate_source(&code, &uri, breakpoints.as_mut_slice())?; Ok(RObject::try_from(annotated)?.sexp) } @@ -694,7 +789,7 @@ mod tests { fn test_annotate_input_basic() { let code = "x <- 1\ny <- 2"; let location = make_location(0, 0); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -702,7 +797,7 @@ mod tests { fn test_annotate_input_shifted_line() { let code = "x <- 1\ny <- 2"; let location = make_location(10, 0); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -710,7 +805,7 @@ mod tests { fn test_annotate_input_shifted_character() { let code = "x <- 1\ny <- 2"; let location = make_location(0, 5); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -718,7 +813,7 @@ mod tests { fn test_annotate_input_shifted_line_and_character() { let code = "x <- 1\ny <- 2"; let location = make_location(10, 
5); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -726,7 +821,7 @@ mod tests { fn test_annotate_input_with_existing_whitespace() { let code = " x <- 1\n y <- 2"; let location = make_location(0, 0); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -734,7 +829,7 @@ mod tests { fn test_annotate_input_with_existing_whitespace_shifted() { let code = " x <- 1\n y <- 2"; let location = make_location(0, 2); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -742,7 +837,7 @@ mod tests { fn test_annotate_input_with_existing_comment() { let code = "# comment\nx <- 1"; let location = make_location(0, 0); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -750,7 +845,7 @@ mod tests { fn test_annotate_input_empty_code() { let code = ""; let location = make_location(0, 0); - let result = annotate_input(code, location, None); + let result = annotate_input(code, location, None).unwrap(); insta::assert_snapshot!(result); } @@ -777,7 +872,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = annotate_input(code, location, Some(&mut breakpoints)); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // Breakpoint line should remain in document coordinates @@ -807,7 +902,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ 
-841,7 +936,7 @@ mod tests { }, ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); @@ -868,7 +963,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); } @@ -894,7 +989,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); // Should return unchanged code assert_eq!(result, code); assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); @@ -932,7 +1027,7 @@ mod tests { }, ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); // Both breakpoints are valid (inside brace lists) assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); @@ -962,7 +1057,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ -990,7 +1085,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = 
inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); // Should return unchanged code since breakpoint is invalid assert_eq!(result, code); // Marked as invalid @@ -1027,7 +1122,7 @@ mod tests { }, ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); // First breakpoint is invalid (on closing brace) @@ -1074,7 +1169,7 @@ mod tests { }, ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); // All breakpoints are valid (inside brace lists) assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); @@ -1115,7 +1210,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); // The breakpoint line should remain in document coordinates @@ -1147,7 +1242,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); // The breakpoint line should remain in document coordinates @@ -1180,7 +1275,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); // The breakpoint should be placed at line 2 @@ -1212,7 +1307,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, 
&line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); // The breakpoint should be placed at line 3 @@ -1244,7 +1339,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); // Should return unchanged code since breakpoint is invalid assert_eq!(result, code); // Marked as invalid @@ -1273,7 +1368,7 @@ mod tests { state: BreakpointState::Unverified, }]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); // Code unchanged since breakpoint is invalid assert_eq!(result, code); // Breakpoint marked as invalid @@ -1309,7 +1404,7 @@ mod tests { }, ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); // Code unchanged since all breakpoints are invalid assert_eq!(result, code); // Both breakpoints marked as invalid @@ -1351,7 +1446,7 @@ mod tests { }, ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index); + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); // Code should contain breakpoint for nested expression only assert!(result.contains("base::.ark_breakpoint")); // Top-level breakpoints are invalid @@ -1367,7 +1462,7 @@ mod tests { let code = "x <- 1\ny <- 2"; let uri = Url::parse("file:///test.R").unwrap(); let mut breakpoints = vec![]; - let result = annotate_source(code, &uri, &mut breakpoints); + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); insta::assert_snapshot!(result); } @@ -1381,7 +1476,7 @@ mod tests { line: 1, state: BreakpointState::Unverified, }]; - let result = 
annotate_source(code, &uri, &mut breakpoints); + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); insta::assert_snapshot!(result); } @@ -1390,7 +1485,7 @@ mod tests { let code = "a <- 1\nb <- 2\nc <- 3"; let uri = Url::parse("file:///test.R").unwrap(); let mut breakpoints = vec![]; - let result = annotate_source(code, &uri, &mut breakpoints); + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); insta::assert_snapshot!(result); } @@ -1399,7 +1494,288 @@ mod tests { let code = "foo <- function(x) {\n x + 1\n}\nbar <- 2"; let uri = Url::parse("file:///test.R").unwrap(); let mut breakpoints = vec![]; - let result = annotate_source(code, &uri, &mut breakpoints); + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + insta::assert_snapshot!(result); + } + + #[test] + fn test_inject_breakpoints_if_else_both_branches() { + let code = "if (TRUE) {\n x <- 1\n} else {\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 1, // `x <- 1` in if branch + state: BreakpointState::Unverified, + }, + Breakpoint { + id: 2, + line: 3, // `y <- 2` in else branch + state: BreakpointState::Unverified, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); insta::assert_snapshot!(result); + + // Both breakpoints should be valid (not marked as invalid) + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid), + "First breakpoint should not be invalid" + ); + assert!( + !matches!(breakpoints[1].state, BreakpointState::Invalid), + "Second breakpoint should not be invalid" + ); + } + + #[test] + fn test_inject_breakpoints_multiple_invalid_closing_braces() { + // Multiple breakpoints on closing braces should all be marked invalid + // 
without re-traversing the tree for each one. + let code = "{\n f <- function() {\n x <- 1\n }\n}"; + // Line 0: { + // Line 1: f <- function() { + // Line 2: x <- 1 + // Line 3: } <- bp1 (closing brace of function) + // Line 4: } <- bp2 (closing brace of outer block) + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![ + Breakpoint { + id: 1, + line: 3, // closing brace of function + state: BreakpointState::Unverified, + }, + Breakpoint { + id: 2, + line: 4, // closing brace of outer block + state: BreakpointState::Unverified, + }, + ]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + + // Code should be unchanged (no valid breakpoints) + assert_eq!(result, code); + + // Both breakpoints should be marked invalid + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid), + "First breakpoint on closing brace should be invalid" + ); + assert!( + matches!(breakpoints[1].state, BreakpointState::Invalid), + "Second breakpoint on closing brace should be invalid" + ); + } + + #[test] + fn test_inject_breakpoints_empty_brace_sibling() { + // Breakpoint on an empty brace block that's a sibling to other expressions + let code = "{\n x <- 1\n {}\n}"; + // Line 0: { + // Line 1: x <- 1 + // Line 2: {} <- breakpoint here + // Line 3: } + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 2, // the empty {} expression + state: BreakpointState::Unverified, + }]; + + let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + insta::assert_snapshot!(result); + + // 
Should anchor to the empty {} expression (it's a valid expression) + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint on empty brace block should be valid" + ); + } + + #[test] + fn test_inject_breakpoints_nested_braces_same_line() { + // Test breakpoints on nested brace structures + let code = "{\n {\n }\n}"; + // Line 0: { <- outer open + // Line 1: { <- inner open (this is an expression in outer list) + // Line 2: } <- inner close (invalid - closing brace) + // Line 3: } <- outer close (invalid - closing brace) + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 3, + character: 1, + }, + }; + let line_index = LineIndex::new(code); + + // Test 1: Breakpoint on inner brace open line (valid - anchors to inner {} expression) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 1, + state: BreakpointState::Unverified, + }]; + let result = + inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint on inner brace open should be valid" + ); + assert!(result.contains(".ark_breakpoint")); + + // Test 2: Breakpoint on inner closing brace (invalid) + let mut breakpoints = vec![Breakpoint { + id: 2, + line: 2, + state: BreakpointState::Unverified, + }]; + let result = + inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint on inner closing brace should be invalid" + ); + assert_eq!(result, code); + + // Test 3: Breakpoint on outer closing brace (invalid) + let mut breakpoints = vec![Breakpoint { + id: 3, + line: 3, + state: BreakpointState::Unverified, + }]; + let result = + inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + assert!( + matches!(breakpoints[0].state, 
BreakpointState::Invalid), + "Breakpoint on outer closing brace should be invalid" + ); + assert_eq!(result, code); + } + + #[test] + fn test_inject_breakpoints_double_braces_same_lines() { + // Test breakpoints with {{ on one line and }} on another + let code = "{{\n}}"; + // Line 0: {{ <- outer and inner open + // Line 1: }} <- inner and outer close + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 1, + character: 2, + }, + }; + let line_index = LineIndex::new(code); + + // Test 1: Breakpoint on line 0 (valid - anchors to inner {} expression) + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 0, + state: BreakpointState::Unverified, + }]; + let result = + inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + assert!( + !matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint on {{ line should be valid" + ); + assert!(result.contains(".ark_breakpoint")); + + // Test 2: Breakpoint on line 1 (invalid - closing braces) + let mut breakpoints = vec![Breakpoint { + id: 2, + line: 1, + state: BreakpointState::Unverified, + }]; + let result = + inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint on }} line should be invalid" + ); + assert_eq!(result, code); + } + + #[test] + fn test_inject_breakpoints_inside_multiline_call() { + // Test breakpoint placed on a line inside a multi-line call expression + // The breakpoint is on the argument line, not the start of the expression + let code = "{\n foo(\n 1\n )\n}"; + // Line 0: { + // Line 1: foo( + // Line 2: 1 <- breakpoint here + // Line 3: ) + // Line 4: } + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let 
line_index = LineIndex::new(code); + + let mut breakpoints = vec![Breakpoint { + id: 1, + line: 2, // Inside the foo() call, on the argument line + state: BreakpointState::Unverified, + }]; + + let result = + inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + + // Breakpoint inside a multi-line expression (not at its start) is invalid + assert!( + matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint inside multi-line call should be invalid" + ); + assert_eq!(result, code, "Invalid breakpoint should not modify code"); } } diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index fcf67695a..8c12af649 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -355,7 +355,7 @@ impl PendingInputs { breakpoints: Option<&mut [Breakpoint]>, ) -> anyhow::Result> { let input = if let Some(location) = location { - let annotated_code = annotate_input(code, location, breakpoints); + let annotated_code = annotate_input(code, location, breakpoints)?; log::trace!("Annotated code: \n```\n{annotated_code}\n```"); harp::ParseInput::SrcFile(&SrcFile::new_virtual_empty_filename(annotated_code.into())) } else if harp::get_option_bool("keep.source") { diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap new file mode 100644 index 000000000..f7cc3bfcc --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap @@ -0,0 +1,10 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ + x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 3 "file:///test.R" + {} +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap 
b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap new file mode 100644 index 000000000..70815120a --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap @@ -0,0 +1,13 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +if (TRUE) { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + x <- 1 +} else { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 4 "file:///test.R" + y <- 2 +} From ddde5d632b54948b884630d392feb487bebbb3b5 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 7 Jan 2026 10:07:17 +0100 Subject: [PATCH 25/42] Log breakpoint structures --- crates/ark/src/console_debug.rs | 4 ++-- crates/ark/src/dap/dap_server.rs | 7 ++++--- crates/ark/src/modules/positron/debug.R | 13 ++++++++++--- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index 7ed53c152..99cdf0065 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -416,8 +416,8 @@ pub unsafe extern "C-unwind" fn ps_is_breakpoint_enabled( let console = RMain::get_mut(); let dap = console.debug_dap.lock().unwrap(); - let enabled: RObject = dap.is_breakpoint_enabled(&uri, id).into(); - Ok(enabled.sexp) + let enabled = dap.is_breakpoint_enabled(&uri, id); + Ok(RObject::from(enabled).sexp) } /// Verify breakpoints in the line range covered by a srcref. diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 4c98bfa8a..977e4754b 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -1,7 +1,7 @@ // // dap_server.rs // -// Copyright (C) 2023 Posit Software, PBC. All rights reserved. +// Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. 
// // @@ -427,8 +427,9 @@ impl DapServer { }; log::trace!( - "DAP: URI {uri} now has {} breakpoints", - new_breakpoints.len() + "DAP: URI {uri} now has {} breakpoints:\n{:#?}", + new_breakpoints.len(), + new_breakpoints ); state diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index fe293a0fe..035cda7ea 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -712,11 +712,18 @@ is_breakpoint_enabled <- function(uri, id) { #' @export .ark_breakpoint <- structure( function(expr, uri, id) { + enabled <- is_breakpoint_enabled(uri, id) + log_trace(sprintf( + "DAP: Breakpoint %s for %s enabled: %s", + id, + uri, + enabled + )) + # Force `browser()` call only if breakpoint is enabled - if (!is_breakpoint_enabled(uri, id)) { - return() + if (enabled) { + expr } - expr }, class = "ark_breakpoint" ) From cc6fc19efcc52477bb2c582727e7c5ec03e32089 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 7 Jan 2026 11:27:39 +0100 Subject: [PATCH 26/42] Fix issue with unexpected top-level browsing In a script, set a breakpoint in a braced block and evaluate it: { 1 # BP } Step through it. Then run another block that doesn't have a breakpoint: { 2 } Without the workaround, R will enter the debugger at the first expression of the block. --- crates/ark/src/interface.rs | 10 ++++++++++ crates/ark/tests/kernel-debugger.rs | 19 +++++++++++++++++++ crates/libr/src/r.rs | 2 ++ 3 files changed, 31 insertions(+) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 8c12af649..1b4d02925 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1033,6 +1033,16 @@ impl RMain { // fall through to event loop let result = self.take_result(); self.handle_active_request(&info, ConsoleValue::Success(result)); + + // Reset debug flag on the global environment. This is a workaround + // for when a breakpoint was entered at top-level, in a `{}` block. 
+ // In that case `browser()` marks the global environment as being + // debugged here: https://github.com/r-devel/r-svn/blob/476ffd4c/src/main/main.c#L1492-L1494. + // Only do it when the call stack is empty, as removing the flag + // prevents normal stepping with `source()`. + if harp::r_n_frame().unwrap_or(0) == 0 { + unsafe { libr::SET_RDEBUG(libr::R_GlobalEnv, 0) }; + } } // If debugger is active, to prevent injected expressions from diff --git a/crates/ark/tests/kernel-debugger.rs b/crates/ark/tests/kernel-debugger.rs index 7df93efd2..76a1a7770 100644 --- a/crates/ark/tests/kernel-debugger.rs +++ b/crates/ark/tests/kernel-debugger.rs @@ -360,6 +360,25 @@ fn test_browser_in_base_env() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_braced_step_out() { + let frontend = DummyArkFrontend::lock(); + + // Evaluate `{browser()}` which enters the debugger + frontend.execute_request("{browser()}", |result| { + assert!(result.contains("Called from: top level")); + }); + + // Step once with `n` to leave the debugger (the braced expression completes) + frontend.execute_request_invisibly("n"); + + // Now evaluate `{1}` - this should NOT trigger the debugger + // and should return the result normally + frontend.execute_request("{1}", |result| { + assert!(result.contains("[1] 1")); + }); +} + // The minimal environment we can debug in: access to base via `::`. This might // be a problem for very specialised sandboxing environment, but they can // temporarily add `::` while debugging. diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index 54a9c0f1f..d74527023 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -274,6 +274,8 @@ functions::generate! 
{ pub fn RDEBUG(x: SEXP) -> std::ffi::c_int; + pub fn SET_RDEBUG(x: SEXP, i: std::ffi::c_int); + pub fn REAL(x: SEXP) -> *mut f64; pub fn REAL_ELT(x: SEXP, i: R_xlen_t) -> f64; From 88f45e0b4e992f6a4ab593c130d8ed0272a81c26 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 7 Jan 2026 12:06:35 +0100 Subject: [PATCH 27/42] Depend on modified Biome --- Cargo.lock | 68 ++++++++++++++++++------------------------------------ Cargo.toml | 12 +++++----- 2 files changed, 28 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 028620d4d..f407b52b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -209,10 +209,11 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aether_lsp_utils" version = "0.0.0" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "anyhow", - "biome_line_index 0.1.0 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", - "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_line_index", + "biome_text_size", "dissimilar", "itertools 0.13.0", "line_ending", @@ -245,19 +246,21 @@ dependencies = [ [[package]] name = "air_r_factory" version = "0.0.0" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "air_r_syntax", - "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_rowan", ] [[package]] name = "air_r_parser" version = "0.0.0" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "air_r_factory", "air_r_syntax", "biome_parser", - "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + 
"biome_rowan", "biome_unicode_table", "serde", "tracing", @@ -268,8 +271,9 @@ dependencies = [ [[package]] name = "air_r_syntax" version = "0.0.0" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ - "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_rowan", "serde", ] @@ -351,8 +355,8 @@ dependencies = [ "assert_matches", "async-trait", "base64 0.21.0", - "biome_line_index 0.1.0", - "biome_rowan 0.5.7", + "biome_line_index", + "biome_rowan", "blake3", "bus", "cc", @@ -487,7 +491,7 @@ version = "0.5.7" source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ "biome_markup", - "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_size", "serde", "termcolor", "unicode-segmentation", @@ -503,9 +507,9 @@ dependencies = [ "biome_console", "biome_diagnostics_categories", "biome_diagnostics_macros", - "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", - "biome_text_edit 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", - "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_rowan", + "biome_text_edit", + "biome_text_size", "enumflags2", "serde", "serde_json", @@ -534,20 +538,12 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "biome_line_index" -version = "0.1.0" -dependencies = [ - "biome_text_size 0.5.7", - "rustc-hash", -] - [[package]] name = "biome_line_index" version = "0.1.0" source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ - "biome_text_size 0.5.7 
(git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_size", "rustc-hash", ] @@ -568,57 +564,35 @@ source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd789 dependencies = [ "biome_console", "biome_diagnostics", - "biome_rowan 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_rowan", "biome_unicode_table", "drop_bomb", "enumflags2", "unicode-bom", ] -[[package]] -name = "biome_rowan" -version = "0.5.7" -dependencies = [ - "biome_text_edit 0.5.7", - "biome_text_size 0.5.7", - "hashbrown 0.15.5", - "rustc-hash", -] - [[package]] name = "biome_rowan" version = "0.5.7" source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ - "biome_text_edit 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", - "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_edit", + "biome_text_size", "hashbrown 0.15.5", "rustc-hash", "serde", ] -[[package]] -name = "biome_text_edit" -version = "0.5.7" -dependencies = [ - "biome_text_size 0.5.7", - "similar", -] - [[package]] name = "biome_text_edit" version = "0.5.7" source = "git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051#41d799cfa4cedd25625fc3f6bd7898532873f051" dependencies = [ - "biome_text_size 0.5.7 (git+https://github.com/lionel-/biome?rev=41d799cfa4cedd25625fc3f6bd7898532873f051)", + "biome_text_size", "serde", "similar", ] -[[package]] -name = "biome_text_size" -version = "0.5.7" - [[package]] name = "biome_text_size" version = "0.5.7" @@ -1993,6 +1967,7 @@ dependencies = [ [[package]] name = "line_ending" version = "0.0.0" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" dependencies = [ "memchr", 
"settings", @@ -3087,6 +3062,7 @@ dependencies = [ [[package]] name = "settings" version = "0.0.0" +source = "git+https://github.com/posit-dev/air?rev=9abbc6e68b270d3f5bf882fe811ce6a164f671ba#9abbc6e68b270d3f5bf882fe811ce6a164f671ba" [[package]] name = "sha1" diff --git a/Cargo.toml b/Cargo.toml index fccff5d2b..2a880760e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,11 +25,11 @@ license = "MIT" authors = ["Posit Software, PBC"] [workspace.dependencies] -biome_line_index = { path = "/Users/lionel/Sync/Projects/Positron/biome/crates/biome_line_index" } -biome_rowan = { path = "/Users/lionel/Sync/Projects/Positron/biome/crates/biome_rowan" } -aether_factory = { path = "/Users/lionel/Sync/Projects/Positron/air/crates/air_r_factory", package = "air_r_factory" } -aether_lsp_utils = { path = "/Users/lionel/Sync/Projects/Positron/air/crates/aether_lsp_utils" } -aether_parser = { path = "/Users/lionel/Sync/Projects/Positron/air/crates/air_r_parser", package = "air_r_parser" } -aether_syntax = { path = "/Users/lionel/Sync/Projects/Positron/air/crates/air_r_syntax", package = "air_r_syntax" } +biome_line_index = { git = "https://github.com/lionel-/biome", rev = "41d799cfa4cedd25625fc3f6bd7898532873f051" } +biome_rowan = { git = "https://github.com/lionel-/biome", rev = "41d799cfa4cedd25625fc3f6bd7898532873f051" } +aether_factory = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba", package = "air_r_factory" } +aether_lsp_utils = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba" } +aether_parser = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba", package = "air_r_parser" } +aether_syntax = { git = "https://github.com/posit-dev/air", rev = "9abbc6e68b270d3f5bf882fe811ce6a164f671ba", package = "air_r_syntax" } # For https://github.com/ebkalderon/tower-lsp/pull/428 tower-lsp = { branch = "bugfix/patches", git = "https://github.com/lionel-/tower-lsp" 
} From 6217f65e18c99d70e3f64dba6531c68ae8bc8588 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 7 Jan 2026 15:29:29 +0100 Subject: [PATCH 28/42] Document annotations --- crates/ark/src/console_annotate.rs | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 53ff6bc35..c2129c296 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -46,7 +46,79 @@ const AUTO_STEP_FUNCTION: &str = ".ark_auto_step"; // top-level stepping, inject breakpoints, and inject top-level verification // calls to let Ark know a breakpoint is now active after evaluation. This is // handled in a separate code path in `annotate_source()`. + +// Breakpoint injection +// +// A breakpoint injected on `expression_to_break_on` looks like this: +// +// ```r +// .ark_auto_step(.ark_breakpoint(browser(), "*url*", "*id*")) +// #line *line* "*url*" +// expression_to_break_on +// ``` +// +// - `.ark_auto_step()` is an identity function that serves as a sentinel when R +// steps through code. If the user steps on an injected breakpoint, we detect +// the auto-step call in the `debug at` message emitted by R and automatically +// step over it (i.e. call `n`). +// +// - `.ark_breakpoint()` takes a `browser()` call promised in the current +// environment, a URL, and the breakpoint's unique ID. It only forces the +// browser argument if the breakpoint is active. Since the argument is promised +// in the call-site environment, this causes R to mark that environment as being +// debugged. +// +// It does not stop quite at the right place though, inside the +// `.ark_breakpoint()` wrapper, with `.ark_auto_step()` on the stack as well. 
To +// solve this, there is a second condition triggering auto-stepping in +// ReadConsole: if the function of the top stack frame is `.ark_breakpoint()` +// (which we detect through a class assigned to the function), then we +// auto-step. This causes R to resume evaluation. Since the call-site +// environment is being debugged, it stops at the next expression automatically, +// in this case `expression_to_break_on`. +// +// - The `#line` directive right above `expression_to_break_on` maps the source +// references to the original location in the source document. When R stops on +// the expression, it emits the original location, allowing the DAP to +// communicate the appropriate stopping place to the frontend. + +// Source instrumentation // +// `base::source()` and `devtools::load_all()` need two things: +// +// - Breakpoint injection as described above. +// +// - Top-level adjustments so it's possible to step through a script or +// top-level package file (the latter is rarely useful but is a side benefit +// from using the same implementation as `source()`). +// +// If the sourced file looks like: +// +// ```r +// 1 +// 2 +// ``` +// +// The instrumented version ends up as: +// +// ```r +// { +// #line 1 "file:///file.R" +// 1 +// base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) +// #line 2 "file:///file.R" +// 2 +// base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +// } +// ``` +// +// - The whole source is wrapped in `{}` to allow R to step through the code. +// - Line directives map each expression to original source. +// - An auto-stepped `.ark_verify_breakpoints_range()` call after each +// expression lets the DAP know that any breakpoints spanned by the last +// expression are now "verified", i.e. the breakpoints have been injected and +// the code containing them has been evaluated. + // Breakpoint injection happens in two phases: // // - We first collect "anchors", i.e. 
the syntax node where a breakpoint should From fef3524c358537cd2b8b12901f2c1440f1a1890c Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 7 Jan 2026 16:06:36 +0100 Subject: [PATCH 29/42] Fix deadlock in integration tests --- crates/ark/src/interface.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 1b4d02925..f4795e6be 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1,7 +1,7 @@ // // interface.rs // -// Copyright (C) 2023-2024 Posit Software, PBC. All rights reserved. +// Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. // // @@ -559,14 +559,6 @@ impl RMain { let main = RMain::get_mut(); - // Spawn handler loop for async messages - r_task::spawn_interrupt({ - let dap_clone = main.debug_dap.clone(); - || async move { - RMain::process_console_notifications(console_notification_rx, dap_clone).await - } - }); - let mut r_args = r_args.clone(); // Record if the user has requested that we don't load the site/user level R profiles @@ -690,6 +682,18 @@ impl RMain { ); Self::complete_initialization(main.banner.take(), kernel_init_tx); + // Spawn handler loop for async messages from other components (e.g., LSP). + // Note that we do it after init is complete to avoid deadlocking + // integration tests by spawning an async task. The deadlock is caused + // by the `block_on()` behaviour in + // https://github.com/posit-dev/ark/blob/bd827e73/crates/ark/src/r_task.rs#L261. + r_task::spawn_interrupt({ + let dap_clone = main.debug_dap.clone(); + || async move { + RMain::process_console_notifications(console_notification_rx, dap_clone).await + } + }); + // Initialize the GD context on this thread. // Note that we do it after init is complete to avoid deadlocking // integration tests by spawning an async task. 
The deadlock is caused From a64b92ccb7587e185bc8205d37dbf100c65cfd59 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 7 Jan 2026 16:17:57 +0100 Subject: [PATCH 30/42] Verify breakpoint that are about to stop R --- crates/ark/src/console_debug.rs | 18 +++++++++++++++ crates/ark/src/dap/dap.rs | 30 ++++++++++++++++++++++++- crates/ark/src/modules/positron/debug.R | 12 ++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index 99cdf0065..5f71d1fb9 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -420,6 +420,24 @@ pub unsafe extern "C-unwind" fn ps_is_breakpoint_enabled( Ok(RObject::from(enabled).sexp) } +/// Verify a single breakpoint by ID. +/// Called when a breakpoint expression is about to be evaluated. +#[harp::register] +pub unsafe extern "C-unwind" fn ps_verify_breakpoint(uri: SEXP, id: SEXP) -> anyhow::Result { + let uri: String = RObject::view(uri).try_into()?; + let id: String = RObject::view(id).try_into()?; + + let Ok(uri) = Url::parse(&uri) else { + return Ok(libr::R_NilValue); + }; + + let main = RMain::get(); + let mut dap = main.debug_dap.lock().unwrap(); + dap.verify_breakpoint(&uri, &id); + + Ok(libr::R_NilValue) +} + /// Verify breakpoints in the line range covered by a srcref. /// Called after each expression is successfully evaluated in source(). #[harp::register] diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 8cdba9b07..4013e9109 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -1,7 +1,7 @@ // // dap.rs // -// Copyright (C) 2023 Posit Software, PBC. All rights reserved. +// Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. // // @@ -314,6 +314,34 @@ impl Dap { } } + /// Verify a single breakpoint by ID + /// + /// Finds the breakpoint with the given ID for the URI and marks it as verified + /// if it was previously unverified. 
Sends a `BreakpointState` event with `verified: true`. + pub fn verify_breakpoint(&mut self, uri: &Url, id: &str) { + let Some((_, bp_list)) = self.breakpoints.get_mut(uri) else { + return; + }; + let Some(bp) = bp_list.iter_mut().find(|bp| bp.id.to_string() == id) else { + return; + }; + + // Only verify unverified breakpoints + if !matches!(bp.state, BreakpointState::Unverified) { + return; + } + + bp.state = BreakpointState::Verified; + + if let Some(tx) = &self.backend_events_tx { + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + verified: true, + }) + .log_err(); + } + } + /// Called when a document changes. Removes all breakpoints for the URI /// and sends unverified events for each one. pub fn did_change_document(&mut self, uri: &Url) { diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index 035cda7ea..a290c370d 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -702,6 +702,10 @@ is_breakpoint_enabled <- function(uri, id) { .ps.Call("ps_is_breakpoint_enabled", uri, id) } +verify_breapoint <- function(uri, id) { + .ps.Call("ps_verify_breakpoint", uri, id) +} + # Injected breakpoint. This receives a `browser()` call in the `expr` argument. # The argument if forced if the breakpoint is enabled. Since `expr` is promised # in the calling frame environment, that environment is marked by R as being @@ -712,6 +716,14 @@ is_breakpoint_enabled <- function(uri, id) { #' @export .ark_breakpoint <- structure( function(expr, uri, id) { + # Verify breakpoint right away, if not already the case. We normally + # verify breakpoints after each top-level expression has finished + # evaluating, but if we stop on a breakpoint right away (e.g. because + # it's in an `lapply()` rather than an assigned function) we must verify + # it directly. Otherwise it's confusing for users to stop on an unverified + # breakpoint that appears invalid.
+ verify_breapoint(uri, id) + enabled <- is_breakpoint_enabled(uri, id) log_trace(sprintf( "DAP: Breakpoint %s for %s enabled: %s", From 6145973d1a7ad9b3d066836b92286f834e6441f0 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 8 Jan 2026 11:43:59 +0100 Subject: [PATCH 31/42] Display backtrace on error --- crates/ark/src/console_debug.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/ark/src/console_debug.rs b/crates/ark/src/console_debug.rs index 5f71d1fb9..8b23c9393 100644 --- a/crates/ark/src/console_debug.rs +++ b/crates/ark/src/console_debug.rs @@ -1,7 +1,7 @@ // // repl_debug.rs // -// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// Copyright (C) 2026 Posit Software, PBC. All rights reserved. // use anyhow::anyhow; @@ -100,7 +100,7 @@ impl RMain { let mut dap = self.debug_dap.lock().unwrap(); dap.start_debug(stack, preserve_focus, fallback_sources) }, - Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), + Err(err) => log::error!("ReadConsole: Can't get stack info: {err:?}"), }; } From 5e71753ec6ccf3ea511ce622268a04f7e15f562e Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 8 Jan 2026 12:06:36 +0100 Subject: [PATCH 32/42] Allow top-level frame contents to be a call --- crates/ark/src/modules/positron/calls_deparse.R | 6 +++--- crates/ark/src/modules/positron/debug.R | 12 +++++------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/crates/ark/src/modules/positron/calls_deparse.R b/crates/ark/src/modules/positron/calls_deparse.R index 7929ea689..361e182e4 100644 --- a/crates/ark/src/modules/positron/calls_deparse.R +++ b/crates/ark/src/modules/positron/calls_deparse.R @@ -5,8 +5,8 @@ # # -call_deparse <- function(x) { - deparse(x, width.cutoff = 500L) +deparse_string <- function(x, cutoff = 500L) { + paste_line(deparse(x, width.cutoff = cutoff)) } as_label <- function(x) { @@ -16,7 +16,7 @@ as_label <- function(x) { } # Retain only first line - out <- call_deparse(x)[[1]] 
+ out <- deparse(x)[[1]] # And first 20 characters if (nchar(out) > 20) { diff --git a/crates/ark/src/modules/positron/debug.R b/crates/ark/src/modules/positron/debug.R index a290c370d..ec4ed8dbf 100644 --- a/crates/ark/src/modules/positron/debug.R +++ b/crates/ark/src/modules/positron/debug.R @@ -1,7 +1,7 @@ # # debug.R # -# Copyright (C) 2023-2024 Posit Software, PBC. All rights reserved. +# Copyright (C) 2023-2026 Posit Software, PBC. All rights reserved. # # @@ -87,6 +87,7 @@ debugger_stack_info <- function( top_level_call_frame_info <- function(x) { source_name <- paste0(as_label(x), ".R") + contents <- deparse_string(x) srcref <- attr(x, "srcref", exact = TRUE) if (!is.null(srcref)) { @@ -108,7 +109,7 @@ top_level_call_frame_info <- function(x) { source_name = source_name, frame_name = "", file = NULL, - contents = x, + contents = contents, environment = NULL, start_line = 0L, start_column = 0L, @@ -144,9 +145,7 @@ intermediate_frame_infos <- function(n, calls, fns, environments, frame_calls) { attr(call, "srcref", exact = TRUE) }) call_texts <- lapply(calls, function(call) { - call_lines <- call_deparse(call) - call_text <- paste_line(call_lines) - call_text + deparse_string(call) }) frame_names <- lapply(frame_calls, function(call) as_label(call)) @@ -201,8 +200,7 @@ frame_info <- function( } # Only deparse if `srcref` failed! 
- fn_lines <- call_deparse(fn) - fn_text <- paste_line(fn_lines) + fn_text <- deparse_string(fn) # Reparse early on, so even if we fail to find `call_text` or fail to reparse, # we pass a `fn_text` to `frame_info_unknown_range()` where we've consistently removed From 527ebe4fe2e1cff651f0584f694e4e60e8f2f6da Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 8 Jan 2026 14:04:26 +0100 Subject: [PATCH 33/42] Always stop debug session before yielding back to R --- crates/ark/src/interface.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index f4795e6be..b6b6e7bc4 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -299,7 +299,7 @@ pub struct RMain { pub(crate) debug_session_index: u32, /// The current frame `id`. Unique across all frames within a single debug session. - /// Reset after `stop_debug()`, not between debug steps. + /// Reset after `debug_stop()`, not between debug steps. pub(crate) debug_current_frame_id: i64, /// Tracks how many nested `r_read_console()` calls are on the stack. @@ -1001,9 +1001,10 @@ impl RMain { // from the last expression we evaluated self.debug_is_debugging = true; self.debug_start(self.debug_preserve_focus); - } else if self.debug_is_debugging { - self.debug_is_debugging = false; - self.debug_stop(); + + // Note that for simplicity this state is reset on exit via the + // cleanups registered in `r_read_console()`. Ideally we'd clean + // from here for symmetry. } if let Some(exception) = self.take_exception() { @@ -2608,6 +2609,18 @@ pub extern "C-unwind" fn r_read_console( // Restore current frame main.read_console_frame.replace(old_current_frame); + + // Always stop debug session when yielding back to R. 
This prevents + // the debug toolbar from lingering in situations like: + // + // ```r + // { local(browser()); Sys.sleep(10) } + // ``` + // + // For a more practical example see Shiny app example in + // https://github.com/rstudio/rstudio/pull/14848 + main.debug_is_debugging = false; + main.debug_stop(); }, ) } From 633b33948500c45f0cf448ea299a5a395a70b2e1 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 9 Jan 2026 08:51:00 +0100 Subject: [PATCH 34/42] Log if parsed expression vector for `source()` is not size 1 --- crates/ark/src/modules/positron/hooks_source.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/ark/src/modules/positron/hooks_source.R b/crates/ark/src/modules/positron/hooks_source.R index e924e8e56..7ef33eee9 100644 --- a/crates/ark/src/modules/positron/hooks_source.R +++ b/crates/ark/src/modules/positron/hooks_source.R @@ -113,9 +113,13 @@ make_ark_source <- function(original_source) { parsed <- parse(text = annotated, keep.source = TRUE) - for (expr in parsed) { - eval(parsed, env) + if (length(parsed) != 1) { + log_trace("`source()`: Expected a single `{}` expression") } + + # `eval()` loops over the expression vector, handling gracefully + # unexpected lengths (0 or >1) + eval(parsed, env) } } From 6ef2e547f70a5f83b977c08e03b3face4859678c Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 9 Jan 2026 09:10:14 +0100 Subject: [PATCH 35/42] Fix auto-stepping logic --- crates/ark/src/interface.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index b6b6e7bc4..7145e9bc3 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1060,11 +1060,11 @@ impl RMain { // look whether our sentinel `.ark_auto_step()` was emitted by R as part // of the `Debug at` output. 
if self.debug_is_debugging { - // Did we just step onto an injected breakpoint or verification call + // Did we just step onto an injected call (breakpoint or verify)? let at_auto_step = matches!( &self.debug_call_text, DebugCallText::Finalized(text, DebugCallTextKind::DebugAt) - if text.contains(".ark_auto_step") + if text.trim_start().starts_with("base::.ark_auto_step") ); // Are we stopped by an injected breakpoint From e3e5060f5aae98630cd0135f3a4f3648a2a9c99a Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 8 Jan 2026 14:28:14 +0100 Subject: [PATCH 36/42] Refactor to single pass - More consistent verification inside braces - More consistent implementation in `annotate_input()` and `annotate_source()` --- crates/ark/src/console_annotate.rs | 1526 +++++++++-------- crates/ark/src/dap/dap.rs | 24 +- crates/ark/src/dap/dap_server.rs | 28 +- ...tests__annotate_input_with_breakpoint.snap | 2 + ...nnotate__tests__annotate_source_basic.snap | 5 +- ...reakpoint_on_function_definition_line.snap | 14 + ...te_source_breakpoint_on_opening_brace.snap | 17 + ..._annotate_source_multiline_expression.snap | 5 +- ...ce_multiple_breakpoints_inside_braces.snap | 21 + ..._annotate_source_multiple_expressions.snap | 7 +- ..._annotate_source_top_level_breakpoint.snap | 16 + ...ests__annotate_source_with_breakpoint.snap | 5 +- ...reakpoints_before_within_after_nested.snap | 7 + ...ject_breakpoints_doubly_nested_braces.snap | 3 + ...nject_breakpoints_empty_brace_sibling.snap | 1 + ...ect_breakpoints_if_else_both_branches.snap | 1 + ...sts__inject_breakpoints_in_brace_list.snap | 1 + ...ect_breakpoints_inside_multiline_call.snap | 12 + ...nside_multiline_expr_anchors_to_start.snap | 14 + ...e__tests__inject_breakpoints_multiple.snap | 3 + ...ts__inject_breakpoints_multiple_lists.snap | 5 + ...kpoints_on_blank_line_anchors_to_next.snap | 12 + ...n_closing_brace_with_valid_breakpoint.snap | 1 + ...tests__inject_breakpoints_single_line.snap | 3 + 
...ject_breakpoints_triply_nested_braces.snap | 1 + ...s__inject_breakpoints_with_blank_line.snap | 1 + ...__inject_breakpoints_with_line_offset.snap | 3 + ...t_breakpoints_with_line_offset_nested.snap | 1 + ...ple_breakpoints_collapse_to_same_line.snap | 13 + 29 files changed, 972 insertions(+), 780 deletions(-) create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_function_definition_line.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_opening_brace.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_breakpoints_inside_braces.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_top_level_breakpoint.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_call.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_expr_anchors_to_start.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_blank_line_anchors_to_next.snap create mode 100644 crates/ark/src/snapshots/ark__console_annotate__tests__multiple_breakpoints_collapse_to_same_line.snap diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index c2129c296..c5dfa5c9f 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -7,19 +7,16 @@ use aether_syntax::RBracedExpressions; use aether_syntax::RExpressionList; use aether_syntax::RLanguage; -use aether_syntax::RRoot; +use aether_syntax::RSyntaxElement; use aether_syntax::RSyntaxKind; use aether_syntax::RSyntaxNode; use amalthea::wire::execute_request::CodeLocation; use anyhow::anyhow; use biome_line_index::LineIndex; -use biome_rowan::syntax::SyntaxElementKey; use biome_rowan::AstNode; -use biome_rowan::AstNodeList; use 
biome_rowan::SyntaxRewriter; use biome_rowan::TriviaPieceKind; use biome_rowan::VisitNodeSignal; -use biome_rowan::WalkEvent; use harp::object::RObject; use libr::SEXP; use url::Url; @@ -47,7 +44,18 @@ const AUTO_STEP_FUNCTION: &str = ".ark_auto_step"; // calls to let Ark know a breakpoint is now active after evaluation. This is // handled in a separate code path in `annotate_source()`. -// Breakpoint injection +// Breakpoint injection at parse-time rather than eval-time. +// +// Traditional approaches (like RStudio's) inject `browser()` calls into live +// objects at eval-time. This is fragile: you must find all copies of the object +// (explicit/implicit exports, method tables, etc.), preserve source references +// after injection, and handle edge cases for each object system (R6, S7, ...). +// These injection routines are ad hoc and require ongoing maintenance to add +// missing injection places as the ecosystem evolves. +// +// By injecting breakpoints before R ever sees the code, we sidestep all of +// this. R only evaluates code that already contains breakpoint calls, so we +// never miss copies and source references are correct from the start. // // A breakpoint injected on `expression_to_break_on` looks like this: // @@ -119,51 +127,49 @@ const AUTO_STEP_FUNCTION: &str = ".ark_auto_step"; // expression are now "verified", i.e. the breakpoints have been injected and // the code containing them has been evaluated. -// Breakpoint injection happens in two phases: +// Breakpoint injection uses a Biome `SyntaxRewriter`, a tree visitor with +// preorder and postorder hooks that allows replacing nodes on the way out. // -// - We first collect "anchors", i.e. the syntax node where a breakpoint should -// be injected, its line in document coordinates, and a unique identifier. +// - Preorder (`visit_node`): Cache line information for braced expressions. +// We record where each expression starts and its line range. 
This must be +// done before any modifications because the postorder hook sees a partially +// rebuilt tree with shifted token offsets. // -// - In a second pass we use a Biome `SyntaxRewriter` to go ahead and modify the -// code. This is a tree visitor that allows replacing the node on the way out -// (currently via an extension to the Biome API that will be submitted to Biome -// later on). This approach was chosen over `BatchMutation`, which collects -// changes and applies them from deepest to shallowest, because the latter: +// - Postorder (`visit_node_post`): Process expression lists bottom-up. For +// lists inside braces, we inject breakpoint calls, add `#line` directives, +// and mark remaining breakpoints (e.g. on closing braces) as invalid. The +// cached line info represents original source positions, which is exactly +// what we need for anchoring breakpoints to document lines. // -// - Doesn't handle insertions in lists (though that could be contributed). -// Only replacements are currently supported. +// Note: The line info cached in pre-order visit can potentially become stale in +// the post-order hook, since the latter is operating on a reconstructed tree. +// However, we only use the cached info to reason about where original +// expressions lived in the source document, not where they end up in the +// rewritten tree. This is safe as long as we don't reorder or remove +// expressions (we only inject siblings and trivia). // -// - Doesn't handle nested changes in a node that is later _replaced_. -// If the scheduled changes were pure insertions (if insertions were -// supported) then nested changes would compose correctly. However, nested -// changes wouldn't work well as soon as a replacement is involved, because -// BatchMutation can't express "edit a descendant" and "replace an ancestor" -// in one batch without risking the ancestor replacement overwriting the -// descendant edit. 
+// We use `SyntaxRewriter` instead of `BatchMutation` because the latter: // -// That limitation interacts badly with Biome's strict stance on mutation. -// For example, you can't add a comment to a node; you have to create a new one -// that features a comment. This issue arises when adding a line directive, e.g. in: +// - Doesn't handle insertions in lists (only replacements). // -// ```r -// { # BP 1 -// 1 # BP 2 -// } -// ``` +// - Doesn't handle nested changes in a node that is later replaced. For example: // -// BP 2 causes changes inside the braces. Then BP 1 causes the whole brace -// expression to be replaced with a variant that has a line directive attached. -// But there is no way to express both these changes to BatchMutation because it -// takes modifications upfront. This is why we work instead with `SyntaxRewriter` -// which allows us to replace nodes from bottom to top as we go. +// ```r +// { # BP 1 +// 1 # BP 2 +// } +// ``` // -// Note that Rust-Analyzer's version of Rowan is much more flexible and allow you to -// create a mutable syntax tree that you can freely update (see `clone_for_update()` -// and the tree editor API). Unfortunately Biome has adopted a strict stance on -// immutable data structures so we don't have access to such affordances. +// BP 2 causes changes inside the braces. Then BP 1 causes the whole brace +// expression to be replaced with a variant that has a `#line` directive. +// BatchMutation can't express both changes because it takes modifications +// upfront. `SyntaxRewriter` lets us replace nodes bottom-up as we go. -// Called by ReadConsole to inject breakpoints (if any) and source reference -// mapping (via a line directive) +/// Annotate console input for `ReadConsole`. +/// +/// - Adds a `#line` directive to map the code back to its document location. +/// - Adds leading whitespace to align with the original character offset. +/// - Injects breakpoint calls if any breakpoints are set. 
pub(crate) fn annotate_input( code: &str, location: CodeLocation, @@ -172,9 +178,23 @@ pub(crate) fn annotate_input( // First, inject breakpoints into the original code. This must happen before // we add the outer line directive, otherwise the coordinates of inner line // directives are shifted by 1 line. - let code_with_breakpoints = if let Some(breakpoints) = breakpoints { + let annotated_code = if let Some(breakpoints) = breakpoints { + let root = aether_parser::parse(code, Default::default()).tree(); let line_index = LineIndex::new(code); - inject_breakpoints(code, location.clone(), breakpoints, &line_index)? + + // The line offset is `doc_line = code_line + line_offset`. + // Code line 0 corresponds to document line `location.start.line`. + let line_offset = location.start.line as i32; + + let mut rewriter = + AnnotationRewriter::new(&location.uri, breakpoints, line_offset, &line_index); + let out = rewriter.transform(root.syntax().clone()); + + if let Some(err) = rewriter.take_err() { + return Err(err); + } + + out.to_string() } else { code.to_string() }; @@ -192,475 +212,477 @@ pub(crate) fn annotate_input( let leading_padding = " ".repeat(location.start.character as usize); Ok(format!( - "{line_directive}\n{leading_padding}{code_with_breakpoints}" + "{line_directive}\n{leading_padding}{annotated_code}" )) } -pub(crate) fn inject_breakpoints( - code: &str, - location: CodeLocation, - breakpoints: &mut [Breakpoint], - line_index: &LineIndex, -) -> anyhow::Result { - let root = aether_parser::parse(code, Default::default()).tree(); - - // The offset between document coordinates and code coordinates. Breakpoints - // are in document coordinates, but AST nodes are in code coordinates - // (starting at line 0). - let line_offset = location.start.line; - - // Filter breakpoints to only those within the source's valid range. We - // collect both for simplicity and because we need to sort the vector - // later on. 
- let breakpoints: Vec<_> = breakpoints - .iter_mut() - .filter(|bp| bp.line >= location.start.line && bp.line <= location.end.line) - .collect(); - - if breakpoints.is_empty() { - return Ok(code.to_string()); - } - - // First collect all breakpoint anchors, then inject in a separate pass. - // This two-stage approach is not necessary but keeps the anchor-finding - // logic (with its edge cases like invalid breakpoints, nesting decisions, - // look-ahead) separate from the tree transformation with the - // `SyntaxRewriter`. - let anchors = find_breakpoint_anchors(root.syntax(), breakpoints, line_index, line_offset)?; - - if anchors.is_empty() { - return Ok(code.to_string()); - } - - // Build map of anchor key -> (breakpoint_id, doc_line). - // Anchors already store document coordinates. - let breakpoint_map: std::collections::HashMap<_, _> = anchors - .into_iter() - .map(|a| (a.anchor.key(), (a.breakpoint_id, a.doc_line))) - .collect(); - - // Now inject breakpoints with a `SyntaxRewriter`. This is the most - // practical option we have with Biome's Rowan because `BatchMutation` does - // not support 1-to-2 splicing (insert breakpoint call before an expression, - // keeping the original). - let mut rewriter = BreakpointRewriter::new(&location.uri, breakpoint_map); - let transformed = rewriter.transform(root.syntax().clone()); - - if let Some(err) = rewriter.take_err() { - return Err(err); - } - - Ok(transformed.to_string()) -} - /// Annotate source code for `source()` and `pkgload::load_all()`. /// -/// - Wraps the whole source in a `{}` block. This allows R to step through the -/// top-level expressions. +/// - Wraps the whole source in a `{}` block first. This allows R to step through +/// top-level expressions and makes all breakpoints "nested" inside braces. /// - Injects breakpoint calls (`.ark_auto_step(.ark_breakpoint(...))`) at /// breakpoint locations. 
/// - Injects verification calls (`.ark_auto_step(.ark_verify_breakpoints_range(...))`) -/// after each top-level expression. Verifying expression by expression allows -/// marking breakpoints as verified even when an expression fails mid-script. -/// - `#line` directives before each original expression so the debugger knows -/// where to step in the original file. +/// after expressions containing breakpoints. +/// - `#line` directives after injected calls to restore correct line mapping. pub(crate) fn annotate_source( code: &str, uri: &Url, breakpoints: &mut [Breakpoint], ) -> anyhow::Result { - let line_index = LineIndex::new(code); - - let root = aether_parser::parse(code, Default::default()).tree(); - let root_node = RRoot::cast(root.syntax().clone()) - .ok_or_else(|| anyhow!("Failed to cast parsed tree to RRoot"))?; - - // Collect line ranges for top-level expressions BEFORE any modifications - let top_level_ranges: Vec> = root_node - .expressions() - .into_iter() - .map(|expr| text_trimmed_line_range(expr.syntax(), &line_index)) - .collect::>>()?; - - if top_level_ranges.is_empty() { - return Ok(code.to_string()); - } - - // Find breakpoint anchors (may be nested within top-level expressions) - let bp_vec: Vec<_> = breakpoints.iter_mut().collect(); - let anchors = find_breakpoint_anchors(root.syntax(), bp_vec, &line_index, 0)?; - - // Build map of anchor key -> (breakpoint_id, doc_line). - let breakpoint_map: std::collections::HashMap<_, _> = anchors - .into_iter() - .map(|a| (a.anchor.key(), (a.breakpoint_id, a.doc_line))) - .collect(); - - let mut rewriter = BreakpointRewriter::new(uri, breakpoint_map); + // Wrap code in braces first. This: + // 1. Allows R to step through top-level expressions + // 2. Makes all breakpoints valid (they're now inside braces, at top-level they'd be invalid) + // This enables uniform treatment by `AnnotationRewriter` for input and source cases. 
+ let wrapped = format!("{{\n{code}\n}}"); + let line_index = LineIndex::new(&wrapped); + + let root = aether_parser::parse(&wrapped, Default::default()).tree(); + + // `line_offset` = -1 because: + // - Wrapped line 0 is `{` + // - Wrapped line 1 is original line 0 + // - doc_line = code_line + line_offset = code_line - 1 + let line_offset: i32 = -1; + + let mut rewriter = AnnotationRewriter::new(uri, breakpoints, line_offset, &line_index); let transformed = rewriter.transform(root.syntax().clone()); if let Some(err) = rewriter.take_err() { return Err(err); } - let transformed_root = RRoot::cast(transformed) - .ok_or_else(|| anyhow!("Failed to cast transformed tree to RRoot"))?; - - // Rebuild root expression list with #line directives and verify calls - let annotated = annotate_root_list( - transformed_root.expressions().syntax().clone(), - &top_level_ranges, - uri, - )?; - - // Wrap in braces so R can step through expressions - Ok(format!("{{\n{annotated}}}\n")) -} - -struct BreakpointAnchor { - /// Unique identifier for the breakpoint, injected as argument in the code - breakpoint_id: i64, - /// The line in document coordinates (0-based) - doc_line: u32, - /// The anchor node (expression to place breakpoint before) - anchor: RSyntaxNode, -} - -fn find_breakpoint_anchors( - root: &RSyntaxNode, - mut breakpoints: Vec<&mut Breakpoint>, - line_index: &LineIndex, - line_offset: u32, -) -> anyhow::Result> { - // Sort breakpoints by ascending line so we can walk the expression lists in - // DFS order, and match breakpoints to expressions by comparing lines. Both - // sequences proceed in roughly the same order (by line number), so we can - // consume breakpoints one by one as we find their anchors without needing - // to go backward in either sequence. 
- breakpoints.sort_by_key(|bp| bp.line); - - // Peekable so we can inspect the next breakpoint's line without consuming it, - // deciding whether to place it at the current expression or continue to the - // next expression without consuming the current breakpoint. - let mut bp_iter = breakpoints.into_iter().peekable(); - - let mut anchors = Vec::new(); - - // Start from the root's expression list - let r = - RRoot::cast(root.clone()).ok_or_else(|| anyhow!("Failed to cast parsed tree to RRoot"))?; - let root_list = r.expressions(); - - find_anchors_in_list( - &root_list, - &mut bp_iter, - &mut anchors, - line_index, - line_offset, - true, - )?; - - Ok(anchors) -} - -// Takes an expression list, either from the root node or a brace node -fn find_anchors_in_list<'a>( - list: &RExpressionList, - breakpoints: &mut std::iter::Peekable>, - anchors: &mut Vec, - line_index: &LineIndex, - line_offset: u32, - is_root: bool, -) -> anyhow::Result<()> { - // Collect to allow indexed look-ahead and re-checking the same element - // without consuming an iterator - let elements: Vec<_> = list.into_iter().collect(); - - if elements.is_empty() { - return Ok(()); - } - - let mut i = 0; - while i < elements.len() { - let Some(bp) = breakpoints.peek() else { - // No more breakpoints - return Ok(()); - }; - - // Convert breakpoint line from document coordinates to code coordinates - let bp_code_line = bp.line - line_offset; - - let current = &elements[i]; - let current_line = text_trimmed_line_range(current.syntax(), line_index)?.start; - - let next_line = if i + 1 < elements.len() { - let next = &elements[i + 1]; - let next_line = text_trimmed_line_range(next.syntax(), line_index)?.start; - - // If the breakpoint is at or past the next element, move on - if bp_code_line >= next_line { - i += 1; - continue; - } - - // Otherwise the breakpoint is either at `current_line` or between - // `current_line` and `next_line` - Some(next_line) - } else { - // There is no next element. 
The breakpoint either belongs to the - // current element or is past the current list and we need to - // backtrack and explore sibling trees. - None - }; - - // Try to place in a nested brace list first - let found_nested = find_anchors_in_nested_list( - current.syntax(), - breakpoints, - anchors, - line_index, - line_offset, - )?; - - if found_nested { - let Some(bp) = breakpoints.peek() else { - // No breakpoints left to process - return Ok(()); - }; - - let bp_code_line = bp.line - line_offset; - - // If next breakpoint is at or past next element, advance - if next_line.is_some_and(|next| bp_code_line >= next) { - i += 1; - continue; - } - - // Breakpoint is still within this element but wasn't placed. - // It means it's on a closing brace so consume it and mark invalid. - let bp = breakpoints.next().unwrap(); - bp.state = BreakpointState::Invalid; - - i += 1; - continue; - } - - if is_root { - // We never place breakpoints at top-level. R can only step through a `{` list. - let bp = breakpoints.next().unwrap(); - bp.state = BreakpointState::Invalid; - - i += 1; - continue; - } - - if next_line.is_none() && bp_code_line > current_line { - // Breakpoint is past this scope entirely, in a sibling tree. Let - // parent handle it. 
- return Ok(()); - } - - // Place breakpoint at current element of the `{` list - let bp = breakpoints.next().unwrap(); - let doc_line = current_line + line_offset; - bp.line = doc_line; - anchors.push(BreakpointAnchor { - breakpoint_id: bp.id, - doc_line, - anchor: current.syntax().clone(), - }); - } - - Ok(()) -} - -fn find_anchors_in_nested_list<'a>( - element: &RSyntaxNode, - breakpoints: &mut std::iter::Peekable>, - anchors: &mut Vec, - line_index: &LineIndex, - line_offset: u32, -) -> anyhow::Result { - let mut found_any = false; - let mut skip_until: Option = None; - - // Search for brace lists in descendants - for event in element.preorder() { - match event { - WalkEvent::Leave(node) => { - // If we're leaving the node we're skipping, clear the skip flag - if skip_until.as_ref() == Some(&node) { - skip_until = None; - } - continue; - }, - - WalkEvent::Enter(node) => { - // If we're currently skipping a subtree, continue - if skip_until.is_some() { - continue; - } - - if let Some(braced) = RBracedExpressions::cast(node.clone()) { - let expr_list = braced.expressions(); - if !expr_list.is_empty() { - // Found a non-empty brace list, recurse into it - find_anchors_in_list( - &expr_list, - breakpoints, - anchors, - line_index, - line_offset, - false, - )?; - found_any = true; - - // Skip this node's subtree to avoid double-processing - skip_until = Some(node); - } - } - }, - } - } - - Ok(found_any) + let transformed_code = transformed.to_string(); + + // Add a trailing verify call to handle any injected breakpoint in trailing + // position. Normally we'd inject a verify call as well as a line directive + // that ensures source references remain correct after the verify call. + // But for the last expression in a list, there is no sibling node to attach + // the line directive trivia to. So, instead of adding a verify call, we + // rely on verification in a parent list instead.
If trailing, there won't + // be any verification calls at all though, so we manually add one there: + // + // ```r + // { + // foo({ + // .ark_auto_step(.ark_breakpoint(...)) + // #line ... + // expr + // }) + // } + // .ark_auto_step(.ark_verify_breakpoints_range(...)) # <- Manual injection + // ``` + // + // This is unconditional for simplicity. + let last_line = code.lines().count() as u32; + let trailing_verify = format_verify_call(uri, &(0..last_line)); + + Ok(format!("{}\n{trailing_verify}\n", transformed_code.trim())) } -/// Rewriter that injects breakpoint calls into expression lists. -/// -/// We use `SyntaxRewriter` rather than `BatchMutation` because we need 1-to-2 -/// splicing (insert breakpoint call before an expression, keeping the -/// original). `BatchMutation` only supports 1-to-1 or 1-to-0 replacements. -struct BreakpointRewriter<'a> { +/// Rewriter that handles all code annotation inside braced expression lists: +/// - Breakpoint calls on statements +/// - Verification calls after statements containing breakpoints +/// - `#line` directives after injected calls to restore sourceref bookkeeping +struct AnnotationRewriter<'a> { uri: &'a Url, - - /// Map from anchor key to (breakpoint_id, line_in_document_coords) - breakpoint_map: std::collections::HashMap, - - /// Stack of pending injections, one frame per expression list we're inside. - injection_stack: Vec>, - + /// Breakpoints in document coordinates, will be mutated to mark invalid ones + breakpoints: &'a mut [Breakpoint], + /// Offset for coordinate conversion: doc_line = code_line + line_offset + line_offset: i32, + /// Line index for the parsed code + line_index: &'a LineIndex, + /// Set of breakpoint IDs that have been consumed (placed in nested lists) + consumed: std::collections::HashSet, + /// Stack tracking braced expression context. Each entry contains precomputed + /// line information captured before child transformations (which can corrupt ranges).
+ brace_stack: Vec, /// First error encountered during transformation (if any) err: Option, } -/// Pending injection to be applied when visiting the parent expression list. -struct PendingInjection { - /// Slot index in the parent list - slot_index: usize, - /// Nodes to insert before this slot - insert_before: Vec, +/// Holds precomputed line information for a braced expression list. +/// Captured on entry to a braced expression since line info becomes unreliable +/// after child transformations. +struct BraceFrame { + /// Code line of the opening `{` + brace_code_line: u32, + /// Line info for each expression (indexed by slot position) + expr_info: Vec, +} + +/// Precomputed line information for a single expression in a braced list. +struct ExprLineInfo { + /// Code line where the expression starts (from first token) + start: u32, + /// Code line range [start, end) for the expression + range: std::ops::Range, } -impl<'a> BreakpointRewriter<'a> { +impl<'a> AnnotationRewriter<'a> { fn new( uri: &'a Url, - breakpoint_map: std::collections::HashMap, + breakpoints: &'a mut [Breakpoint], + line_offset: i32, + line_index: &'a LineIndex, ) -> Self { + // Sort so that `find_breakpoint_for_expr` (which uses `position()`) finds + // the earliest-line breakpoint first when multiple could match + breakpoints.sort_by_key(|bp| bp.line); + Self { uri, - breakpoint_map, - injection_stack: Vec::new(), + breakpoints, + line_offset, + line_index, + consumed: std::collections::HashSet::new(), + brace_stack: Vec::new(), err: None, } } - /// Take the error (if any) out of the rewriter. fn take_err(&mut self) -> Option { self.err.take() } - /// Record an error and return the original node unchanged. fn fail(&mut self, err: anyhow::Error, node: RSyntaxNode) -> RSyntaxNode { if self.err.is_none() { self.err = Some(err); } node } + + /// Convert code line to document line. Can be negative for the wrapper + /// brace in `annotate_source(). 
+ fn to_doc_line(&self, code_line: u32) -> i32 { + code_line as i32 + self.line_offset + } + + /// Check if a breakpoint is available (not consumed and not invalid) + fn is_available(&self, bp: &Breakpoint) -> bool { + !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid) + } + + /// Find all available breakpoints that anchor to this expression: At or + /// after the previous expression's end, up to and including the + /// expression's last line. Returns the indices of all matching breakpoints. + fn match_breakpoints( + &self, + prev_doc_end: Option, + expr_last_line: i32, + ) -> anyhow::Result> { + if expr_last_line < 0 { + return Err(anyhow!( + "Unexpected negative `expr_last_line` ({expr_last_line})" + )); + } + let expr_last_line = expr_last_line as u32; + + let result: Vec = self + .breakpoints + .iter() + .enumerate() + .filter_map(|(idx, bp)| { + if !self.is_available(bp) { + return None; + } + + // Breakpoint is after this expression + if bp.line > expr_last_line { + return None; + } + + // There is no previous expression so that's a match + let Some(prev_doc_end) = prev_doc_end else { + return Some(idx); + }; + + // Breakpoint must be after the end of the previous expression. + // Note we allow blank lines between expressions to anchor to + // the next expression. + if (bp.line as i32) >= prev_doc_end { + Some(idx) + } else { + None + } + }) + .collect(); + Ok(result) + } + + /// Check if any breakpoint (including consumed ones) falls within the line + /// range [start, end) in document coordinates. This is used to determine + /// whether a statement contains breakpoints and thus needs to be followed + /// by a verify call. 
+ fn has_breakpoints_in_range(&self, start: i32, end: i32) -> bool { + self.breakpoints.iter().any(|bp| { + let bp_line = bp.line as i32; + !matches!(bp.state, BreakpointState::Invalid) && bp_line >= start && bp_line < end + }) + } } -impl SyntaxRewriter for BreakpointRewriter<'_> { +impl SyntaxRewriter for AnnotationRewriter<'_> { type Language = RLanguage; fn visit_node(&mut self, node: RSyntaxNode) -> VisitNodeSignal { - // Only push frames for expression lists, not other list types - if node.kind() == RSyntaxKind::R_EXPRESSION_LIST { - self.injection_stack.push(Vec::new()); + if self.err.is_some() { + // Something is wrong but we can't short-circuit the visit. Just + // visit nodes until exhaustion. + return VisitNodeSignal::Traverse(node); + } + + // Track `BraceFrame` information when we enter a braced expression. + // This must be done on entry, since the exit hook only sees the + // (partially) rebuilt tree with invalid line information. + // + // Note: we intentionally cache line info from the original parse tree. + // Downstream injections (breakpoint calls, `#line` trivia, verify calls) + // can change token offsets and make line lookups on the rebuilt nodes + // unreliable, but they do not change where the original expressions + // lived in the source. We only rely on these cached source positions + // when anchoring and invalidating breakpoints, tasks for which we need + // the _original_ coordinates, not the new ones. 
+ if let Some(braced) = RBracedExpressions::cast(node.clone()) { + let Some(brace_code_line) = first_token_code_line(&node, self.line_index) else { + self.err = Some(anyhow!("Failed to get line for opening brace")); + return VisitNodeSignal::Traverse(node); + }; + + let mut expr_info = Vec::new(); + + for expr in braced.expressions() { + let Some(start) = first_token_code_line(expr.syntax(), self.line_index) else { + self.err = Some(anyhow!("Failed to get start line for expression")); + return VisitNodeSignal::Traverse(node); + }; + let range = match text_trimmed_line_range(expr.syntax(), self.line_index) { + Ok(range) => range, + Err(err) => { + self.err = Some(err); + return VisitNodeSignal::Traverse(node); + }, + }; + + expr_info.push(ExprLineInfo { start, range }); + } + + self.brace_stack.push(BraceFrame { + brace_code_line, + expr_info, + }); } VisitNodeSignal::Traverse(node) } fn visit_node_post(&mut self, node: RSyntaxNode) -> RSyntaxNode { - // If we already have an error, skip processing if self.err.is_some() { + // Something is wrong but we can't short-circuit the visit. Just + // visit nodes until exhaustion. 
return node; } - // If an expression list, apply any pending injections - if node.kind() == RSyntaxKind::R_EXPRESSION_LIST { - let injections = self.injection_stack.pop().unwrap_or_default(); + // Only process expression lists + if node.kind() != RSyntaxKind::R_EXPRESSION_LIST { + return node; + } - if injections.is_empty() { + // Note we assume that only braced expressions and the root list have + // `R_EXPRESSION_LIST`, which is the case in our syntax + if let Some(frame) = self.brace_stack.pop() { + // Empty braces have no expressions to break on; any breakpoints + // in this range belong to an outer scope + if frame.expr_info.is_empty() { return node; - } else { - return Self::apply_injections(node, injections); } - } - let Some(&(breakpoint_id, line)) = self.breakpoint_map.get(&node.key()) else { - // Not a breakpoint anchor, nothing to inject - return node; - }; + // Brace range in document coordinates. Since we checked for empty + // expr_info above, first/last are guaranteed to exist. 
+ let Some(last_info) = frame.expr_info.last() else { + return self.fail(anyhow!("expr_info unexpectedly empty"), node); + }; + let Some(first_info) = frame.expr_info.first() else { + return self.fail(anyhow!("expr_info unexpectedly empty"), node); + }; - // Anchors are always inside expression lists, so we must have a frame - let Some(frame) = self.injection_stack.last_mut() else { - return self.fail( - anyhow!("Breakpoint anchor found outside expression list"), - node, - ); - }; + let brace_doc_start = self.to_doc_line(frame.brace_code_line); + let brace_doc_end = self.to_doc_line(last_info.range.end); + let first_expr_doc_start = self.to_doc_line(first_info.start); - // Add line directive to current node right away - let decorated_node = match add_line_directive_to_node(&node, line, self.uri) { - Ok(n) => n, - Err(err) => return self.fail(err, node), - }; + // Annotate statements in the braced list + let result = self.annotate_braced_list(node, frame.brace_code_line, frame.expr_info); - // Queue breakpoint injection for parent expression list - let breakpoint_call = create_breakpoint_call(self.uri, breakpoint_id); - frame.push(PendingInjection { - slot_index: node.index(), - insert_before: vec![breakpoint_call], - }); + // Mark any remaining breakpoints in this brace range as invalid + let invalidation_floor = breakpoint_floor(brace_doc_start, first_expr_doc_start); + self.mark_remaining_breakpoints_invalid(Some(invalidation_floor), Some(brace_doc_end)); - decorated_node + result + } else { + // We're at the root expression list, mark all remaining breakpoints as invalid + self.mark_remaining_breakpoints_invalid(None, None); + node + } } } -impl BreakpointRewriter<'_> { - /// Apply pending injections to an expression list node. - fn apply_injections( - mut node: RSyntaxNode, - mut injections: Vec, +impl AnnotationRewriter<'_> { + /// Annotate an expression list inside braces with breakpoints, `#line` + /// directives, and verification calls. 
+ fn annotate_braced_list( + &mut self, + list_node: RSyntaxNode, + brace_code_line: u32, + expr_info: Vec, ) -> RSyntaxNode { - // Sort by slot index descending so we can splice without invalidating indices - injections.sort_by(|a, b| b.slot_index.cmp(&a.slot_index)); - - for injection in injections { - // Insert before (at the slot index) - if !injection.insert_before.is_empty() { - node = node.splice_slots( - injection.slot_index..injection.slot_index, - injection.insert_before.into_iter().map(|n| Some(n.into())), - ); + let Some(list) = RExpressionList::cast(list_node.clone()) else { + return list_node; + }; + + let elements: Vec<_> = list.into_iter().collect(); + if elements.is_empty() { + return list_node; + } + + // Convert brace code line to document coordinates. This is the floor + // for breakpoint matching, breakpoints before this line belong to an + // outer scope, not this braced list. Note that due to the injected + // wrapper braces in `annotate_source()`, this can be -1 (before any doc + // line). 
+ let brace_doc_start: i32 = self.to_doc_line(brace_code_line); + + let mut result_slots: Vec> = Vec::new(); + let mut needs_line_directive = false; + + let first_expr_doc_start = expr_info + .first() + .map(|info| self.to_doc_line(info.start)) + .unwrap_or(brace_doc_start); + let mut prev_doc_end: Option = + Some(breakpoint_floor(brace_doc_start, first_expr_doc_start)); + + for (i, expr) in elements.iter().enumerate() { + // Use precomputed line info captured on the preorder visit, when + // positions were still valid + let Some(info) = expr_info.get(i) else { + return self.fail(anyhow!("Missing line info for expression {i}"), list_node); + }; + + let expr_doc_start = self.to_doc_line(info.start); + let expr_doc_end = self.to_doc_line(info.range.end); + + // Find all breakpoints that anchor to this expression: + // - At or after the previous expression's end + // - At or before the expression's last line (expr_doc_end - 1, since end is exclusive) + // This includes breakpoints on blank lines before the expression and + // breakpoints inside multiline expressions (which all anchor to the start). 
+ let bp_indices = match self.match_breakpoints(prev_doc_end, expr_doc_end - 1) { + Ok(indices) => indices, + Err(e) => return self.fail(e, list_node), + }; + + if !bp_indices.is_empty() { + // Use the first breakpoint's id for the injected call + let first_bp_id = self.breakpoints[bp_indices[0]].id; + + // Update all matching breakpoints: anchor to expr start and mark consumed + for &bp_idx in &bp_indices { + let bp = &mut self.breakpoints[bp_idx]; + bp.line = expr_doc_start as u32; + self.consumed.insert(bp.id); + } + + // Inject a single breakpoint call for all matching breakpoints + // (all breakpoints are shown at the same location in the + // frontend, once verified) + let breakpoint_call = create_breakpoint_call(self.uri, first_bp_id); + result_slots.push(Some(breakpoint_call.into())); + + // We've injected an expression so we'll need to fix sourcerefs + // with a line directive + needs_line_directive = true; } + + // There are two reasons we might need a line directive: + // - We've just injected a breakpoint + // - We've injected a verify call at last iteration + let expr_node = if needs_line_directive { + match add_line_directive_to_node(expr.syntax(), expr_doc_start, self.uri) { + Ok(n) => n, + Err(e) => return self.fail(e, list_node), + } + } else { + expr.syntax().clone() + }; + result_slots.push(Some(expr_node.into())); + + // If this expression's range contains any breakpoints, we inject a + // verify call right after it to ensure that they are immediately + // verified after stepping over this expression. The very last + // expression in the list is an exception. We don't inject a verify + // call because we have nowhere to attach a corresponding line + // directive. Instead we rely on the parent list to verify. 
+ let is_last = i == elements.len() - 1; + if !is_last && self.has_breakpoints_in_range(expr_doc_start, expr_doc_end) { + let start_u32 = expr_doc_start.max(0) as u32; + let end_u32 = expr_doc_end.max(0) as u32; + let verify_call = create_verify_call(self.uri, &(start_u32..end_u32)); + result_slots.push(Some(verify_call.into())); + + // Next expression will need a line directive no matter what + // (even if there is no injected breakpoint) + needs_line_directive = true; + } else { + needs_line_directive = false; + } + + prev_doc_end = Some(expr_doc_end); } - node + // Replace all slots with the new list + let slot_count = list_node.slots().count(); + list_node.splice_slots(0..slot_count, result_slots) + } + + /// Mark remaining unconsumed breakpoints as invalid within the given range. + /// If range bounds are None, all remaining breakpoints are marked invalid. + fn mark_remaining_breakpoints_invalid(&mut self, start: Option, end: Option) { + for bp in self.breakpoints.iter_mut() { + let is_available = + !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid); + if !is_available { + continue; + } + + let bp_line = bp.line as i32; + let in_range = + start.map_or(true, |s| bp_line >= s) && end.map_or(true, |e| bp_line <= e); + if in_range { + bp.state = BreakpointState::Invalid; + } + } } } +/// Compute the floor line for breakpoint matching in a braced list. When +/// content starts on a later line than the brace, we use `brace_doc_start + 1` +/// to avoid claiming breakpoints on the brace line, as those belong to the +/// parent scope. +fn breakpoint_floor(brace_doc_start: i32, first_expr_doc_start: i32) -> i32 { + if first_expr_doc_start > brace_doc_start { + brace_doc_start + 1 + } else { + brace_doc_start + } +} + +/// Returns the code line of the node's first token. 
+fn first_token_code_line(node: &RSyntaxNode, line_index: &LineIndex) -> Option { + let token = node.first_token()?; + let offset = token.text_trimmed_range().start(); + line_index.line_col(offset).map(|lc| lc.line) +} + /// Returns the line range [start, end) for the node's trimmed text. fn text_trimmed_line_range( node: &RSyntaxNode, @@ -729,9 +751,16 @@ fn insert_before_trailing_whitespace( fn add_line_directive_to_node( node: &RSyntaxNode, - line: u32, + line: i32, uri: &Url, ) -> anyhow::Result { + if line < 0 { + return Err(anyhow!( + "Line directive line is negative ({line}), this shouldn't happen" + )); + } + let line = line as u32; + let first_token = node .first_token() .ok_or_else(|| anyhow!("Node has no first token for line directive"))?; @@ -757,43 +786,6 @@ fn add_line_directive_to_node( .ok_or_else(|| anyhow!("Failed to replace first token with line directive")) } -/// Rebuild the root expression list with #line directives and verify calls for -/// each expression. -fn annotate_root_list( - list_node: RSyntaxNode, - ranges: &[std::ops::Range], - uri: &Url, -) -> anyhow::Result { - let mut result_slots: Vec>> = Vec::new(); - - // Use pre-computed line ranges (from before any transformations) - let mut range_iter = ranges.iter(); - - for slot in list_node.slots() { - let biome_rowan::SyntaxSlot::Node(node) = slot else { - result_slots.push(None); - continue; - }; - - // Get pre-computed line range for this expression - let Some(line_range) = range_iter.next() else { - result_slots.push(Some(node.into())); - continue; - }; - - // Add #line directive to expression - let decorated_node = add_line_directive_to_node(&node, line_range.start, uri)?; - result_slots.push(Some(decorated_node.into())); - - let verify_call = create_verify_call(uri, line_range); - result_slots.push(Some(verify_call.into())); - } - - // Replace all slots with the new list - let slot_count = list_node.slots().count(); - Ok(list_node.splice_slots(0..slot_count, result_slots)) -} - // 
We create new calls by parsing strings. Although less elegant, it's much less // verbose and easier to see what's going on. @@ -809,13 +801,18 @@ fn create_breakpoint_call(uri: &Url, id: i64) -> RSyntaxNode { } fn create_verify_call(uri: &Url, line_range: &std::ops::Range) -> RSyntaxNode { - let code = format!( - "\nbase::{AUTO_STEP_FUNCTION}(base::.ark_verify_breakpoints_range(\"{}\", {}L, {}L))\n", + let code = format!("\n{}\n", format_verify_call(uri, line_range)); + aether_parser::parse(&code, Default::default()).syntax() +} + +/// Formats a verify call as a string. Takes 0-indexed line range. +fn format_verify_call(uri: &Url, line_range: &std::ops::Range) -> String { + format!( + "base::{AUTO_STEP_FUNCTION}(base::.ark_verify_breakpoints_range(\"{}\", {}L, {}L))", uri, line_range.start + 1, line_range.end + 1 - ); - aether_parser::parse(&code, Default::default()).syntax() + ) } #[harp::register] @@ -938,11 +935,7 @@ mod tests { }, }; // Breakpoint at document line 5 (code line 2, i.e., `1`) - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 5, - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 5, BreakpointState::Unverified)]; let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); @@ -967,14 +960,9 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 2, // `y <- 2` - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ -994,21 +982,12 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ 
- Breakpoint { - id: 1, - line: 2, // `y <- 2` - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 4, // `w <- 4` - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 2, BreakpointState::Unverified), + Breakpoint::new(2, 4, BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); @@ -1028,14 +1007,9 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 2, - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); } @@ -1054,16 +1028,12 @@ mod tests { character: 6, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 10, - state: BreakpointState::Unverified, - }]; - - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); - // Should return unchanged code - assert_eq!(result, code); + let mut breakpoints = vec![Breakpoint::new(1, 10, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); assert!(!matches!(breakpoints[0].state, BreakpointState::Verified)); } @@ -1085,27 +1055,132 @@ mod tests { character: 1, 
}, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ - Breakpoint { - id: 1, - line: 3, // Inside function - `y <- 2` - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 6, // In outer braces - `w <- 4` - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 3, BreakpointState::Unverified), + Breakpoint::new(2, 6, BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // Both breakpoints are valid (inside brace lists) assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); } + #[test] + fn test_inject_breakpoints_inside_multiline_expr_anchors_to_start() { + // A breakpoint on an intermediate line of a multiline expression should + // anchor to the start of that expression. + // Lines: + // 0: { + // 1: x + + // 2: y + // 3: z + // 4: } + let code = "{\n x +\n y\n z\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + // Breakpoint inside multiline expression should anchor to expression start + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert_eq!(breakpoints[0].line, 1); // Anchored to line 1 (x +) + } + + #[test] + fn test_inject_breakpoints_on_blank_line_anchors_to_next() { + // A breakpoint on a blank line between expressions should anchor to + // the next expression. 
+ // Lines: + // 0: { + // 1: x <- 1 + // 2: (blank) + // 3: y <- 2 + // 4: } + let code = "{\n x <- 1\n\n y <- 2\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 4, + character: 1, + }, + }; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + // Breakpoint on blank line should anchor to next expression (valid) + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + // Line should be updated to the actual anchor position (line 3) + assert_eq!(breakpoints[0].line, 3); + } + + #[test] + fn test_multiple_breakpoints_collapse_to_same_line() { + // Multiple breakpoints matching the same expression should all anchor + // to the expression start, but only one breakpoint call is injected. + // Lines: + // 0: { + // 1: (blank) + // 2: foo( + // 3: 1 + // 4: ) + // 5: } + let code = "{\n\n foo(\n 1\n )\n}"; + let location = CodeLocation { + uri: Url::parse("file:///test.R").unwrap(), + start: Position { + line: 0, + character: 0, + }, + end: Position { + line: 5, + character: 1, + }, + }; + // Three breakpoints: blank line, expression start, and inside expression + let mut breakpoints = vec![ + Breakpoint::new(1, 1, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), + Breakpoint::new(3, 3, BreakpointState::Unverified), + ]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); + + // All breakpoints should be valid and anchored to line 2 (expression start) + for bp in &breakpoints { + assert!( + !matches!(bp.state, BreakpointState::Invalid), + "Breakpoint {} should be valid", + bp.id + ); + assert_eq!(bp.line, 2, "Breakpoint {} should anchor to line 2", bp.id); + } + + // Only one breakpoint call should be 
injected (count occurrences) + let bp_call_count = result.matches(".ark_breakpoint").count(); + assert_eq!( + bp_call_count, 1, + "Only one breakpoint call should be injected" + ); + } + #[test] fn test_inject_breakpoints_with_blank_line() { // Test that blank lines before an anchor are preserved @@ -1122,14 +1197,9 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 4, // `y <- 2` - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 4, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ -1150,16 +1220,12 @@ mod tests { character: 6, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 2, // The `}` line - state: BreakpointState::Unverified, - }]; - - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); - // Should return unchanged code since breakpoint is invalid - assert_eq!(result, code); + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); // Marked as invalid assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ -1180,21 +1246,12 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ - Breakpoint { - id: 1, - line: 3, // The `}` line of the function - invalid - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 4, // `y <- 2` - in outer braces, 
valid - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 3, BreakpointState::Unverified), + Breakpoint::new(2, 4, BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // First breakpoint is invalid (on closing brace) @@ -1222,26 +1279,13 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ - Breakpoint { - id: 1, - line: 1, // `x <- 1` - in outer braces - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 3, // `y <- 2` - within nested function - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 3, - line: 6, // `w <- 4` - in outer braces - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 1, BreakpointState::Unverified), + Breakpoint::new(2, 3, BreakpointState::Unverified), + Breakpoint::new(3, 6, BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // All breakpoints are valid (inside brace lists) assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); @@ -1273,16 +1317,11 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); // Breakpoint at document line 12 (which is code line 2, i.e., `y <- 2`) - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 12, - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 12, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // The breakpoint line should remain in document coordinates @@ -1305,16 
+1344,11 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); // Breakpoint at document line 22 (code line 2, i.e., `y <- 2`) - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 22, - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 22, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // The breakpoint line should remain in document coordinates @@ -1338,16 +1372,11 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); // Breakpoint at line 2 (the `1` expression inside the inner braces) - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 2, - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // The breakpoint should be placed at line 2 @@ -1370,16 +1399,11 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); // Breakpoint at line 3 (the `1` expression inside the innermost braces) - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 3, - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 3, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // The breakpoint should be placed at line 3 @@ -1402,18 +1426,14 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); // Breakpoint at line 3 (the inner `}` line) - let mut breakpoints = vec![Breakpoint { - 
id: 1, - line: 3, - state: BreakpointState::Unverified, - }]; - - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); - // Should return unchanged code since breakpoint is invalid - assert_eq!(result, code); + let mut breakpoints = vec![Breakpoint::new(1, 3, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); // Marked as invalid assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ -1433,16 +1453,12 @@ mod tests { character: 6, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 1, - state: BreakpointState::Unverified, - }]; - - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); - // Code unchanged since breakpoint is invalid - assert_eq!(result, code); + let mut breakpoints = vec![Breakpoint::new(1, 1, BreakpointState::Unverified)]; + + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); // Breakpoint marked as invalid assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); } @@ -1462,23 +1478,15 @@ mod tests { character: 6, }, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ - Breakpoint { - id: 1, - line: 0, - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 2, - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); - // Code unchanged since all breakpoints are invalid 
- assert_eq!(result, code); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); // Both breakpoints marked as invalid assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); assert!(matches!(breakpoints[1].state, BreakpointState::Invalid)); @@ -1499,26 +1507,13 @@ mod tests { character: 6, }, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ - Breakpoint { - id: 1, - line: 0, // `x <- 1` - top-level, invalid - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 2, // `y <- 2` - inside function, valid - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 3, - line: 4, // `z <- 3` - top-level, invalid - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), + Breakpoint::new(3, 4, BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); // Code should contain breakpoint for nested expression only assert!(result.contains("base::.ark_breakpoint")); // Top-level breakpoints are invalid @@ -1543,11 +1538,7 @@ mod tests { let code = "foo <- function() {\n x <- 1\n y <- 2\n}\nbar <- 3"; let uri = Url::parse("file:///test.R").unwrap(); // Breakpoint at line 2 (inside the function, 0-indexed) - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 1, - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 1, BreakpointState::Unverified)]; let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); insta::assert_snapshot!(result); } @@ -1570,6 +1561,84 @@ mod tests { insta::assert_snapshot!(result); } + #[test] + fn 
test_annotate_source_top_level_breakpoint() { + let code = "x <- 1\ny <- 2\nz <- 3"; + let uri = Url::parse("file:///test.R").unwrap(); + // Breakpoints on top-level expressions (lines 0, 1, 2 in 0-indexed) + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 2, BreakpointState::Unverified), + ]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Top-level breakpoints should be valid in annotate_source (code is wrapped in braces) + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + assert_eq!(breakpoints[1].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_multiple_breakpoints_inside_braces() { + // Breakpoints at lines 1 and 2 (1-based), i.e. lines 0 and 1 (0-indexed) + // Line 0: `{` + // Line 1: ` 1` + let code = "{\n 1\n 2\n}\n\n2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 1, BreakpointState::Unverified), + ]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Breakpoint 1 should be at line 0 (the `{`) + // Breakpoint 2 should be at line 1 (the `1`) + assert_eq!(breakpoints[0].line, 0); + assert_eq!(breakpoints[1].line, 1); + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + assert_eq!(breakpoints[1].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_breakpoint_on_opening_brace() { + // Breakpoint on line 0 (the `{` line) should anchor to the braced expression, + // not dive into the nested list and anchor to line 1. 
+ let code = "{\n 1\n 2\n}\n\n2"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![Breakpoint::new(1, 0, BreakpointState::Unverified)]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Breakpoint should remain at line 0, not shifted to line 1 + assert_eq!(breakpoints[0].line, 0); + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + + #[test] + fn test_annotate_source_breakpoint_on_function_definition_line() { + // Breakpoint on the function definition line (which includes the opening `{`) + // should anchor to the assignment expression, not dive into the function body. + // Line 0: `f <- function(x) {` + // Line 1: ` 1` + // Line 2: `}` + let code = "f <- function(x) {\n 1\n}"; + let uri = Url::parse("file:///test.R").unwrap(); + let mut breakpoints = vec![ + Breakpoint::new(1, 0, BreakpointState::Unverified), + Breakpoint::new(2, 1, BreakpointState::Unverified), + ]; + let result = annotate_source(code, &uri, &mut breakpoints).unwrap(); + + // Breakpoint 1 should remain at line 0 (the function definition) + // Breakpoint 2 should be at line 1 (inside the function body) + assert_eq!(breakpoints[0].line, 0); + assert_eq!(breakpoints[1].line, 1); + assert_eq!(breakpoints[0].state, BreakpointState::Unverified); + assert_eq!(breakpoints[1].state, BreakpointState::Unverified); + insta::assert_snapshot!(result); + } + #[test] fn test_inject_breakpoints_if_else_both_branches() { let code = "if (TRUE) {\n x <- 1\n} else {\n y <- 2\n}"; @@ -1584,21 +1653,12 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ - Breakpoint { - id: 1, - line: 1, // `x <- 1` in if branch - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 3, // `y <- 2` in else branch - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 1, BreakpointState::Unverified), + Breakpoint::new(2, 3, 
BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // Both breakpoints should be valid (not marked as invalid) @@ -1633,24 +1693,16 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); let mut breakpoints = vec![ - Breakpoint { - id: 1, - line: 3, // closing brace of function - state: BreakpointState::Unverified, - }, - Breakpoint { - id: 2, - line: 4, // closing brace of outer block - state: BreakpointState::Unverified, - }, + Breakpoint::new(1, 3, BreakpointState::Unverified), + Breakpoint::new(2, 4, BreakpointState::Unverified), ]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); - // Code should be unchanged (no valid breakpoints) - assert_eq!(result, code); + // annotate_input always adds #line directive for srcref mapping + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); // Both breakpoints should be marked invalid assert!( @@ -1682,14 +1734,9 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 2, // the empty {} expression - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; - let result = inject_breakpoints(code, location, &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // Should anchor to the empty {} expression (it's a valid expression) @@ -1718,49 +1765,35 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); // Test 1: Breakpoint on inner brace open line (valid - anchors to inner {} expression) - let mut 
breakpoints = vec![Breakpoint { - id: 1, - line: 1, - state: BreakpointState::Unverified, - }]; - let result = - inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + let mut breakpoints = vec![Breakpoint::new(1, 1, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); assert!( !matches!(breakpoints[0].state, BreakpointState::Invalid), "Breakpoint on inner brace open should be valid" ); assert!(result.contains(".ark_breakpoint")); - // Test 2: Breakpoint on inner closing brace (invalid) - let mut breakpoints = vec![Breakpoint { - id: 2, - line: 2, - state: BreakpointState::Unverified, - }]; - let result = - inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + // Test 2: Breakpoint on inner closing brace (anchors to inner {} expression start) + let mut breakpoints = vec![Breakpoint::new(2, 2, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); assert!( - matches!(breakpoints[0].state, BreakpointState::Invalid), - "Breakpoint on inner closing brace should be invalid" + !matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint on inner closing brace should anchor to inner {{ expression" ); - assert_eq!(result, code); - - // Test 3: Breakpoint on outer closing brace (invalid) - let mut breakpoints = vec![Breakpoint { - id: 3, - line: 3, - state: BreakpointState::Unverified, - }]; - let result = - inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + assert_eq!(breakpoints[0].line, 1, "Should anchor to line 1"); + assert!(result.contains(".ark_breakpoint")); + + // Test 3: Breakpoint on outer closing brace (invalid - not part of any expression in the list) + let mut breakpoints = vec![Breakpoint::new(3, 3, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); 
assert!( matches!(breakpoints[0].state, BreakpointState::Invalid), "Breakpoint on outer closing brace should be invalid" ); - assert_eq!(result, code); + let expected = format!("#line 1 \"file:///test.R\"\n{code}"); + assert_eq!(result, expected); } #[test] @@ -1780,41 +1813,33 @@ mod tests { character: 2, }, }; - let line_index = LineIndex::new(code); // Test 1: Breakpoint on line 0 (valid - anchors to inner {} expression) - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 0, - state: BreakpointState::Unverified, - }]; - let result = - inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + let mut breakpoints = vec![Breakpoint::new(1, 0, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); assert!( !matches!(breakpoints[0].state, BreakpointState::Invalid), "Breakpoint on {{ line should be valid" ); assert!(result.contains(".ark_breakpoint")); - // Test 2: Breakpoint on line 1 (invalid - closing braces) - let mut breakpoints = vec![Breakpoint { - id: 2, - line: 1, - state: BreakpointState::Unverified, - }]; - let result = - inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + // Test 2: Breakpoint on line 1 (anchors to inner {} which spans lines 0-1) + let mut breakpoints = vec![Breakpoint::new(2, 1, BreakpointState::Unverified)]; + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + // Breakpoint on }} line anchors to the inner {} expression start (line 0) assert!( - matches!(breakpoints[0].state, BreakpointState::Invalid), - "Breakpoint on }} line should be invalid" + !matches!(breakpoints[0].state, BreakpointState::Invalid), + "Breakpoint on }} line should anchor to inner {{ expression" ); - assert_eq!(result, code); + assert_eq!(breakpoints[0].line, 0, "Should anchor to line 0"); + assert!(result.contains(".ark_breakpoint")); } #[test] fn test_inject_breakpoints_inside_multiline_call() { - 
// Test breakpoint placed on a line inside a multi-line call expression - // The breakpoint is on the argument line, not the start of the expression + // Test breakpoint placed on a line inside a multi-line call expression. + // The breakpoint is on the argument line, not the start of the expression, + // but should anchor to the start of the expression. let code = "{\n foo(\n 1\n )\n}"; // Line 0: { // Line 1: foo( @@ -1832,22 +1857,17 @@ mod tests { character: 1, }, }; - let line_index = LineIndex::new(code); - let mut breakpoints = vec![Breakpoint { - id: 1, - line: 2, // Inside the foo() call, on the argument line - state: BreakpointState::Unverified, - }]; + let mut breakpoints = vec![Breakpoint::new(1, 2, BreakpointState::Unverified)]; - let result = - inject_breakpoints(code, location.clone(), &mut breakpoints, &line_index).unwrap(); + let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); + insta::assert_snapshot!(result); - // Breakpoint inside a multi-line expression (not at its start) is invalid - assert!( - matches!(breakpoints[0].state, BreakpointState::Invalid), - "Breakpoint inside multi-line call should be invalid" + // Breakpoint inside a multi-line expression should anchor to expression start + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert_eq!( + breakpoints[0].line, 1, + "Breakpoint should anchor to expression start" ); - assert_eq!(result, code, "Invalid breakpoint should not modify code"); } } diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 4013e9109..b65f99f66 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -35,11 +35,26 @@ pub enum BreakpointState { #[derive(Debug, Clone)] pub struct Breakpoint { pub id: i64, - pub line: u32, // 0-based + /// The line where the breakpoint is actually placed (may be anchored to expression start). + /// 0-based. + pub line: u32, + /// The line originally requested by the user (before anchoring). 
Used to match breakpoints + /// across SetBreakpoints requests. 0-based. + pub original_line: u32, pub state: BreakpointState, } impl Breakpoint { + /// Create a new breakpoint. The `original_line` is set to the same as `line`. + pub fn new(id: i64, line: u32, state: BreakpointState) -> Self { + Self { + id, + line, + original_line: line, + state, + } + } + /// Convert from DAP 1-based line to internal 0-based line pub fn from_dap_line(line: i64) -> u32 { (line - 1) as u32 @@ -65,7 +80,9 @@ pub enum DapBackendEvent { Stopped(DapStoppedEvent), /// Event sent when a breakpoint state changes (verified or unverified) - BreakpointState { id: i64, verified: bool }, + /// The line is included so the frontend can update the breakpoint's position + /// (e.g., when a breakpoint inside a multiline expression anchors to its start) + BreakpointState { id: i64, line: u32, verified: bool }, } #[derive(Debug, Copy, Clone)] @@ -306,6 +323,7 @@ impl Dap { if let Some(tx) = &self.backend_events_tx { tx.send(DapBackendEvent::BreakpointState { id: bp.id, + line: bp.line, verified: true, }) .log_err(); @@ -336,6 +354,7 @@ impl Dap { if let Some(tx) = &self.backend_events_tx { tx.send(DapBackendEvent::BreakpointState { id: bp.id, + line: bp.line, verified: true, }) .log_err(); @@ -355,6 +374,7 @@ impl Dap { for bp in breakpoints { tx.send(DapBackendEvent::BreakpointState { id: bp.id, + line: bp.line, verified: false, }) .log_err(); diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 977e4754b..3646700f6 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -176,11 +176,12 @@ fn listen_dap_events( Event::Terminated(None) }, - DapBackendEvent::BreakpointState { id, verified } => { + DapBackendEvent::BreakpointState { id, line, verified } => { Event::Breakpoint(BreakpointEventBody { reason: BreakpointEventReason::Changed, breakpoint: dap::types::Breakpoint { id: Some(id), + line: Some(Breakpoint::to_dap_line(line)), 
verified, ..Default::default() }, @@ -360,10 +361,14 @@ impl DapServer { // Replace all existing breakpoints by new, unverified ones args_breakpoints .iter() - .map(|bp| Breakpoint { - id: state.next_breakpoint_id(), - line: Breakpoint::from_dap_line(bp.line), - state: BreakpointState::Unverified, + .map(|bp| { + let line = Breakpoint::from_dap_line(bp.line); + Breakpoint { + id: state.next_breakpoint_id(), + line, + original_line: line, + state: BreakpointState::Unverified, + } }) .collect() } else { @@ -371,9 +376,10 @@ impl DapServer { // Unwrap Safety: `doc_changed` is false, so `existing_breakpoints` is Some let (_, old_breakpoints) = old_breakpoints.unwrap(); + // Use original_line for lookup since that's what the frontend sends back let mut old_by_line: HashMap = old_breakpoints .into_iter() - .map(|bp| (bp.line, bp)) + .map(|bp| (bp.original_line, bp)) .collect(); let mut breakpoints: Vec = Vec::new(); @@ -392,7 +398,9 @@ impl DapServer { breakpoints.push(Breakpoint { id: old_bp.id, - line, + // Preserve the actual (anchored) line from previous verification + line: old_bp.line, + original_line: line, state: new_state, }); } else { @@ -400,6 +408,7 @@ impl DapServer { breakpoints.push(Breakpoint { id: state.next_breakpoint_id(), line, + original_line: line, state: BreakpointState::Unverified, }); } @@ -413,11 +422,12 @@ impl DapServer { // Unverified/Invalid breakpoints on the other hand are simply // dropped since there's no verified state that needs to be // preserved. 
- for (line, old_bp) in old_by_line { + for (original_line, old_bp) in old_by_line { if matches!(old_bp.state, BreakpointState::Verified) { breakpoints.push(Breakpoint { id: old_bp.id, - line, + line: old_bp.line, + original_line, state: BreakpointState::Disabled, }); } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap index 961572c6e..d271b9a9d 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_input_with_breakpoint.snap @@ -8,5 +8,7 @@ expression: result base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 6 "file:///test.R" 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 6L, 7L)) +#line 7 "file:///test.R" 2 } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap index 67b94d6d0..ab7db2b55 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_basic.snap @@ -3,10 +3,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- { -#line 1 "file:///test.R" x <- 1 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) -#line 2 "file:///test.R" y <- 2 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) } +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 3L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_function_definition_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_function_definition_line.snap new 
file mode 100644 index 000000000..49c0c2e59 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_function_definition_line.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +f <- function(x) { +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 2 "file:///test.R" + 1 +} +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_opening_brace.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_opening_brace.snap new file mode 100644 index 000000000..0930cc597 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_breakpoint_on_opening_brace.snap @@ -0,0 +1,17 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +{ + 1 + 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) + +#line 6 "file:///test.R" +2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 7L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap index 5a67cbc93..51ece33f1 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiline_expression.snap @@ -3,12 +3,9 @@ source: crates/ark/src/console_annotate.rs expression: result --- { -#line 1 "file:///test.R" foo <- function(x) { x + 1 } 
-base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) -#line 4 "file:///test.R" bar <- 2 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 5L)) } +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_breakpoints_inside_braces.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_breakpoints_inside_braces.snap new file mode 100644 index 000000000..a86f5d2e0 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_breakpoints_inside_braces.snap @@ -0,0 +1,21 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 2 "file:///test.R" + 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +#line 3 "file:///test.R" + 2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) + +#line 6 "file:///test.R" +2 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 7L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap index feefc52bd..56f4bf2d7 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_multiple_expressions.snap @@ -3,13 +3,8 @@ source: crates/ark/src/console_annotate.rs expression: result --- { -#line 1 "file:///test.R" a <- 1 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) -#line 
2 "file:///test.R" b <- 2 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) -#line 3 "file:///test.R" c <- 3 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) } +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_top_level_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_top_level_breakpoint.snap new file mode 100644 index 000000000..9d50b925e --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_top_level_breakpoint.snap @@ -0,0 +1,16 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 1 "file:///test.R" +x <- 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 2L)) +#line 2 "file:///test.R" +y <- 2 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) +#line 3 "file:///test.R" +z <- 3 +} +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 4L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap index 0e247ad4f..a657c9b2e 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__annotate_source_with_breakpoint.snap @@ -3,15 +3,16 @@ source: crates/ark/src/console_annotate.rs expression: result --- { -#line 1 "file:///test.R" foo <- function() { base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 2 "file:///test.R" x <- 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +#line 3 "file:///test.R" 
y <- 2 } base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 5L)) #line 5 "file:///test.R" bar <- 3 -base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 5L, 6L)) } +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 1L, 6L)) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap index f3f24ab75..38753c145 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_before_within_after_nested.snap @@ -2,16 +2,23 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 2 "file:///test.R" x <- 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 3L)) +#line 3 "file:///test.R" f <- function() { base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) #line 4 "file:///test.R" y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 5L)) +#line 5 "file:///test.R" z <- 3 } +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 7L)) + base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "3")) #line 7 "file:///test.R" w <- 4 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap index f9b6e0172..8efc88530 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap +++ 
b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_doubly_nested_braces.snap @@ -2,11 +2,14 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { { base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 3 "file:///test.R" 1 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) +#line 4 "file:///test.R" 2 } } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap index f7cc3bfcc..8a745cc37 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_empty_brace_sibling.snap @@ -2,6 +2,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { x <- 1 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap index 70815120a..c5cf4229b 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_if_else_both_branches.snap @@ -2,6 +2,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" if (TRUE) { base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 2 "file:///test.R" diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap index 
c24e14073..0a57ee73e 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_in_brace_list.snap @@ -2,6 +2,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" f <- function() { x <- 1 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_call.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_call.snap new file mode 100644 index 000000000..94e68c0de --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_call.snap @@ -0,0 +1,12 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + foo( + 1 + ) +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_expr_anchors_to_start.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_expr_anchors_to_start.snap new file mode 100644 index 000000000..b9084dac9 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_inside_multiline_expr_anchors_to_start.snap @@ -0,0 +1,14 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) +#line 2 "file:///test.R" + x + + y +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 2L, 4L)) +#line 4 "file:///test.R" + z +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap 
b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap index 80e415701..da44327e0 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple.snap @@ -2,11 +2,14 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { x <- 1 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 3 "file:///test.R" y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) +#line 4 "file:///test.R" z <- 3 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) #line 5 "file:///test.R" diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap index 763ecf020..bc9638eec 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_multiple_lists.snap @@ -2,14 +2,19 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { x <- 1 f <- function() { base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 4 "file:///test.R" y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 4L, 5L)) +#line 5 "file:///test.R" z <- 3 } +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 7L)) + base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "2")) #line 7 "file:///test.R" w <- 4 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_blank_line_anchors_to_next.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_blank_line_anchors_to_next.snap new 
file mode 100644 index 000000000..0ec2b2fb1 --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_blank_line_anchors_to_next.snap @@ -0,0 +1,12 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ + x <- 1 +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) + +#line 4 "file:///test.R" + y <- 2 +} diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap index a4f965bc3..908b7d7c7 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_on_closing_brace_with_valid_breakpoint.snap @@ -2,6 +2,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { f <- function() { x <- 1 diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap index ea388b4b7..68dc135cd 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_single_line.snap @@ -2,10 +2,13 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { x <- 1 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 3 "file:///test.R" y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 3L, 4L)) +#line 4 "file:///test.R" z <- 3 } diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap 
b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap index b861a2f0d..4d39fb036 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_triply_nested_braces.snap @@ -2,6 +2,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { { { diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap index c992ee6c7..60433f946 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_blank_line.snap @@ -2,6 +2,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 1 "file:///test.R" { x <- 1 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap index bdb3170ff..f5090d0a6 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset.snap @@ -2,10 +2,13 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 11 "file:///test.R" { x <- 1 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) #line 13 "file:///test.R" y <- 2 +base::.ark_auto_step(base::.ark_verify_breakpoints_range("file:///test.R", 13L, 14L)) +#line 14 "file:///test.R" z <- 3 } diff --git 
a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap index 423387e00..8f5fdab41 100644 --- a/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__inject_breakpoints_with_line_offset_nested.snap @@ -2,6 +2,7 @@ source: crates/ark/src/console_annotate.rs expression: result --- +#line 21 "file:///test.R" f <- function() { x <- 1 base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) diff --git a/crates/ark/src/snapshots/ark__console_annotate__tests__multiple_breakpoints_collapse_to_same_line.snap b/crates/ark/src/snapshots/ark__console_annotate__tests__multiple_breakpoints_collapse_to_same_line.snap new file mode 100644 index 000000000..069cfb01f --- /dev/null +++ b/crates/ark/src/snapshots/ark__console_annotate__tests__multiple_breakpoints_collapse_to_same_line.snap @@ -0,0 +1,13 @@ +--- +source: crates/ark/src/console_annotate.rs +expression: result +--- +#line 1 "file:///test.R" +{ +base::.ark_auto_step(base::.ark_breakpoint(browser(), "file:///test.R", "1")) + +#line 3 "file:///test.R" + foo( + 1 + ) +} From bad445129af8f42c3944bec88c883caf95008c4b Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 12 Jan 2026 14:17:13 +0100 Subject: [PATCH 37/42] State reason for invalid breakpoints --- crates/ark/src/console_annotate.rs | 214 +++++++++++++++++++---------- crates/ark/src/dap/dap.rs | 55 +++++++- crates/ark/src/dap/dap_server.rs | 20 ++- crates/ark/src/interface.rs | 8 ++ 4 files changed, 211 insertions(+), 86 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index c5dfa5c9f..b4acbcbb6 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -15,6 +15,7 @@ use anyhow::anyhow; use 
biome_line_index::LineIndex; use biome_rowan::AstNode; use biome_rowan::SyntaxRewriter; +use biome_rowan::TextSize; use biome_rowan::TriviaPieceKind; use biome_rowan::VisitNodeSignal; use harp::object::RObject; @@ -23,6 +24,7 @@ use url::Url; use crate::dap::dap::Breakpoint; use crate::dap::dap::BreakpointState; +use crate::dap::dap::InvalidReason; use crate::interface::RMain; /// Function name used for auto-stepping over injected calls such as breakpoints @@ -307,6 +309,8 @@ struct AnnotationRewriter<'a> { struct BraceFrame { /// Code line of the opening `{` brace_code_line: u32, + /// Code line of the closing `}` + closing_brace_code_line: u32, /// Line info for each expression (indexed by slot position) expr_info: Vec, } @@ -352,6 +356,11 @@ impl<'a> AnnotationRewriter<'a> { node } + /// Convert a text offset to a code line number. + fn to_code_line(&self, offset: TextSize) -> Option { + self.line_index.line_col(offset).map(|lc| lc.line) + } + /// Convert code line to document line. Can be negative for the wrapper /// brace in `annotate_source(). 
fn to_doc_line(&self, code_line: u32) -> i32 { @@ -360,7 +369,7 @@ impl<'a> AnnotationRewriter<'a> { /// Check if a breakpoint is available (not consumed and not invalid) fn is_available(&self, bp: &Breakpoint) -> bool { - !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid) + !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid(_)) } /// Find all available breakpoints that anchor to this expression: At or @@ -417,7 +426,7 @@ impl<'a> AnnotationRewriter<'a> { fn has_breakpoints_in_range(&self, start: i32, end: i32) -> bool { self.breakpoints.iter().any(|bp| { let bp_line = bp.line as i32; - !matches!(bp.state, BreakpointState::Invalid) && bp_line >= start && bp_line < end + !matches!(bp.state, BreakpointState::Invalid(_)) && bp_line >= start && bp_line < end }) } } @@ -449,6 +458,15 @@ impl SyntaxRewriter for AnnotationRewriter<'_> { return VisitNodeSignal::Traverse(node); }; + let Some(closing_brace_code_line) = braced + .r_curly_token() + .ok() + .and_then(|token| self.to_code_line(token.text_trimmed_range().start())) + else { + self.err = Some(anyhow!("Failed to get line for closing brace")); + return VisitNodeSignal::Traverse(node); + }; + let mut expr_info = Vec::new(); for expr in braced.expressions() { @@ -469,6 +487,7 @@ impl SyntaxRewriter for AnnotationRewriter<'_> { self.brace_stack.push(BraceFrame { brace_code_line, + closing_brace_code_line, expr_info, }); } @@ -491,9 +510,36 @@ impl SyntaxRewriter for AnnotationRewriter<'_> { // Note we assume that only braced expressions and the root list have // `R_EXPRESSION_LIST`, which is the case in our syntax if let Some(frame) = self.brace_stack.pop() { - // Empty braces have no expressions to break on; any breakpoints - // in this range belong to an outer scope if frame.expr_info.is_empty() { + // Empty braces have no expressions to break on. Mark breakpoints + // strictly inside as "empty braces", and the closing brace as + // "closing brace". 
Breakpoints on the opening brace line belong + // to the parent scope (for `{}` on a single line, this means no + // breakpoints are marked invalid here). + + let brace_doc_start = self.to_doc_line(frame.brace_code_line); + let closing_doc_line = self.to_doc_line(frame.closing_brace_code_line); + + // Mark lines strictly inside as "empty braces" + let inner_start = brace_doc_start + 1; + let inner_end = closing_doc_line - 1; + if inner_start <= inner_end { + self.mark_breakpoints_invalid( + Some(inner_start), + Some(inner_end), + InvalidReason::EmptyBraces, + ); + } + + // Mark the closing brace line as "closing brace" + if closing_doc_line > brace_doc_start { + self.mark_breakpoints_invalid( + Some(closing_doc_line), + Some(closing_doc_line), + InvalidReason::ClosingBrace, + ); + } + return node; } @@ -513,14 +559,18 @@ impl SyntaxRewriter for AnnotationRewriter<'_> { // Annotate statements in the braced list let result = self.annotate_braced_list(node, frame.brace_code_line, frame.expr_info); - // Mark any remaining breakpoints in this brace range as invalid + // Mark any remaining breakpoints in this brace range as invalid (closing braces) let invalidation_floor = breakpoint_floor(brace_doc_start, first_expr_doc_start); - self.mark_remaining_breakpoints_invalid(Some(invalidation_floor), Some(brace_doc_end)); + self.mark_breakpoints_invalid( + Some(invalidation_floor), + Some(brace_doc_end), + InvalidReason::ClosingBrace, + ); result } else { - // We're at the root expression list, mark all remaining breakpoints as invalid - self.mark_remaining_breakpoints_invalid(None, None); + // Root expression list: leave breakpoints as Unverified. + // They can't be hit in console input, but may be valid when sourcing. node } } @@ -646,10 +696,15 @@ impl AnnotationRewriter<'_> { /// Mark remaining unconsumed breakpoints as invalid within the given range. /// If range bounds are None, all remaining breakpoints are marked invalid. 
- fn mark_remaining_breakpoints_invalid(&mut self, start: Option, end: Option) { + fn mark_breakpoints_invalid( + &mut self, + start: Option, + end: Option, + reason: InvalidReason, + ) { for bp in self.breakpoints.iter_mut() { let is_available = - !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid); + !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid(_)); if !is_available { continue; } @@ -658,7 +713,7 @@ impl AnnotationRewriter<'_> { let in_range = start.map_or(true, |s| bp_line >= s) && end.map_or(true, |e| bp_line <= e); if in_range { - bp.state = BreakpointState::Invalid; + bp.state = BreakpointState::Invalid(reason); } } } @@ -826,15 +881,22 @@ pub unsafe extern "C-unwind" fn ps_annotate_source(uri: SEXP, code: SEXP) -> any let mut dap_guard = main.debug_dap.lock().unwrap(); // If there are no breakpoints for this file, return NULL to signal no - // annotation needed - let Some((_, breakpoints)) = dap_guard.breakpoints.get_mut(&uri) else { - return Ok(harp::r_null()); + // annotation needed. Scope the mutable borrow so we can re-borrow after. + let annotated = { + let Some((_, breakpoints)) = dap_guard.breakpoints.get_mut(&uri) else { + return Ok(harp::r_null()); + }; + if breakpoints.is_empty() { + return Ok(harp::r_null()); + } + annotate_source(&code, &uri, breakpoints.as_mut_slice())? 
}; - if breakpoints.is_empty() { - return Ok(harp::r_null()); + + // Notify frontend about any breakpoints marked invalid during annotation + if let Some((_, breakpoints)) = dap_guard.breakpoints.get(&uri) { + dap_guard.notify_invalid_breakpoints(breakpoints); } - let annotated = annotate_source(&code, &uri, breakpoints.as_mut_slice())?; Ok(RObject::try_from(annotated)?.sexp) } @@ -942,7 +1004,7 @@ mod tests { // Breakpoint line should remain in document coordinates assert_eq!(breakpoints[0].line, 5); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -964,7 +1026,7 @@ mod tests { let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -989,8 +1051,8 @@ mod tests { let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); } #[test] @@ -1063,8 +1125,8 @@ mod tests { let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // Both breakpoints are valid (inside brace lists) - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); } #[test] @@ -1094,7 +1156,7 @@ mod tests { let result = annotate_input(code, location, Some(&mut 
breakpoints)).unwrap(); insta::assert_snapshot!(result); // Breakpoint inside multiline expression should anchor to expression start - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); assert_eq!(breakpoints[0].line, 1); // Anchored to line 1 (x +) } @@ -1125,7 +1187,7 @@ mod tests { let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // Breakpoint on blank line should anchor to next expression (valid) - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); // Line should be updated to the actual anchor position (line 3) assert_eq!(breakpoints[0].line, 3); } @@ -1166,7 +1228,7 @@ mod tests { // All breakpoints should be valid and anchored to line 2 (expression start) for bp in &breakpoints { assert!( - !matches!(bp.state, BreakpointState::Invalid), + !matches!(bp.state, BreakpointState::Invalid(_)), "Breakpoint {} should be valid", bp.id ); @@ -1201,7 +1263,7 @@ mod tests { let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -1227,7 +1289,7 @@ mod tests { let expected = format!("#line 1 \"file:///test.R\"\n{code}"); assert_eq!(result, expected); // Marked as invalid - assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -1255,9 +1317,9 @@ mod tests { insta::assert_snapshot!(result); // First breakpoint is invalid (on closing brace) - assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid(_))); // Second breakpoint is valid (in outer brace list) - 
assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); } #[test] @@ -1288,9 +1350,9 @@ mod tests { let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); insta::assert_snapshot!(result); // All breakpoints are valid (inside brace lists) - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); - assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); - assert!(!matches!(breakpoints[2].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); + assert!(!matches!(breakpoints[2].state, BreakpointState::Invalid(_))); } #[test] @@ -1326,7 +1388,7 @@ mod tests { // The breakpoint line should remain in document coordinates assert_eq!(breakpoints[0].line, 12); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -1353,7 +1415,7 @@ mod tests { // The breakpoint line should remain in document coordinates assert_eq!(breakpoints[0].line, 22); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -1381,7 +1443,7 @@ mod tests { // The breakpoint should be placed at line 2 assert_eq!(breakpoints[0].line, 2); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -1408,7 +1470,7 @@ mod tests { // The breakpoint should be placed at line 3 assert_eq!(breakpoints[0].line, 3); - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] @@ -1435,12 +1497,13 @@ mod tests { let expected = format!("#line 1 \"file:///test.R\"\n{code}"); 
assert_eq!(result, expected); // Marked as invalid - assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(matches!(breakpoints[0].state, BreakpointState::Invalid(_))); } #[test] - fn test_top_level_breakpoint_single_invalid() { - // Top-level breakpoints are invalid (R can't step at top-level) + fn test_top_level_breakpoint_single_unverified() { + // Top-level breakpoints stay Unverified (can't be hit in console, but + // may be valid when sourcing the same file) let code = "x <- 1\ny <- 2\nz <- 3"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), @@ -1459,13 +1522,14 @@ mod tests { // annotate_input always adds #line directive for srcref mapping let expected = format!("#line 1 \"file:///test.R\"\n{code}"); assert_eq!(result, expected); - // Breakpoint marked as invalid - assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); + + // Breakpoint stays Unverified (not Invalid) + assert!(matches!(breakpoints[0].state, BreakpointState::Unverified)); } #[test] - fn test_top_level_breakpoint_multiple_invalid() { - // Multiple top-level breakpoints are all invalid + fn test_top_level_breakpoint_multiple_unverified() { + // Multiple top-level breakpoints stay Unverified let code = "x <- 1\ny <- 2\nz <- 3\nw <- 4"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), @@ -1487,14 +1551,14 @@ mod tests { // annotate_input always adds #line directive for srcref mapping let expected = format!("#line 1 \"file:///test.R\"\n{code}"); assert_eq!(result, expected); - // Both breakpoints marked as invalid - assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); - assert!(matches!(breakpoints[1].state, BreakpointState::Invalid)); + // Both breakpoints stay Unverified + assert!(matches!(breakpoints[0].state, BreakpointState::Unverified)); + assert!(matches!(breakpoints[1].state, BreakpointState::Unverified)); } #[test] - fn test_top_level_breakpoint_mixed_invalid_and_nested() { - // Top-level 
breakpoints are invalid even when mixed with nested ones + fn test_top_level_breakpoint_mixed_unverified_and_nested() { + // Top-level breakpoints stay Unverified, nested ones get consumed let code = "x <- 1\nf <- function() {\n y <- 2\n}\nz <- 3"; let location = CodeLocation { uri: Url::parse("file:///test.R").unwrap(), @@ -1516,12 +1580,12 @@ mod tests { let result = annotate_input(code, location, Some(&mut breakpoints)).unwrap(); // Code should contain breakpoint for nested expression only assert!(result.contains("base::.ark_breakpoint")); - // Top-level breakpoints are invalid - assert!(matches!(breakpoints[0].state, BreakpointState::Invalid)); - // Nested breakpoint is valid - assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid)); - // Top-level breakpoint is invalid - assert!(matches!(breakpoints[2].state, BreakpointState::Invalid)); + // Top-level breakpoints stay Unverified + assert!(matches!(breakpoints[0].state, BreakpointState::Unverified)); + // Nested breakpoint is consumed (not Invalid) + assert!(!matches!(breakpoints[1].state, BreakpointState::Invalid(_))); + // Top-level breakpoint stays Unverified + assert!(matches!(breakpoints[2].state, BreakpointState::Unverified)); } #[test] @@ -1663,11 +1727,11 @@ mod tests { // Both breakpoints should be valid (not marked as invalid) assert!( - !matches!(breakpoints[0].state, BreakpointState::Invalid), + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), "First breakpoint should not be invalid" ); assert!( - !matches!(breakpoints[1].state, BreakpointState::Invalid), + !matches!(breakpoints[1].state, BreakpointState::Invalid(_)), "Second breakpoint should not be invalid" ); } @@ -1706,11 +1770,11 @@ mod tests { // Both breakpoints should be marked invalid assert!( - matches!(breakpoints[0].state, BreakpointState::Invalid), + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), "First breakpoint on closing brace should be invalid" ); assert!( - matches!(breakpoints[1].state, 
BreakpointState::Invalid), + matches!(breakpoints[1].state, BreakpointState::Invalid(_)), "Second breakpoint on closing brace should be invalid" ); } @@ -1741,7 +1805,7 @@ mod tests { // Should anchor to the empty {} expression (it's a valid expression) assert!( - !matches!(breakpoints[0].state, BreakpointState::Invalid), + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), "Breakpoint on empty brace block should be valid" ); } @@ -1770,26 +1834,26 @@ mod tests { let mut breakpoints = vec![Breakpoint::new(1, 1, BreakpointState::Unverified)]; let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); assert!( - !matches!(breakpoints[0].state, BreakpointState::Invalid), + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), "Breakpoint on inner brace open should be valid" ); assert!(result.contains(".ark_breakpoint")); - // Test 2: Breakpoint on inner closing brace (anchors to inner {} expression start) + // Test 2: Breakpoint on inner closing brace is invalid (inner {} is empty) let mut breakpoints = vec![Breakpoint::new(2, 2, BreakpointState::Unverified)]; let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); assert!( - !matches!(breakpoints[0].state, BreakpointState::Invalid), - "Breakpoint on inner closing brace should anchor to inner {{ expression" + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on closing brace of empty braces should be invalid" ); - assert_eq!(breakpoints[0].line, 1, "Should anchor to line 1"); - assert!(result.contains(".ark_breakpoint")); + // No breakpoint injected + assert!(!result.contains(".ark_breakpoint")); // Test 3: Breakpoint on outer closing brace (invalid - not part of any expression in the list) let mut breakpoints = vec![Breakpoint::new(3, 3, BreakpointState::Unverified)]; let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); assert!( - matches!(breakpoints[0].state, 
BreakpointState::Invalid), + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), "Breakpoint on outer closing brace should be invalid" ); let expected = format!("#line 1 \"file:///test.R\"\n{code}"); @@ -1818,21 +1882,21 @@ mod tests { let mut breakpoints = vec![Breakpoint::new(1, 0, BreakpointState::Unverified)]; let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); assert!( - !matches!(breakpoints[0].state, BreakpointState::Invalid), + !matches!(breakpoints[0].state, BreakpointState::Invalid(_)), "Breakpoint on {{ line should be valid" ); assert!(result.contains(".ark_breakpoint")); - // Test 2: Breakpoint on line 1 (anchors to inner {} which spans lines 0-1) + // Test 2: Breakpoint on line 1 (}} line) is invalid - inner {} is empty let mut breakpoints = vec![Breakpoint::new(2, 1, BreakpointState::Unverified)]; let result = annotate_input(code, location.clone(), Some(&mut breakpoints)).unwrap(); - // Breakpoint on }} line anchors to the inner {} expression start (line 0) + // Breakpoint on closing brace of empty braces is invalid assert!( - !matches!(breakpoints[0].state, BreakpointState::Invalid), - "Breakpoint on }} line should anchor to inner {{ expression" + matches!(breakpoints[0].state, BreakpointState::Invalid(_)), + "Breakpoint on }} of empty braces should be invalid" ); - assert_eq!(breakpoints[0].line, 0, "Should anchor to line 0"); - assert!(result.contains(".ark_breakpoint")); + // No breakpoint injected + assert!(!result.contains(".ark_breakpoint")); } #[test] @@ -1864,7 +1928,7 @@ mod tests { insta::assert_snapshot!(result); // Breakpoint inside a multi-line expression should anchor to expression start - assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid)); + assert!(!matches!(breakpoints[0].state, BreakpointState::Invalid(_))); assert_eq!( breakpoints[0].line, 1, "Breakpoint should anchor to expression start" diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 
b65f99f66..da93518c3 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -28,10 +28,25 @@ use crate::thread::RThreadSafe; pub enum BreakpointState { Unverified, Verified, - Invalid, + Invalid(InvalidReason), Disabled, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InvalidReason { + ClosingBrace, + EmptyBraces, +} + +impl InvalidReason { + pub fn message(&self) -> &'static str { + match self { + InvalidReason::ClosingBrace => "Can't break on closing `}` brace", + InvalidReason::EmptyBraces => "Can't break inside empty braces", + } + } +} + #[derive(Debug, Clone)] pub struct Breakpoint { pub id: i64, @@ -66,7 +81,7 @@ impl Breakpoint { } } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] pub enum DapBackendEvent { /// Event sent when a normal (non-browser) prompt marks the end of a /// debugging session. @@ -79,10 +94,16 @@ pub enum DapBackendEvent { /// debugging session Stopped(DapStoppedEvent), - /// Event sent when a breakpoint state changes (verified or unverified) + /// Event sent when a breakpoint state changes (verified, unverified, or invalid) /// The line is included so the frontend can update the breakpoint's position /// (e.g., when a breakpoint inside a multiline expression anchors to its start) - BreakpointState { id: i64, line: u32, verified: bool }, + /// The message is included for invalid breakpoints to explain why. + BreakpointState { + id: i64, + line: u32, + verified: bool, + message: Option, + }, } #[derive(Debug, Copy, Clone)] @@ -311,7 +332,7 @@ impl Dap { // Invalid breakpoints never get verified so we skip them too. 
if matches!( bp.state, - BreakpointState::Verified | BreakpointState::Disabled | BreakpointState::Invalid + BreakpointState::Verified | BreakpointState::Disabled | BreakpointState::Invalid(_) ) { continue; } @@ -325,6 +346,7 @@ impl Dap { id: bp.id, line: bp.line, verified: true, + message: None, }) .log_err(); } @@ -356,6 +378,7 @@ impl Dap { id: bp.id, line: bp.line, verified: true, + message: None, }) .log_err(); } @@ -376,6 +399,28 @@ impl Dap { id: bp.id, line: bp.line, verified: false, + message: None, + }) + .log_err(); + } + } + + /// Notify the frontend about breakpoints that were marked invalid during annotation. + /// Sends a `BreakpointState` event with verified=false and a message for each. + pub fn notify_invalid_breakpoints(&self, breakpoints: &[Breakpoint]) { + let Some(tx) = &self.backend_events_tx else { + return; + }; + + for bp in breakpoints { + let BreakpointState::Invalid(reason) = &bp.state else { + continue; + }; + tx.send(DapBackendEvent::BreakpointState { + id: bp.id, + line: bp.line, + verified: false, + message: Some(reason.message().to_string()), }) .log_err(); } diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 3646700f6..7832f8ec8 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -176,13 +176,14 @@ fn listen_dap_events( Event::Terminated(None) }, - DapBackendEvent::BreakpointState { id, line, verified } => { + DapBackendEvent::BreakpointState { id, line, verified, message } => { Event::Breakpoint(BreakpointEventBody { reason: BreakpointEventReason::Changed, breakpoint: dap::types::Breakpoint { id: Some(id), line: Some(Breakpoint::to_dap_line(line)), verified, + message, ..Default::default() }, }) @@ -451,11 +452,18 @@ impl DapServer { let response_breakpoints: Vec = new_breakpoints .iter() .filter(|bp| !matches!(bp.state, BreakpointState::Disabled)) - .map(|bp| dap::types::Breakpoint { - id: Some(bp.id), - verified: matches!(bp.state, 
BreakpointState::Verified), - line: Some(Breakpoint::to_dap_line(bp.line)), - ..Default::default() + .map(|bp| { + let message = match &bp.state { + BreakpointState::Invalid(reason) => Some(reason.message().to_string()), + _ => None, + }; + dap::types::Breakpoint { + id: Some(bp.id), + verified: matches!(bp.state, BreakpointState::Verified), + line: Some(Breakpoint::to_dap_line(bp.line)), + message, + ..Default::default() + } }) .collect(); diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 7145e9bc3..ce7d44e0a 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1438,6 +1438,7 @@ impl RMain { // Keep the DAP lock while we are updating breakpoints let mut dap_guard = self.debug_dap.lock().unwrap(); + let uri = loc.as_ref().map(|l| l.uri.clone()); let breakpoints = loc .as_ref() .and_then(|loc| dap_guard.breakpoints.get_mut(&loc.uri)) @@ -1456,6 +1457,13 @@ impl RMain { ))); }, } + + // Notify frontend about any breakpoints marked invalid during annotation + if let Some(uri) = &uri { + if let Some((_, bps)) = dap_guard.breakpoints.get(uri) { + dap_guard.notify_invalid_breakpoints(bps); + } + } drop(dap_guard); // Evaluate first expression if there is one From c121e813cbc948150c4e17d8addc2e4e47d1db74 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 12 Jan 2026 14:59:43 +0100 Subject: [PATCH 38/42] Make sure disabled breakpoints are unavailable --- crates/ark/src/console_annotate.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index b4acbcbb6..0a3353a8b 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -367,9 +367,14 @@ impl<'a> AnnotationRewriter<'a> { code_line as i32 + self.line_offset } - /// Check if a breakpoint is available (not consumed and not invalid) + /// Check if a breakpoint is available (not consumed, not invalid, and not + /// disabled) fn 
is_available(&self, bp: &Breakpoint) -> bool { - !self.consumed.contains(&bp.id) && !matches!(bp.state, BreakpointState::Invalid(_)) + !self.consumed.contains(&bp.id) && + !matches!( + bp.state, + BreakpointState::Invalid(_) | BreakpointState::Disabled + ) } /// Find all available breakpoints that anchor to this expression: At or @@ -426,7 +431,11 @@ impl<'a> AnnotationRewriter<'a> { fn has_breakpoints_in_range(&self, start: i32, end: i32) -> bool { self.breakpoints.iter().any(|bp| { let bp_line = bp.line as i32; - !matches!(bp.state, BreakpointState::Invalid(_)) && bp_line >= start && bp_line < end + !matches!( + bp.state, + BreakpointState::Invalid(_) | BreakpointState::Disabled + ) && bp_line >= start && + bp_line < end }) } } From 4582b440e21b3a5083a5f45ffdea8b90b182562b Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 12 Jan 2026 15:23:09 +0100 Subject: [PATCH 39/42] Prevent non-injected breakpoints from becoming verified --- crates/ark/src/console_annotate.rs | 3 ++- crates/ark/src/dap/dap.rs | 10 +++++++++- crates/ark/src/dap/dap_server.rs | 5 +++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 0a3353a8b..38b8aaeaf 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -644,10 +644,11 @@ impl AnnotationRewriter<'_> { // Use the first breakpoint's id for the injected call let first_bp_id = self.breakpoints[bp_indices[0]].id; - // Update all matching breakpoints: anchor to expr start and mark consumed + // Update all matching breakpoints: anchor to expr start and mark consumed/injected for &bp_idx in &bp_indices { let bp = &mut self.breakpoints[bp_idx]; bp.line = expr_doc_start as u32; + bp.injected = true; self.consumed.insert(bp.id); } diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index da93518c3..e458831b9 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -57,6 +57,9 @@ pub 
struct Breakpoint { /// across SetBreakpoints requests. 0-based. pub original_line: u32, pub state: BreakpointState, + /// Whether this breakpoint was actually injected into code during annotation. + /// Only injected breakpoints can be verified by range-based verification. + pub injected: bool, } impl Breakpoint { @@ -67,6 +70,7 @@ impl Breakpoint { line, original_line: line, state, + injected: false, } } @@ -330,10 +334,14 @@ impl Dap { for bp in bp_list.iter_mut() { // Verified and Disabled breakpoints are both already verified. // Invalid breakpoints never get verified so we skip them too. + // Only injected breakpoints can be verified by range. Non-injected + // breakpoints were added by the user after the code was parsed and + // remain unverified until re-parsing / re-evaluation. if matches!( bp.state, BreakpointState::Verified | BreakpointState::Disabled | BreakpointState::Invalid(_) - ) { + ) || !bp.injected + { continue; } diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 7832f8ec8..75a820dc8 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -369,6 +369,7 @@ impl DapServer { line, original_line: line, state: BreakpointState::Unverified, + injected: false, } }) .collect() @@ -403,6 +404,8 @@ impl DapServer { line: old_bp.line, original_line: line, state: new_state, + // Preserve injected status from old breakpoint + injected: old_bp.injected, }); } else { // New breakpoints always start as Unverified, until they get evaluated once @@ -411,6 +414,7 @@ impl DapServer { line, original_line: line, state: BreakpointState::Unverified, + injected: false, }); } } @@ -430,6 +434,7 @@ impl DapServer { line: old_bp.line, original_line, state: BreakpointState::Disabled, + injected: true, }); } } From 2e911024e605b13a71cb91b9ae6e29fdfe24d4ef Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 13 Jan 2026 11:04:24 +0100 Subject: [PATCH 40/42] Check for call before checking call type Fixes 
`View()` --- crates/ark/src/modules/positron/calls_deparse.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ark/src/modules/positron/calls_deparse.R b/crates/ark/src/modules/positron/calls_deparse.R index 361e182e4..911fa5e21 100644 --- a/crates/ark/src/modules/positron/calls_deparse.R +++ b/crates/ark/src/modules/positron/calls_deparse.R @@ -11,7 +11,7 @@ deparse_string <- function(x, cutoff = 500L) { as_label <- function(x) { # Remove arguments of call expressions - if (call_print_type(x) == "prefix") { + if (is.call(x) && call_print_type(x) == "prefix") { x <- x[1] } From 0dd4307001434e412c92486d6c228eb5ff2fab6d Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 13 Jan 2026 14:39:28 +0100 Subject: [PATCH 41/42] More documentation --- crates/ark/src/console_annotate.rs | 7 ++++++- crates/ark/src/dap/dap.rs | 13 +++++++++++-- crates/ark/src/dap/dap_server.rs | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/console_annotate.rs b/crates/ark/src/console_annotate.rs index 38b8aaeaf..48ec29e06 100644 --- a/crates/ark/src/console_annotate.rs +++ b/crates/ark/src/console_annotate.rs @@ -644,7 +644,12 @@ impl AnnotationRewriter<'_> { // Use the first breakpoint's id for the injected call let first_bp_id = self.breakpoints[bp_indices[0]].id; - // Update all matching breakpoints: anchor to expr start and mark consumed/injected + // Update all matching breakpoints: anchor to expr start and + // mark consumed/injected. The `injected` flag is crucial: + // `verify_breakpoints()` only verifies breakpoints where + // `injected == true`. This prevents a bug where a breakpoint + // added _after_ parsing gets incorrectly verified when stopping + // at another breakpoint in the same function. 
for &bp_idx in &bp_indices { let bp = &mut self.breakpoints[bp_idx]; bp.line = expr_doc_start as u32; diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index e458831b9..c84397934 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -202,6 +202,11 @@ impl Dap { shared } + /// Notify the frontend that we've entered the debugger. + /// + /// The DAP session is expected to always be connected (to receive breakpoint + /// updates). The `start_debug` comm message is a hint for the frontend to + /// show the debug toolbar, not a session lifecycle event. pub fn start_debug( &mut self, mut stack: Vec, @@ -230,6 +235,12 @@ impl Dap { } } + /// Notify the frontend that we've exited the debugger. + /// + /// The DAP session remains connected. The `stop_debug` comm message is a + /// hint for the frontend to hide the debug toolbar. We send `Continued` + /// (not `Terminated`) so the DAP connection stays active for receiving + /// breakpoint updates. pub fn stop_debug(&mut self) { // Reset state self.stack = None; @@ -246,8 +257,6 @@ impl Dap { .send(amalthea::comm_rpc_message!("stop_debug")) .log_err(); - // Let frontend know we've quit the debugger so it can - // terminate the debugging session and disconnect. if let Some(datp_tx) = &self.backend_events_tx { datp_tx.send(DapBackendEvent::Continued).log_err(); } diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 75a820dc8..39b1393d9 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -316,6 +316,21 @@ impl DapServer { self.send_event(Event::Initialized); } + // Handle SetBreakpoints requests from the frontend. + // + // Breakpoint state survives DAP server disconnections via document hashing. 
+ // Disconnections happen when the user uses the disconnect command (the + // frontend automatically reconnects) or when the console session goes to + // the background (the LSP is also disabled, so we don't receive document + // change notifications). When we come back online, we compare the document + // content against our stored hash to detect if breakpoints are now stale. + // + // Key implementation details: + // - We use `original_line` for lookup since the frontend doesn't know about + // our line adjustments and always sends back the original line numbers. + // - When a user unchecks a breakpoint, it appears as a deletion (omitted + // from the request). We preserve verified breakpoints as Disabled so we + // can restore their state when re-enabled without requiring re-sourcing. fn handle_set_breakpoints(&mut self, req: Request, args: SetBreakpointsArguments) { let path = args.source.path.clone().unwrap_or_default(); From 26e192f8594a8613c2ee49808f188c751689587c Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 16 Jan 2026 08:21:29 +0100 Subject: [PATCH 42/42] Update cargo lock --- Cargo.lock | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f407b52b0..4e73c00f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -620,15 +620,16 @@ checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -767,9 +768,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "convert_case" @@ -796,9 +797,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.7" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ]