Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cla.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
before we accept your contribution. You can sign the CLA by posting
a comment on this PR saying:

allowlist: DavisVaughan, dependabot[bot], dfalbel, isabelizimm, jonvanausdeln, lionel-, nstrayer, petetronic, positron-bot[bot], seeM, sharon-wang, softwarenerd, timtmok, wesm, copilot[bot], copilot-swe-agent[bot], Copilot
allowlist: DavisVaughan, dependabot[bot], dfalbel, isabelizimm, jonvanausdeln, lionel-, nstrayer, petetronic, positron-bot[bot], seeM, sharon-wang, softwarenerd, timtmok, wesm, Copilot

# the followings are the optional inputs - If the optional inputs are not given, then default values will be taken
#remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
Expand Down
42 changes: 40 additions & 2 deletions crates/amalthea/src/comm/data_explorer_comm.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// @generated

/*---------------------------------------------------------------------------------------------
* Copyright (C) 2024-2025 Posit Software, PBC. All rights reserved.
* Copyright (C) 2024-2026 Posit Software, PBC. All rights reserved.
*--------------------------------------------------------------------------------------------*/

//
Expand Down Expand Up @@ -60,6 +60,13 @@ pub struct FilterResult {
pub had_errors: Option<bool>
}

/// Result of setting import options
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct SetDatasetImportOptionsResult {
/// An error message if setting the options failed
pub error_message: Option<String>
}

/// The current backend state for the data explorer
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct BackendState {
Expand Down Expand Up @@ -94,7 +101,11 @@ pub struct BackendState {

/// Optional experimental parameter to provide an explanation when
/// connected=false. This parameter may change.
pub error_message: Option<String>
pub error_message: Option<String>,

/// Optional formatting options provided by the backend for displaying
/// data values
pub format_options: Option<FormatOptions>
}

/// Schema for a column in a table
Expand Down Expand Up @@ -703,6 +714,15 @@ pub struct ColumnSelection {
pub spec: ArraySelection
}

/// Import options for file-based data sources. Currently supports options
/// for delimited text files (CSV, TSV).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct DatasetImportOptions {
/// Whether the first row contains column headers (for delimited text
/// files)
pub has_header_row: Option<bool>
}

/// Possible values for SortOrder in SearchSchema
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, strum_macros::Display, strum_macros::EnumString)]
pub enum SearchSchemaSortOrder {
Expand Down Expand Up @@ -1193,6 +1213,13 @@ pub struct GetColumnProfilesParams {
pub format_options: FormatOptions,
}

/// Parameters for the SetDatasetImportOptions method.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct SetDatasetImportOptionsParams {
/// Import options to apply
pub options: DatasetImportOptions,
}

/// Parameters for the ReturnColumnProfiles method.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct ReturnColumnProfilesParams {
Expand Down Expand Up @@ -1289,6 +1316,14 @@ pub enum DataExplorerBackendRequest {
#[serde(rename = "get_column_profiles")]
GetColumnProfiles(GetColumnProfilesParams),

/// Set import options for file-based data sources
///
/// Set import options for file-based data sources (like CSV files) and
/// reimport the data. This method is primarily used by file-based
/// backends like DuckDB.
#[serde(rename = "set_dataset_import_options")]
SetDatasetImportOptions(SetDatasetImportOptionsParams),

/// Get the state
///
/// Request the current backend state (table metadata, explorer state, and
Expand Down Expand Up @@ -1337,6 +1372,9 @@ pub enum DataExplorerBackendReply {
/// Reply for the get_column_profiles method (no result)
GetColumnProfilesReply(),

/// Result of setting import options
SetDatasetImportOptionsReply(SetDatasetImportOptionsResult),

/// The current backend state for the data explorer
GetStateReply(BackendState),

Expand Down
36 changes: 36 additions & 0 deletions crates/ark/src/data_explorer/r_data_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ use crossbeam::channel::Sender;
use crossbeam::select;
use harp::exec::RFunction;
use harp::exec::RFunctionExt;
use harp::get_option;
use harp::object::RObject;
use harp::r_symbol;
use harp::table_kind;
Expand Down Expand Up @@ -550,6 +551,10 @@ impl RDataExplorer {
return Err(anyhow!("Data Explorer: Not yet supported"));
},

DataExplorerBackendRequest::SetDatasetImportOptions(_) => {
return Err(anyhow!("Data Explorer: Not yet supported"));
},

DataExplorerBackendRequest::GetRowLabels(req) => {
let row_labels =
r_task(|| self.r_get_row_labels(req.selection, &req.format_options))?;
Expand Down Expand Up @@ -1159,6 +1164,7 @@ impl RDataExplorer {
}]),
},
},
format_options: Some(Self::current_format_options()),
};
Ok(DataExplorerBackendReply::GetStateReply(state))
}
Expand Down Expand Up @@ -1299,6 +1305,36 @@ impl RDataExplorer {
// Call the conversion function with resolved sort keys
convert_to_code::convert_to_code(params, object_name, &resolved_sort_keys)
}

/// Returns the current format options as a `FormatOptions` object.
///
/// These options are derived from the R options "scipen" and "digits". The
/// thresholds for scientific notation are calculated based on these
/// options. The `max_value_length` is set to 1000, and `thousands_sep`
/// is set to `None`.
fn current_format_options() -> FormatOptions {
// In R 4.2, Rf_GetOption1 (used by get_option) doesn't work correctly for scipen
// due to special handling added in later versions. We need to use the R interpreter's
// getOption() function instead. See: https://github.com/wch/r-source/commit/7f20c19
// Note: 'digits' works fine with Rf_GetOption1, so we don't need to change it.
let scipen: i64 = harp::parse_eval_global("as.integer(getOption('scipen', 0))")
.and_then(|obj| obj.try_into())
.unwrap_or(0); // R default
let digits: i64 = get_option("digits").try_into().unwrap_or(7); // R default

// Calculate thresholds for scientific notation
let max_integral_digits = (scipen + 5).max(1); // only depends on scipen
let large_num_digits = (digits - 5).max(0); // default to 2 d.p.
let small_num_digits = (scipen + 6).max(1); // only depends on scipen

FormatOptions {
large_num_digits,
small_num_digits,
max_integral_digits,
max_value_length: 1000,
thousands_sep: None,
}
}
}

/// Open an R object in the data viewer.
Expand Down
60 changes: 60 additions & 0 deletions crates/ark/tests/data_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3108,3 +3108,63 @@ fn test_single_row_data_frame_column_profiles() {
});
}
}

#[test]
fn test_format_options_state_change() {
let _lock = r_test_lock();

// Save the current R options so we can restore them later
let (original_scipen, original_digits) = r_task(|| {
let scipen_obj = harp::parse_eval_global("getOption('scipen')").expect("Failed to get scipen option");
let digits_obj = harp::parse_eval_global("getOption('digits')").expect("Failed to get digits option");
let scipen: i64 = scipen_obj.try_into().unwrap_or(0);
let digits: i64 = digits_obj.try_into().unwrap_or(7);
(scipen, digits)
});

// Set known R options for testing
r_task(|| {
harp::parse_eval_global("options(scipen = 0, digits = 7)").unwrap();
});

// Open a data explorer with mtcars
let setup = TestSetup::new("mtcars");
let socket = setup.socket();

// Get the initial state and verify format_options
TestAssertions::assert_state(&socket, |state| {
let format_options = state.format_options.as_ref().expect("format_options should be present");

// With scipen=0, digits=7:
// max_integral_digits = (0 + 5).max(1) = 5
// large_num_digits = (7 - 5).max(0) = 2
// small_num_digits = (0 + 6).max(1) = 6
assert_eq!(format_options.max_integral_digits, 5);
assert_eq!(format_options.large_num_digits, 2);
assert_eq!(format_options.small_num_digits, 6);
});

// Change R options to different values
r_task(|| {
harp::parse_eval_global("options(scipen = 10, digits = 10)").unwrap();
});

// Get the state again and verify format_options changed
TestAssertions::assert_state(&socket, |state| {
let format_options = state.format_options.as_ref().expect("format_options should be present");

// With scipen=10, digits=10:
// max_integral_digits = (10 + 5).max(1) = 15
// large_num_digits = (10 - 5).max(0) = 5
// small_num_digits = (10 + 6).max(1) = 16
assert_eq!(format_options.max_integral_digits, 15);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like tests are failing with: thread 'test_format_options_state_change' (32147) panicked at crates/ark/tests/data_explorer.rs:3160:9: assertion `left == right` failed left: 5 right: 15

Any idea @kv9898 ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It only fails for R 4.2 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IDK, this might be related: wch/r-source@7f20c19

There's something special about scipen. Because digits are being correctly evaluated. Perhaps we can't use get_option for this, and instead we bay need to evaluate getOption from the R interpreter.

Copy link
Contributor Author

@kv9898 kv9898 Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It only fails for R 4.2 🤔

Thx for checking that out. Because the test worked for me with R 4.5.2:

E:\ark  [scipen ≡] > cargo test -p ark --test data_explorer
   Compiling ark v0.1.220 (E:\ark\crates\ark)
warning: unused import: `std::time::Duration`
 --> crates\ark\src\fixtures\dummy_frontend.rs:7:5
  |
7 | use std::time::Duration;
  |     ^^^^^^^^^^^^^^^^^^^
  |
  = note: `#[warn(unused_imports)]` (part of `#[warn(unused)]`) on by default

warning: `ark` (lib) generated 1 warning (run `cargo fix --lib -p ark` to apply 1 suggestion)
    Finished `test` profile [unoptimized + debuginfo] target(s) in 14.20s
     Running tests\data_explorer.rs (target\debug\deps\data_explorer-27782ce78c282663.exe)

running 40 tests
test test_export_with_sort_order ... ok
test test_basic_mtcars ... ok
test test_column_labels_edge_cases ... ok
test test_column_labels ... ok
test test_column_labels_haven_compatibility ... ok
test test_column_labels_missing ... ok
test test_data_explorer_special_values ... ok
test test_data_table_support ... ok
test test_data_update_num_rows ... ok
test test_empty_data_frame_column_profiles ... ok
test test_empty_data_frame_data_values ... ok
test test_empty_data_frame_schema ... ok
test test_export_data ... ok
test test_format_options_state_change ... ok
test test_empty_data_frame_state ... ok
test test_frequency_table ... ok
test test_get_data_values_by_indices ... ok
test test_histogram ... ok
test test_histogram_single_bin_same_values ... ok
test test_invalid_filters_preserved ... ok
test test_live_updates ... ok
test test_matrix_support ... ok
test test_null_counts ... ok
test test_row_names_matrix ... ok
test test_schema_identification ... ok
test test_search_filters ... ok
test test_search_schema_combined_filters ... ok
test test_search_schema_data_type_filters ... ok
test test_search_schema_edge_cases ... ok
test test_search_schema_no_matches ... ok
test test_search_schema_sort_orders ... ok
test test_search_schema_text_filters ... ok
test test_search_schema_text_with_sort_orders ... ok
test test_search_schema_type_sort_orders ... ok
test test_set_membership_filter ... ok
test test_single_row_data_frame_column_profiles ... ok
test test_summary_stats ... ok
test test_tibble_support ... ok
test test_update_data_filters_reapplied ... ok
test test_women_dataset ... ok

test result: ok. 40 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 1.83s

Copy link
Contributor Author

@kv9898 kv9898 Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, I don't have much of a clue. Feel free to edit my code if you find a solution! Wrapping the call in as.integer() seems to work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IDK, this might be related: wch/r-source@7f20c19

There's something special about scipen. Because digits are being correctly evaluated. Perhaps we can't use get_option for this, and instead we bay need to evaluate getOption from the R interpreter.

Tried that but sadly it didn't work.

assert_eq!(format_options.large_num_digits, 5);
assert_eq!(format_options.small_num_digits, 16);
});

// Restore original R options
r_task(|| {
let restore_cmd = format!("options(scipen = {}, digits = {})", original_scipen, original_digits);
harp::parse_eval_global(&restore_cmd).expect("Failed to restore original R options");
});
}