From cbc835742ae1f6391d73246534d26d816fb5c972 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 7 Jan 2026 13:55:02 +0800 Subject: [PATCH 1/2] fix: tdigest quantile edge case Signed-off-by: tison --- datasketches/src/tdigest/sketch.rs | 2 +- datasketches/tests/tdigest_test.rs | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/datasketches/src/tdigest/sketch.rs b/datasketches/src/tdigest/sketch.rs index 037953d..91c90c7 100644 --- a/datasketches/src/tdigest/sketch.rs +++ b/datasketches/src/tdigest/sketch.rs @@ -1175,7 +1175,7 @@ impl TDigestView<'_> { } let mut right_weight = 0.; if self.centroids[i + 1].weight.get() == 1 { - if weight_so_far + dw - weight < 0.5 { + if weight_so_far + dw - weight <= 0.5 { return Some(self.centroids[i + 1].mean); } right_weight = 0.5; diff --git a/datasketches/tests/tdigest_test.rs b/datasketches/tests/tdigest_test.rs index 870f3de..b61d779 100644 --- a/datasketches/tests/tdigest_test.rs +++ b/datasketches/tests/tdigest_test.rs @@ -228,3 +228,12 @@ fn test_invalid_inputs() { } assert!(td.is_empty()); } + +#[test] +fn test_estimate_repeat_values() { + let mut tdigest = TDigestMut::default(); + for _ in 0..20 { + tdigest.update(1.0); + } + assert_eq!(tdigest.quantile(0.9), Some(1.0)); +} From 2057ea1d3bce96713133ce11ef953a7b19a73690 Mon Sep 17 00:00:00 2001 From: tison Date: Thu, 8 Jan 2026 09:05:37 +0800 Subject: [PATCH 2/2] specify ref impl branch Signed-off-by: tison --- licenserc.toml | 2 +- tools/generate_serialization_test_data.py | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/licenserc.toml b/licenserc.toml index 1937a22..82c6c6b 100644 --- a/licenserc.toml +++ b/licenserc.toml @@ -17,4 +17,4 @@ headerPath = "Apache-2.0-ASF.txt" -includes = ['**/*.rs', '**/*.yml', '**/*.yaml', '**/*.toml'] +includes = ['**/*.py', '**/*.rs', '**/*.yml', '**/*.yaml', '**/*.toml'] diff --git a/tools/generate_serialization_test_data.py b/tools/generate_serialization_test_data.py index 7fb488b..450e8f3 100755 --- a/tools/generate_serialization_test_data.py +++ b/tools/generate_serialization_test_data.py @@ -71,7 +71,15 @@ def generate_java_files(workspace_dir, project_dir): # 4. Clone repository repo_url = "https://github.com/apache/datasketches-java.git" - run_command(["git", "clone", repo_url, str(temp_dir)]) + branch = "9.0.0" # FIXME: temporarily use fixed branch until mvn issue is resolved + run_command([ + "git", "clone", + "--depth", "1", + "--branch", branch, + "--single-branch", + repo_url, + str(temp_dir) + ]) # 5. Run Maven to generate files mvn_cmd = ["mvn", "test", "-P", "generate-java-files"] @@ -125,7 +133,15 @@ def generate_cpp_files(workspace_dir, project_root): # 4. Clone repository repo_url = "https://github.com/apache/datasketches-cpp.git" - run_command(["git", "clone", repo_url, str(temp_dir)]) + branch = "master" + run_command([ + "git", "clone", + "--depth", "1", + "--branch", branch, + "--single-branch", + repo_url, + str(temp_dir) + ]) # 5. Build and Run CMake build_dir = temp_dir / "build"