From fdf8c9f87bd1c727a090d65228557896caf37894 Mon Sep 17 00:00:00 2001
From: lani_karrot
Date: Fri, 9 Jan 2026 19:41:38 +0900
Subject: [PATCH 1/2] test: cross language test cases for reservoir sampling sketch

---
Review notes (this section is ignored by git-am):
 * Files.newOutputStream(path).write(bytes) never closed the stream; every
   generator now calls Files.write(path, bytes), which opens the file with the
   same default options and closes it before returning.
 * ReservoirItemsSketch and java.util.ArrayList were used as raw types; they
   now carry the Long / Double / String type argument matching the SerDe used.
 * A one-line Javadoc was added to each generator. The diffstat and hunk
   offsets in this series are adjusted for those lines; the "index" blob ids
   are carried over from the original commits and do not match the edited
   content.

 .../sampling/ReservoirCrossLanguageTest.java  | 238 ++++++++++++++++++
 1 file changed, 238 insertions(+)
 create mode 100644 src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java

diff --git a/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java b/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
new file mode 100644
index 000000000..f486882d2
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.sampling;
+
+import org.apache.datasketches.common.ArrayOfDoublesSerDe;
+import org.apache.datasketches.common.ArrayOfLongsSerDe;
+import org.apache.datasketches.common.ArrayOfStringsSerDe;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.nio.file.Files;
+
+import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
+import static org.apache.datasketches.common.TestUtil.javaPath;
+
+/**
+ * Serialize binary sketches to be tested by other language code.
+ * Test deserialization of binary sketches serialized by other language code.
+ */
+public class ReservoirCrossLanguageTest {
+
+  /** Writes the serialized image of an empty ReservoirLongsSketch (k = 128) under javaPath. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirLongsSketchEmpty() throws IOException {
+    final int k = 128;
+    final ReservoirLongsSketch sk = ReservoirLongsSketch.newInstance(k);
+
+    Files.write(javaPath.resolve("reservoir_longs_empty_k" + k + "_java.sk"),
+        sk.toByteArray());
+  }
+
+  /** Writes ReservoirLongsSketch images (k = 128) fed 0..n-1, one file per n in nArr (n <= k). */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirLongsSketchExact() throws IOException {
+    final int k = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirLongsSketch sk = ReservoirLongsSketch.newInstance(k);
+      for (int i = 0; i < n; i++) {
+        sk.update(i);
+      }
+      Files.write(javaPath.resolve("reservoir_longs_n_exact" + n + "_k" + k + "_java.sk"),
+          sk.toByteArray());
+    }
+  }
+
+  /** Writes ReservoirLongsSketch images built from k preset even values with n = 1000. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirLongsSketchSampling() throws IOException {
+    final int[] kArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int k : kArr) {
+      final long[] predeterminedSamples = new long[k];
+      for (int i = 0; i < k; i++) {
+        predeterminedSamples[i] = i * 2;
+      }
+
+      final ReservoirLongsSketch sk = ReservoirLongsSketch.getInstance(
+          predeterminedSamples,
+          n,
+          org.apache.datasketches.common.ResizeFactor.X8,
+          k
+      );
+
+      Files.write(javaPath.resolve("reservoir_longs_n" + n + "_k" + k + "_sampling_java.sk"),
+          sk.toByteArray());
+    }
+  }
+
+  /** Writes an empty ReservoirItemsSketch (k = 128) serialized with ArrayOfLongsSerDe. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchLongEmpty() throws IOException {
+    final int k = 128;
+    final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(k);
+
+    Files.write(javaPath.resolve("reservoir_items_long_empty_k" + k + "_java.sk"),
+        sk.toByteArray(new ArrayOfLongsSerDe()));
+  }
+
+  /** Writes ReservoirItemsSketch images (k = 128) fed Long 0..n-1, one file per n in nArr. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchLongExact() throws IOException {
+    final int k = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(k);
+      for (int i = 0; i < n; i++) {
+        sk.update((long) i);
+      }
+      Files.write(javaPath.resolve("reservoir_items_long_n_exact" + n + "_k" + k + "_java.sk"),
+          sk.toByteArray(new ArrayOfLongsSerDe()));
+    }
+  }
+
+  /** Writes ReservoirItemsSketch images built from k preset even Long values with n = 1000. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchLongSampling() throws IOException {
+    final int[] kArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int k : kArr) {
+      final java.util.ArrayList<Long> predeterminedSamples = new java.util.ArrayList<>();
+      for (int i = 0; i < k; i++) {
+        predeterminedSamples.add((long) (i * 2));
+      }
+
+      final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(
+          predeterminedSamples,
+          n,
+          org.apache.datasketches.common.ResizeFactor.X8,
+          k
+      );
+
+      Files.write(javaPath.resolve("reservoir_items_long_n" + n + "_k" + k + "_sampling_java.sk"),
+          sk.toByteArray(new ArrayOfLongsSerDe()));
+    }
+  }
+
+  /** Writes an empty ReservoirItemsSketch (k = 128) serialized with ArrayOfDoublesSerDe. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchDoubleEmpty() throws IOException {
+    final int k = 128;
+    final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(k);
+
+    Files.write(javaPath.resolve("reservoir_items_double_empty_k" + k + "_java.sk"),
+        sk.toByteArray(new ArrayOfDoublesSerDe()));
+  }
+
+  /** Writes ReservoirItemsSketch images (k = 128) fed Double 0..n-1, one file per n in nArr. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchDoubleExact() throws IOException {
+    final int k = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(k);
+      for (int i = 0; i < n; i++) {
+        sk.update((double) i);
+      }
+      Files.write(javaPath.resolve("reservoir_items_double_n_exact" + n + "_k" + k + "_java.sk"),
+          sk.toByteArray(new ArrayOfDoublesSerDe()));
+    }
+  }
+
+  /** Writes ReservoirItemsSketch images built from k preset even Double values with n = 1000. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchDoubleSampling() throws IOException {
+    final int[] kArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int k : kArr) {
+      final java.util.ArrayList<Double> predeterminedSamples = new java.util.ArrayList<>();
+      for (int i = 0; i < k; i++) {
+        predeterminedSamples.add((double) (i * 2));
+      }
+
+      final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(
+          predeterminedSamples,
+          n,
+          org.apache.datasketches.common.ResizeFactor.X8,
+          k
+      );
+
+      Files.write(javaPath.resolve("reservoir_items_double_n" + n + "_k" + k + "_sampling_java.sk"),
+          sk.toByteArray(new ArrayOfDoublesSerDe()));
+    }
+  }
+
+  /** Writes an empty ReservoirItemsSketch (k = 128) serialized with ArrayOfStringsSerDe. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchStringEmpty() throws IOException {
+    final int k = 128;
+    final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(k);
+
+    Files.write(javaPath.resolve("reservoir_items_string_empty_k" + k + "_java.sk"),
+        sk.toByteArray(new ArrayOfStringsSerDe()));
+  }
+
+  /** Writes ReservoirItemsSketch images (k = 128) fed "item" + i for i < n, per n in nArr. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchStringExact() throws IOException {
+    final int k = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(k);
+      for (int i = 0; i < n; i++) {
+        sk.update("item" + i);
+      }
+      Files.write(javaPath.resolve("reservoir_items_string_n_exact" + n + "_k" + k + "_java.sk"),
+          sk.toByteArray(new ArrayOfStringsSerDe()));
+    }
+  }
+
+  /** Writes ReservoirItemsSketch images built from k preset "item" + (2 * i) strings, n = 1000. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsSketchStringSampling() throws IOException {
+    final int[] kArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int k : kArr) {
+      final java.util.ArrayList<String> predeterminedSamples = new java.util.ArrayList<>();
+      for (int i = 0; i < k; i++) {
+        predeterminedSamples.add("item" + (i * 2));
+      }
+
+      final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(
+          predeterminedSamples,
+          n,
+          org.apache.datasketches.common.ResizeFactor.X8,
+          k
+      );
+
+      Files.write(javaPath.resolve("reservoir_items_string_n" + n + "_k" + k + "_sampling_java.sk"),
+          sk.toByteArray(new ArrayOfStringsSerDe()));
+    }
+  }
+}

From f406b06c3c4e6603fecf84ffefc40cc252ce2de2 Mon Sep 17 00:00:00 2001
From: lani_karrot
Date: Fri, 9 Jan 2026 22:09:24 +0900
Subject: [PATCH 2/2] style: unify generated sketches name

---
 .../sampling/ReservoirCrossLanguageTest.java     | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java b/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
index f486882d2..717874a93 100644
--- a/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
+++ b/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
@@ -57,7 +57,7 @@ public void generateReservoirLongsSketchExact() throws IOException {
       for (int i = 0; i < n; i++) {
         sk.update(i);
       }
-      Files.write(javaPath.resolve("reservoir_longs_n_exact" + n + "_k" + k + "_java.sk"),
+      Files.write(javaPath.resolve("reservoir_longs_exact_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray());
     }
   }
@@ -81,7 +81,7 @@ public void generateReservoirLongsSketchSampling() throws IOException {
           k
       );
 
-      Files.write(javaPath.resolve("reservoir_longs_n" + n + "_k" + k + "_sampling_java.sk"),
+      Files.write(javaPath.resolve("reservoir_longs_sampling_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray());
     }
   }
@@ -107,7 +107,7 @@ public void generateReservoirItemsSketchLongExact() throws IOException {
       for (int i = 0; i < n; i++) {
         sk.update((long) i);
       }
-      Files.write(javaPath.resolve("reservoir_items_long_n_exact" + n + "_k" + k + "_java.sk"),
+      Files.write(javaPath.resolve("reservoir_items_long_exact_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray(new ArrayOfLongsSerDe()));
     }
   }
@@ -131,7 +131,7 @@ public void generateReservoirItemsSketchLongSampling() throws IOException {
           k
       );
 
-      Files.write(javaPath.resolve("reservoir_items_long_n" + n + "_k" + k + "_sampling_java.sk"),
+      Files.write(javaPath.resolve("reservoir_items_long_sampling_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray(new ArrayOfLongsSerDe()));
     }
   }
@@ -157,7 +157,7 @@ public void generateReservoirItemsSketchDoubleExact() throws IOException {
       for (int i = 0; i < n; i++) {
         sk.update((double) i);
       }
-      Files.write(javaPath.resolve("reservoir_items_double_n_exact" + n + "_k" + k + "_java.sk"),
+      Files.write(javaPath.resolve("reservoir_items_double_exact_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray(new ArrayOfDoublesSerDe()));
     }
   }
@@ -181,7 +181,7 @@ public void generateReservoirItemsSketchDoubleSampling() throws IOException {
           k
       );
 
-      Files.write(javaPath.resolve("reservoir_items_double_n" + n + "_k" + k + "_sampling_java.sk"),
+      Files.write(javaPath.resolve("reservoir_items_double_sampling_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray(new ArrayOfDoublesSerDe()));
     }
   }
@@ -207,7 +207,7 @@ public void generateReservoirItemsSketchStringExact() throws IOException {
       for (int i = 0; i < n; i++) {
         sk.update("item" + i);
       }
-      Files.write(javaPath.resolve("reservoir_items_string_n_exact" + n + "_k" + k + "_java.sk"),
+      Files.write(javaPath.resolve("reservoir_items_string_exact_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray(new ArrayOfStringsSerDe()));
     }
   }
@@ -231,7 +231,7 @@ public void generateReservoirItemsSketchStringSampling() throws IOException {
           k
       );
 
-      Files.write(javaPath.resolve("reservoir_items_string_n" + n + "_k" + k + "_sampling_java.sk"),
+      Files.write(javaPath.resolve("reservoir_items_string_sampling_n" + n + "_k" + k + "_java.sk"),
           sk.toByteArray(new ArrayOfStringsSerDe()));
     }
   }