diff --git a/packages/core/src/methods/dataframe/filtering/at.js b/packages/core/src/methods/dataframe/filtering/at.js
new file mode 100644
index 0000000..97709d8
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/at.js
@@ -0,0 +1,48 @@
+/* -------------------------------------------------------------- *
+ | DataFrame → filtering · at() |
+ * -------------------------------------------------------------- */
+
+/**
+ * Returns a row at the specified index.
+ * `df.at(5)` → returns an object representing the row at index 5.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number} index - Row index to select
+ * @returns {Object} - Object representing the selected row
+ * @throws {Error} If index is invalid or out of bounds
+ */
+export function at(df, index) {
+  // Reject anything that is not a whole number; the message shows the value
+  // for numbers and the type name for everything else.
+  if (!Number.isInteger(index)) {
+    const received = typeof index === 'number' ? index : typeof index;
+    throw new Error(`Index must be an integer, got ${received}`);
+  }
+
+  // Negative positions are not supported by at().
+  if (index < 0) {
+    throw new Error(`Index out of bounds: ${index} is negative`);
+  }
+
+  // Rows are materialised only after the cheap argument checks pass.
+  const records = df.toArray();
+  const total = records.length;
+
+  // An empty frame has no valid position at all.
+  if (total === 0) {
+    throw new Error('Index out of bounds: DataFrame is empty');
+  }
+
+  // Upper bound check against the number of rows.
+  if (index >= total) {
+    throw new Error(`Index out of bounds: ${index} >= ${total}`);
+  }
+
+  return records[index];
+}
+
+/* -------------------------------------------------------------- *
+ |  Pool for extendDataFrame                                      |
+ * -------------------------------------------------------------- */
+export default { at };
+
diff --git a/packages/core/src/methods/dataframe/filtering/drop.js b/packages/core/src/methods/dataframe/filtering/drop.js
new file mode 100644
index 0000000..1be19f4
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/drop.js
@@ -0,0 +1,66 @@
+/* -------------------------------------------------------------- *
+ | DataFrame → filtering · drop() |
+ * -------------------------------------------------------------- */
+
+/**
+ * Removes specified columns from a DataFrame.
+ * `df.drop(['age', 'name'])` → returns a new DataFrame without the specified columns.
+ * Can accept either an array of column names or a single column name as string.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {string|string[]} columns - Column name(s) to remove
+ * @returns {DataFrame} - New DataFrame without the dropped columns
+ * @throws {Error} If any column doesn't exist or if dropping all columns
+ */
+export function drop(df, columns) {
+  // Accept a single name as well as a list of names.
+  const requested = Array.isArray(columns) ? columns : [columns];
+
+  // Nothing to drop: hand back a copy rebuilt from the row records,
+  // carrying over the options of the source frame.
+  if (requested.length === 0) {
+    const Frame = df.constructor;
+    // Call fromRecords as a method of the class: pulling it into a local
+    // variable first would invoke it with `this === undefined`.
+    return typeof Frame.fromRecords === 'function'
+      ? Frame.fromRecords(df.toArray(), df._options)
+      : new Frame(df.toArray(), df._options);
+  }
+
+  // Column names in their current order.
+  const existing = df.columns;
+
+  // Fail on the first requested name that the frame does not have.
+  // Set lookups keep the validation linear in the number of columns.
+  const known = new Set(existing);
+  for (const name of requested) {
+    if (!known.has(name)) {
+      throw new Error(`Column not found: '${name}'`);
+    }
+  }
+
+  // Keep every column that was not requested, preserving order.
+  const doomed = new Set(requested);
+  const kept = existing.filter((name) => !doomed.has(name));
+
+  // A frame without columns is not a valid result.
+  if (kept.length === 0) {
+    throw new Error('Cannot drop all columns');
+  }
+
+  // Rebuild column-wise through the public col() accessor; the rows are
+  // never materialised on this path.
+  const data = {};
+  for (const name of kept) {
+    data[name] = df.col(name).toArray();
+  }
+
+  // Same class and options as the source frame.
+  return new df.constructor(data, df._options);
+}
+
+/* -------------------------------------------------------------- *
+ |  Pool for extendDataFrame                                      |
+ * -------------------------------------------------------------- */
+export default { drop };
+
diff --git a/packages/core/src/methods/dataframe/filtering/expr$.js b/packages/core/src/methods/dataframe/filtering/expr$.js
new file mode 100644
index 0000000..81d4a92
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/expr$.js
@@ -0,0 +1,137 @@
+/**
+ * Filtering method: expr$
+ *
+ * This file provides the expr$ method for filtering DataFrame rows using template literals.
+ * This provides a more intuitive syntax for filtering
+ *
+ * @module methods/dataframe/filtering/expr$
+ */
+
+import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Filters rows in a DataFrame using a template literal expression.
+ *
+ * The template is turned into the text of a JavaScript expression and
+ * evaluated once per row; column names are visible as plain identifiers.
+ * Four string helpers may be written in `column_method(args)` form and are
+ * rewritten to method calls before evaluation:
+ *
+ *   - `col_includes(x)`   → `col.includes(x)`
+ *   - `col_startsWith(x)` → `col.startsWith(x)`
+ *   - `col_endsWith(x)`   → `col.endsWith(x)`
+ *   - `col_match(x)`      → `col.match(x)`
+ *
+ * The argument of a helper is matched up to the first `)`, so it cannot
+ * itself contain a closing parenthesis.
+ *
+ * Rows are kept when the expression is truthy. The result has the same
+ * columns as `df`; columns backed by a typed array keep that array class,
+ * including when no row matches.
+ *
+ * @param {Object} df - DataFrame instance
+ * @param {TemplateStringsArray} strings - Template strings array
+ * @param {...any} values - Values to interpolate into the template
+ * @returns {Object} - New DataFrame with filtered rows
+ *
+ * @example
+ * // Filter rows where age > 30 and city includes "York"
+ * df.expr$`age > 30 && city_includes("York")`
+ *
+ * @example
+ * // Interpolated values become part of the expression text
+ * const limit = 30;
+ * df.expr$`age > ${limit}`
+ */
+export function expr$(df, strings, ...values) {
+  // Rebuild the expression text from the template parts. Interpolated
+  // values are spliced in as source text, not bound as parameters.
+  // NOTE(review): the resulting string is compiled and executed by
+  // createPredicate(), so it must never come from untrusted input.
+  const expression = String.raw({ raw: strings }, ...values);
+
+  // Rewrite the `column_method(args)` shorthand into `column.method(args)`.
+  const processedExpr = expression
+    .replace(/([a-zA-Z0-9_]+)_includes\(([^)]+)\)/g, '$1.includes($2)')
+    .replace(/([a-zA-Z0-9_]+)_startsWith\(([^)]+)\)/g, '$1.startsWith($2)')
+    .replace(/([a-zA-Z0-9_]+)_endsWith\(([^)]+)\)/g, '$1.endsWith($2)')
+    .replace(/([a-zA-Z0-9_]+)_match\(([^)]+)\)/g, '$1.match($2)');
+
+  const predicate = createPredicate(processedExpr);
+
+  // Keep the rows for which the compiled expression is truthy.
+  const matchingRows = df.toArray().filter((row) => predicate(row));
+
+  return buildFrameLike(df, matchingRows);
+}
+
+/**
+ * Builds a DataFrame with the columns of `source` holding only `records`.
+ *
+ * One code path serves both the "no matches" and the "some matches" case:
+ * an empty record list simply produces zero-length columns.
+ *
+ * @param {Object} source - DataFrame that supplies columns, options and storage types
+ * @param {Object[]} records - Row objects to place in the new frame
+ * @returns {Object} - New DataFrame of the same class as `source`
+ * @private
+ */
+function buildFrameLike(source, records) {
+  const result = new source.constructor({}, source._options);
+
+  for (const col of source.columns) {
+    const originalArray = source._columns[col].vector.__data;
+    const isTyped =
+      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);
+
+    let data;
+    if (isTyped) {
+      // Copy into a typed array of the same class as the source column.
+      data = new originalArray.constructor(records.length);
+      records.forEach((row, i) => {
+        data[i] = row[col];
+      });
+    } else {
+      data = records.map((row) => row[col]);
+    }
+
+    result._columns[col] = createTypedSeries(data, col, source);
+
+    // Register the column once in the column order.
+    if (!result._order.includes(col)) {
+      result._order.push(col);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Compiles expression text into a predicate that is evaluated against one row.
+ * Row keys resolve as bare identifiers; a runtime error on a row yields false.
+ * @param {string} expr - Expression to evaluate
+ * @returns {Function} - Predicate function
+ * @throws {Error} If the expression text cannot be compiled
+ * @private
+ */
+function createPredicate(expr) {
+  try {
+    // NOTE(review): `new Function` runs arbitrary code just like `eval`; `expr` must be trusted.
+    return new Function(
+      'row',
+      `
+      try {
+        with (row) {
+          return ${expr};
+        }
+      } catch (e) {
+        return false;
+      }
+    `,
+    );
+  } catch (e) {
+    throw new Error(`Invalid expression: ${expr}. Error: ${e.message}`);
+  }
+}
+
+// No export list here: `export function expr$` already exports the name, and exporting it twice is a SyntaxError.
diff --git a/packages/core/src/methods/dataframe/filtering/filter.js b/packages/core/src/methods/dataframe/filtering/filter.js
new file mode 100644
index 0000000..d92e940
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/filter.js
@@ -0,0 +1,92 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame › filtering · filter() |
+ | |
+ | df.filter(row => row.age > 30) → new DataFrame with matching rows |
+ | Accepts a predicate function only; any other argument throws. |
+ *-------------------------------------------------------------------------*/
+
+import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Filters rows in a DataFrame based on a predicate function.
+ *
+ * The predicate is handed to `Array.prototype.filter`, so it is called as
+ * `predicate(row, index, rows)`; rows with a truthy result are kept.
+ * The result has the same columns as `df`. Columns backed by a typed array
+ * keep that array class, including when no row matches.
+ *
+ * @param {Object} df - DataFrame instance
+ * @param {Function} predicate - Function to apply to each row
+ * @returns {Object} - New DataFrame with filtered rows
+ * @throws {Error} If `predicate` is not a function
+ *
+ * @example
+ * // Keep the rows of everyone older than 30
+ * df.filter((row) => row.age > 30)
+ *
+ * @example
+ * // No match: the result is an empty frame with the same columns
+ * df.filter(() => false)
+ */
+export function filter(df, predicate) {
+  // Guard: only a callable can act as the row test.
+  if (typeof predicate !== 'function') {
+    throw new Error('Predicate must be a function');
+  }
+
+  // Materialise the rows and keep those accepted by the predicate.
+  const keptRows = df.toArray().filter(predicate);
+
+  // A fresh, empty frame of the same class and with the same options.
+  const result = new df.constructor({}, df._options);
+
+  // An empty `keptRows` needs no special case: every column simply comes
+  // out with length zero.
+  for (const col of df.columns) {
+    // Storage of the source column decides the storage of the result.
+    const originalArray = df._columns[col].vector.__data;
+    const values = collectColumn(keptRows, col, originalArray);
+    result._columns[col] = createTypedSeries(values, col, df);
+
+    // Register the column once in the column order.
+    if (!result._order.includes(col)) {
+      result._order.push(col);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Gathers the values of one column from a list of row objects.
+ * The output uses the same kind of storage as the source column.
+ *
+ * @param {Object[]} records - Row objects to read from
+ * @param {string} col - Column name
+ * @param {*} originalArray - Storage of the source column; decides the output type
+ * @returns {Array|Object} Typed array of the same class as `originalArray`
+ *   when that is a typed array, otherwise a plain array
+ * @private
+ */
+function collectColumn(records, col, originalArray) {
+  // DataView is an ArrayBuffer view too, but it is not an indexable array.
+  const isTyped =
+    ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);
+
+  // Non-typed storage stays a plain array.
+  if (!isTyped) {
+    return records.map((row) => row[col]);
+  }
+
+  // Typed storage: allocate the same class and copy element by element, so
+  // each value is coerced by the typed array itself.
+  const typedValues = new originalArray.constructor(records.length);
+  records.forEach((row, i) => {
+    typedValues[i] = row[col];
+  });
+  return typedValues;
+}
+
+// `filter` is exported by its `export function` declaration above. A second
+// `export { filter }` list would repeat the exported name, which is a
+// SyntaxError in an ES module, so no export list is emitted here.
diff --git a/packages/core/src/methods/dataframe/filtering/head.js b/packages/core/src/methods/dataframe/filtering/head.js
new file mode 100644
index 0000000..ac96f02
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/head.js
@@ -0,0 +1,45 @@
+/* -------------------------------------------------------------- *
+ | DataFrame → filtering · head() |
+ * -------------------------------------------------------------- */
+
+/**
+ * Returns the first n rows of a DataFrame.
+ * `df.head(5)` → returns a new DataFrame with the first 5 rows.
+ * Similar to pandas' head() function.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number} [n=5] - Number of rows to return
+ * @param {Object} [options] - Additional options
+ * @param {boolean} [options.print=false] - Option for compatibility with other libraries
+ * @returns {DataFrame} - New DataFrame with the first n rows
+ * @throws {Error} If n is not a positive integer
+ */
+export function head(df, n = 5, options = { print: false }) {
+  // `options` is accepted for signature compatibility and is not read here.
+  if (n <= 0) {
+    throw new Error('Number of rows must be a positive integer');
+  }
+  if (!Number.isInteger(n)) {
+    throw new Error('Number of rows must be an integer');
+  }
+
+  // slice() clamps to the available rows, so asking for more than the
+  // frame holds simply returns all of them.
+  const selectedRows = df.toArray().slice(0, n);
+
+  // Call fromRecords as a method of the class: pulling it into a local
+  // variable first would invoke it with `this === undefined`.
+  const Frame = df.constructor;
+  if (typeof Frame.fromRecords === 'function') {
+    return Frame.fromRecords(selectedRows, df._options);
+  }
+
+  // Classes without fromRecords are constructed from the records directly.
+  return new Frame(selectedRows, df._options);
+}
+
+/* -------------------------------------------------------------- *
+ |  Pool for extendDataFrame                                      |
+ * -------------------------------------------------------------- */
+export default { head };
+
diff --git a/packages/core/src/methods/dataframe/filtering/iloc.js b/packages/core/src/methods/dataframe/filtering/iloc.js
new file mode 100644
index 0000000..035e756
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/iloc.js
@@ -0,0 +1,151 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame -› filtering · iloc() |
+ | |
+ | Selection of rows and columns from DataFrame by integer positions. |
+ | |
+ | df.iloc(5) → select row with index 5 |
+ | df.iloc([1, 3, 5]) → select rows with specified indices |
+ | df.iloc(5, 2) → select value in row 5, column 2 |
+ | df.iloc([1, 3], [0, 2]) → select rows 1,3 and columns 0,2 |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Method for selecting rows and columns by indices
+ *
+ * @module methods/dataframe/filtering/iloc
+ */
+
+// Import function for creating typed arrays
+import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Method for selecting rows and columns by indices (similar to iloc in pandas).
+ *
+ * Each selector may be:
+ *   - `null`/`undefined` – every position on that axis;
+ *   - a number – one position; negative values count from the end;
+ *   - an array of numbers – several positions, in the order given
+ *     (negative values count from the end, repeats are kept);
+ *   - a function – called with each position, truthy results are selected.
+ *
+ * A single cell value is returned only when both selectors are numbers;
+ * every other combination yields a DataFrame. Columns backed by a typed
+ * array keep that array class in the result.
+ *
+ * @param {DataFrame} df - DataFrame instance
+ * @param {number|number[]|function} rowSelector - Row index, array of indices, or predicate function
+ * @param {number|number[]|function} colSelector - Column index, array of indices, or predicate function
+ * @returns {DataFrame|*} - New DataFrame with selected rows and columns or a cell value
+ * @throws {Error} If the DataFrame has no rows, a position is out of bounds
+ *   or not an integer, or a selector has an unsupported type
+ * @example
+ * df.iloc([0, -1], 0) // first and last row, first column, as a DataFrame
+ */
+export function iloc(df, rowSelector = null, colSelector = null) {
+  // Get all rows as array of objects
+  const rows = df.toArray();
+  const allColumns = df.columns;
+  const rowCount = df.rowCount;
+
+  // No position is valid on a frame without rows, whatever the selector.
+  if (rowCount === 0) {
+    throw new Error('Row index out of bounds');
+  }
+
+  // Rows are resolved before columns, so a bad row selector is reported first.
+  const selectedIndices = resolvePositions(rowSelector, rowCount, 'Row');
+  const selectedColumns = resolvePositions(
+    colSelector,
+    allColumns.length,
+    'Column',
+  ).map((idx) => allColumns[idx]);
+
+  // Two plain numbers address exactly one cell: return its value.
+  if (typeof rowSelector === 'number' && typeof colSelector === 'number') {
+    return rows[selectedIndices[0]][selectedColumns[0]];
+  }
+
+  // Create a new DataFrame instance with the same options as the original
+  const result = new df.constructor({}, df._options);
+
+  for (const col of selectedColumns) {
+    // Storage of the source column decides the storage of the result.
+    const originalArray = df._columns[col].vector.__data;
+    const values = selectedIndices.map((index) => rows[index][col]);
+
+    if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+      // Typed column: copy into a typed array of the same class.
+      const typedValues = new originalArray.constructor(values.length);
+      values.forEach((value, i) => {
+        typedValues[i] = value;
+      });
+      result._columns[col] = createTypedSeries(typedValues, col, df);
+    } else {
+      result._columns[col] = createTypedSeries(values, col, df);
+    }
+
+    // A column selected twice is listed only once in the column order.
+    if (!result._order.includes(col)) {
+      result._order.push(col);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Turns one selector into a list of zero-based positions on an axis.
+ *
+ * @param {null|undefined|number|number[]|function} selector - Selector to resolve
+ * @param {number} size - Number of positions on the axis
+ * @param {string} axis - 'Row' or 'Column'; used to build error messages
+ * @returns {number[]} Selected positions, in selection order
+ * @throws {Error} If a position is out of bounds or not an integer, or the
+ *   selector type is not supported
+ * @private
+ */
+function resolvePositions(selector, size, axis) {
+  // Normalises one requested position, resolving negatives from the end.
+  const normalise = (idx) => {
+    const position = idx < 0 ? size + idx : idx;
+    if (position < 0 || position >= size) {
+      throw new Error(`${axis} index out of bounds`);
+    }
+    // NaN and fractional numbers pass the range comparison above, so they
+    // are rejected explicitly instead of producing an undefined lookup.
+    if (typeof idx === 'number' && !Number.isInteger(position)) {
+      throw new Error(`${axis} index must be an integer`);
+    }
+    return position;
+  };
+
+  // No selector: every position on the axis.
+  if (selector === null || selector === undefined) {
+    return Array.from({ length: size }, (_, i) => i);
+  }
+
+  if (typeof selector === 'number') {
+    return [normalise(selector)];
+  }
+
+  if (Array.isArray(selector)) {
+    // An explicit callback keeps map()'s index argument away from normalise.
+    return selector.map((idx) => normalise(idx));
+  }
+
+  if (typeof selector === 'function') {
+    // The predicate receives the position, not the row or column itself.
+    const positions = [];
+    for (let i = 0; i < size; i++) {
+      if (selector(i)) {
+        positions.push(i);
+      }
+    }
+    return positions;
+  }
+
+  throw new Error(`Invalid ${axis.toLowerCase()} selector type`);
+}
+
+// Export the method for the pool
+export default { iloc };
diff --git a/packages/core/src/methods/dataframe/filtering/index.js b/packages/core/src/methods/dataframe/filtering/index.js
new file mode 100644
index 0000000..82ff7cf
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/index.js
@@ -0,0 +1,18 @@
+/**
+ * DataFrame filtering methods
+ *
+ * This module exports all filtering methods for DataFrame.
+ * Methods are registered using extendDataFrame.
+ *
+ * @module methods/dataframe/filtering
+ */
+
+import { DataFrame } from '../../../data/model/index.js';
+import { extendDataFrame } from '../../../data/model/extendDataFrame.js';
+import * as pool from './pool.js';
+
+// Import side effect: passes DataFrame.prototype and every export of pool.js to extendDataFrame (presumably attaching them as methods — confirm in extendDataFrame)
+extendDataFrame(DataFrame.prototype, pool);
+
+// Re-export the same pool so each method can also be imported as a plain function that takes the frame as its first argument
+export * from './pool.js';
diff --git a/packages/core/src/methods/dataframe/filtering/loc.js b/packages/core/src/methods/dataframe/filtering/loc.js
new file mode 100644
index 0000000..86f96bd
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/loc.js
@@ -0,0 +1,295 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame -› filtering · loc() |
+ | |
+ | Selection of rows and columns from DataFrame by labels (names). |
+ | |
+ | df.loc(5) → select row with index 5 |
+ | df.loc([1, 3, 5]) → select rows with specified indices |
+ | df.loc(5, 'age') → select value in row 5, column 'age' |
+ | df.loc([1, 3], ['name', 'age']) → select rows 1,3 and columns 'name','age' |
+ | df.loc(row => row.age > 30) → select rows where age > 30 |
+ | df.loc({city: 'Chicago'}) → select rows where city equals 'Chicago' |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Row and column selection by label or position
+ *
+ * @module methods/dataframe/filtering/loc
+ */
+
+import { createTypedArray } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Selects rows and columns by label or position
+ *
+ * Row selector forms:
+ *   - `null` – all rows;
+ *   - number – row position (always positional, even with an index set);
+ *   - string – row label, looked up in the frame's index;
+ *   - array – positions, or labels when the frame has an index set; rows
+ *     come back in the order requested and repeats are kept;
+ *   - function – predicate, called once per row;
+ *   - object – `{ column: value }` pairs, all of which must match with `===`.
+ *
+ * Column selector forms: `undefined` or `null` (all columns), one column
+ * name, or an array of column names.
+ *
+ * @param {DataFrame} df - DataFrame to select from
+ * @param {*} rowSelector - Row selector (label, array of labels, predicate function, or condition object)
+ * @param {*} colSelector - Column selector (name, array of names, or null for all columns)
+ * @returns {DataFrame|*} - New DataFrame with selected rows and columns; the bare
+ *   cell value when `colSelector` is given, the row selector is not a function,
+ *   and exactly one row and one column are selected
+ * @throws {Error} If a row label, row position or column name is not found,
+ *   or a selector has an unsupported type
+ */
+export function loc(df, rowSelector, colSelector) {
+  // Get data from DataFrame
+  const rows = df.toArray();
+  const rowCount = df.rowCount;
+
+  // Row objects picked by the row selector
+  let selectedRows = [];
+
+  // Check if DataFrame has an index set
+  const hasIndex = df._index !== null && df._indexMap !== undefined && df._indexMap.size > 0;
+
+  if (rowSelector === null) {
+    // If rowSelector is null, select all rows
+    selectedRows = [...rows];
+  } else if (Array.isArray(rowSelector)) {
+    // If rowSelector is an array of indices or labels
+    if (hasIndex) {
+      // Use index for selection
+      for (const label of rowSelector) {
+        const index = df._indexMap.get(label);
+        if (index === undefined) {
+          throw new Error('Row label not found');
+        }
+        selectedRows.push(rows[index]);
+      }
+    } else {
+      // Use numeric indices
+      for (const index of rowSelector) {
+        if (index < 0 || index >= rowCount) {
+          throw new Error(
+            `Row index ${index} is out of bounds for DataFrame with ${rowCount} rows`,
+          );
+        }
+      }
+      // Follow the requested order and keep repeats, exactly like the
+      // label branch above; filtering `rows` by membership would return
+      // them in frame order and collapse repeated positions.
+      selectedRows = rowSelector.map((index) => rows[index]);
+    }
+  } else if (typeof rowSelector === 'number' || typeof rowSelector === 'string') {
+    // If rowSelector is a number or string (index or label)
+    if (hasIndex && typeof rowSelector === 'string') {
+      // Use index for selection
+      const index = df._indexMap.get(rowSelector);
+      if (index === undefined) {
+        throw new Error('Row label not found');
+      }
+      selectedRows = [rows[index]];
+    } else if (typeof rowSelector === 'number') {
+      // Use numeric index
+      if (rowSelector < 0 || rowSelector >= rowCount) {
+        throw new Error(
+          `Row index ${rowSelector} is out of bounds for DataFrame with ${rowCount} rows`,
+        );
+      }
+      selectedRows = [rows[rowSelector]];
+    } else {
+      // A string can only be a label, and there is no index to look it up in
+      throw new Error('Row label not found');
+    }
+  } else if (typeof rowSelector === 'function') {
+    // If rowSelector is a predicate function; each row is tested exactly
+    // once, so a stateful predicate cannot produce inconsistent results
+    selectedRows = rows.filter(rowSelector);
+  } else if (typeof rowSelector === 'object' && rowSelector !== null) {
+    // If rowSelector is an object with conditions: every pair must match
+    const conditions = Object.entries(rowSelector);
+    selectedRows = rows.filter((row) =>
+      conditions.every(([key, value]) => row[key] === value),
+    );
+  } else {
+    throw new Error('Invalid row selector type');
+  }
+
+  // If column selector is not specified, return all columns
+  if (colSelector === undefined) {
+    // A single row picked by a non-function selector is still returned as a
+    // one-row DataFrame here, not as a bare row object
+    if (selectedRows.length === 1 && typeof rowSelector !== 'function') {
+      // Tests expect a DataFrame (with a rowCount property) in this case
+      // Create a DataFrame with one row
+      const result = df.constructor.fromRecords([selectedRows[0]], df._options);
+
+      // Copy column metadata to preserve typed arrays
+      for (const col of result.columns) {
+        if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) {
+          const originalArray = df._columns[col].vector.__data;
+          if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+            const TypedArrayConstructor = originalArray.constructor;
+            // Create a new typed array with the same type
+            const newArray = new TypedArrayConstructor([selectedRows[0][col]]);
+            result._columns[col].vector.__data = newArray;
+          }
+        }
+      }
+
+      return result;
+    }
+
+    // If no results, create an empty DataFrame with the same columns
+    if (selectedRows.length === 0) {
+      const emptyData = {};
+      for (const col of df.columns) {
+        // Preserve array type if it's a typed array
+        const originalArray = df._columns[col].vector.__data;
+        if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+          const TypedArrayConstructor = originalArray.constructor;
+          emptyData[col] = new TypedArrayConstructor(0);
+        } else {
+          emptyData[col] = [];
+        }
+      }
+      return new df.constructor(emptyData, df._options);
+    }
+
+    // NOTE(review): this path rebuilds typed columns through createTypedArray(),
+    // while the single-row path above uses the array constructor directly.
+    // Create a new DataFrame with the same options as the original
+    const result = df.constructor.fromRecords(selectedRows, df._options);
+
+    // Process each column to preserve typed arrays
+    for (const col of df.columns) {
+      if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) {
+        const originalArray = df._columns[col].vector.__data;
+        if (ArrayBuffer.isView(originalArray)) {
+          // Get column options if specified
+          const columnOptions = df._options?.columns?.[col] || {};
+
+          // Extract values for this column from selected rows
+          const values = selectedRows.map((row) => row[col]);
+
+          // Create a new typed array with the same type
+          const newArray = createTypedArray(values, originalArray, columnOptions);
+
+          // Replace the array in the result DataFrame
+          if (result._columns[col] && result._columns[col].vector) {
+            result._columns[col].vector.__data = newArray;
+          }
+        }
+      }
+    }
+
+    return result;
+  }
+
+  // Define columns to select
+  let selectedColumns = [];
+
+  if (colSelector === null) {
+    // If colSelector is null, select all columns
+    selectedColumns = df.columns;
+  } else if (Array.isArray(colSelector)) {
+    // If colSelector is an array of column names
+    selectedColumns = colSelector;
+  } else if (typeof colSelector === 'string') {
+    // If colSelector is a single column name
+    selectedColumns = [colSelector];
+  } else {
+    throw new Error('Invalid column selector type');
+  }
+
+  // Check that all specified columns exist
+  for (const column of selectedColumns) {
+    if (!df.columns.includes(column)) {
+      throw new Error('Column not found');
+    }
+  }
+
+  // If only one row and one column are selected, return the value
+  if (
+    selectedRows.length === 1 &&
+    selectedColumns.length === 1 &&
+    typeof rowSelector !== 'function'
+  ) {
+    return selectedRows[0][selectedColumns[0]];
+  }
+
+  // If no results, create an empty DataFrame with selected columns
+  if (selectedRows.length === 0) {
+    const emptyData = {};
+    for (const col of selectedColumns) {
+      // Preserve array type if it's a typed array
+      const originalArray = df._columns[col].vector.__data;
+      if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+        const TypedArrayConstructor = originalArray.constructor;
+        emptyData[col] = new TypedArrayConstructor(0);
+      } else {
+        emptyData[col] = [];
+      }
+    }
+    return new df.constructor(emptyData, df._options);
+  }
+
+  // One row and one column picked by a predicate: keep the DataFrame shape
+  if (selectedRows.length === 1 && selectedColumns.length === 1 && typeof rowSelector === 'function') {
+    const singleColData = {};
+    const col = selectedColumns[0];
+    const value = selectedRows[0][col];
+
+    // Preserve array type if it's a typed array
+    const originalArray = df._columns[col].vector.__data;
+    if (ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView)) {
+      const TypedArrayConstructor = originalArray.constructor;
+      singleColData[col] = new TypedArrayConstructor([value]);
+    } else {
+      singleColData[col] = [value];
+    }
+
+    return new df.constructor(singleColData, df._options);
+  }
+
+  // Create a new DataFrame with only selected columns
+  const filteredRows = selectedRows.map((row) => {
+    const filteredRow = {};
+    for (const col of selectedColumns) {
+      filteredRow[col] = row[col];
+    }
+    return filteredRow;
+  });
+
+  // Create a new DataFrame with the same options as the original
+  const result = df.constructor.fromRecords(filteredRows, df._options);
+
+  // Process each column to preserve typed arrays
+  for (const col of selectedColumns) {
+    if (df._columns[col] && df._columns[col].vector && df._columns[col].vector.__data) {
+      const originalArray = df._columns[col].vector.__data;
+      if (ArrayBuffer.isView(originalArray)) {
+        // Get column options if specified
+        const columnOptions = df._options?.columns?.[col] || {};
+
+        // Extract values for this column from filtered rows
+        const values = filteredRows.map((row) => row[col]);
+
+        // Create a new typed array with the same type
+        const newArray = createTypedArray(values, originalArray, columnOptions);
+
+        // Replace the array in the result DataFrame
+        if (result._columns[col] && result._columns[col].vector) {
+          result._columns[col].vector.__data = newArray;
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+// `loc` is exported by its `export function` declaration above; repeating the
+// name in an `export { loc }` list is a duplicate-export SyntaxError.
diff --git a/packages/core/src/methods/dataframe/filtering/pool.js b/packages/core/src/methods/dataframe/filtering/pool.js
new file mode 100644
index 0000000..7807656
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/pool.js
@@ -0,0 +1,30 @@
+/**
+ * DataFrame filtering method pool
+ *
+ * This file re-exports all filtering methods for use with extendDataFrame
+ *
+ * @module methods/dataframe/filtering/pool
+ */
+
+// Row filtering methods
+export { filter } from './filter.js';
+export { query } from './query.js';
+export { where } from './where.js';
+export { expr$ } from './expr$.js';
+export { query$ } from './query$.js';
+
+// Row sampling methods
+export { sample } from './sample.js';
+export { stratifiedSample } from './stratifiedSample.js';
+export { head } from './head.js';
+export { tail } from './tail.js';
+
+// Column selection methods
+export { select } from './select.js';
+export { drop } from './drop.js';
+export { selectByPattern } from './selectByPattern.js';
+
+// Row/column access methods
+export { at } from './at.js';
+export { iloc } from './iloc.js';
+export { loc } from './loc.js';
diff --git a/packages/core/src/methods/dataframe/filtering/query$.js b/packages/core/src/methods/dataframe/filtering/query$.js
new file mode 100644
index 0000000..d2a13de
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/query$.js
@@ -0,0 +1,116 @@
+/**
+ * Filtering method: query$
+ *
+ * This file provides the query$ method for filtering DataFrame rows using template literals
+ * for more intuitive syntax
+ *
+ * @module methods/dataframe/filtering/query$
+ */
+
+/**
+ * Filters rows in a DataFrame using a template literal expression.
+ *
+ * The template pieces are joined into one expression string, the
+ * `<column>_includes(...)`, `_startsWith(...)`, `_endsWith(...)` and
+ * `_match(...)` helpers are rewritten into the matching String method calls,
+ * and the result is compiled by `createPredicate` and run against every row.
+ *
+ * Interpolated values are spliced in as source text, so a string value must
+ * carry its own quotes. Because the expression is compiled to code, it must
+ * only ever come from trusted input.
+ *
+ * @param {Object} df - DataFrame instance
+ * @param {TemplateStringsArray} strings - Template strings array
+ * @param {...any} values - Values to interpolate into the template
+ * @returns {Object} - New DataFrame (created with `df.constructor`) holding
+ *   the matching rows; typed-array columns are rebuilt with the same class
+ * @throws {Error} If the expression cannot be compiled
+ *
+ * @example
+ * // Filter rows where age > 40
+ * df.query$`age > 40`
+ * // Filter rows where age > 30 and salary > 100000
+ * df.query$`age > 30 && salary > 100000`
+ * // Filter rows where city includes "Francisco"
+ * df.query$`city_includes("Francisco")`
+ */
+export function query$(df, strings, ...values) {
+  // Join the template pieces and interpolated values into one expression
+  const expression = String.raw({ raw: strings }, ...values);
+
+  // Rewrite the `column_method(args)` helpers into `column.method(args)`
+  const processedExpr = expression
+    .replace(/([a-zA-Z0-9_]+)_includes\(([^)]+)\)/g, '$1.includes($2)')
+    .replace(/([a-zA-Z0-9_]+)_startsWith\(([^)]+)\)/g, '$1.startsWith($2)')
+    .replace(/([a-zA-Z0-9_]+)_endsWith\(([^)]+)\)/g, '$1.endsWith($2)')
+    .replace(/([a-zA-Z0-9_]+)_match\(([^)]+)\)/g, '$1.match($2)');
+
+  // Compile once; an invalid expression throws here
+  const predicate = createPredicate(processedExpr);
+
+  // Run the predicate exactly once per row and remember which positions
+  // matched; the indices are all that is needed to rebuild every column.
+  const rows = df.toArray();
+  const selectedIndices = [];
+  rows.forEach((row, index) => {
+    if (predicate(row)) {
+      selectedIndices.push(index);
+    }
+  });
+
+  // No match: return an empty DataFrame that still lists every column
+  if (selectedIndices.length === 0) {
+    const emptyData = {};
+    for (const col of df.columns) {
+      emptyData[col] = [];
+    }
+    return new df.constructor(emptyData, df._options);
+  }
+
+  // Rebuild each column from the matching positions
+  const filteredData = {};
+  for (const col of df.columns) {
+    const originalArray = df.col(col).toArray();
+    const picked = selectedIndices.map((index) => originalArray[index]);
+
+    // Typed arrays (but not DataView) are recreated with the same class so
+    // the column keeps its numeric storage type
+    const isTypedArray =
+      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);
+    filteredData[col] = isTypedArray
+      ? new originalArray.constructor(picked)
+      : picked;
+  }
+
+  return new df.constructor(filteredData, df._options);
+}
+
+/**
+ * Compiles an expression string into a row predicate.
+ *
+ * The expression runs inside `with (row)`, so column names resolve as bare
+ * identifiers. `new Function` executes arbitrary code just like `eval`, so
+ * only trusted expressions may be passed in. Errors raised while evaluating a
+ * row (for example an unknown column name) make the predicate return `false`.
+ *
+ * @param {string} expr - JavaScript expression evaluated against each row
+ * @returns {Function} - Predicate function `(row) => value`
+ * @throws {Error} If `expr` is empty or not a syntactically valid expression
+ * @private
+ */
+function createPredicate(expr) {
+  // Wrapped in parentheses so a line break at the start of `expr` cannot make
+  // `return` end early via automatic semicolon insertion. The `)` is on its
+  // own line so a trailing `//` comment inside `expr` cannot comment it out.
+  const body = `try { with (row) { return (${expr}\n); } } catch (e) { return false; }`;
+
+  try {
+    return new Function('row', body);
+  } catch (e) {
+    // Compilation failed: report the offending expression to the caller
+    throw new Error(`Invalid expression: ${expr}. Error: ${e.message}`);
+  }
+}
+
+// No trailing `export { query$ }` here: the function declaration above already
+// exports it, and exporting the same name twice is a module SyntaxError.
diff --git a/packages/core/src/methods/dataframe/filtering/query.js b/packages/core/src/methods/dataframe/filtering/query.js
new file mode 100644
index 0000000..99a7318
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/query.js
@@ -0,0 +1,253 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame › filtering · query() |
+ | |
+ | df.query("SELECT * WHERE age > 30") → new DataFrame with matching rows |
+ | Supports SQL-like syntax. |
+ *-------------------------------------------------------------------------*/
+
+import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Filters DataFrame rows using SQL-like syntax.
+ *
+ * Recognised query shapes (keywords are case-insensitive):
+ *
+ *   SELECT <columns | *> [WHERE <condition>] [ORDER BY <col> [ASC|DESC]] [LIMIT <n>]
+ *   <condition> [ORDER BY <col> [ASC|DESC]] [LIMIT <n>]
+ *
+ * The query is split by `parseQuery` and applied in this order: the WHERE
+ * condition filters the rows, ORDER BY sorts them, LIMIT truncates them, and
+ * finally the SELECT list decides which columns are copied into the result.
+ * Selected names that are not columns of `df` are ignored.
+ *
+ * The column projection is applied to empty results as well, so a query
+ * returns the same set of columns whether or not any row matched.
+ *
+ * @param {Object} df - DataFrame instance
+ * @param {string} queryString - SQL-like query string
+ * @returns {Object} - New DataFrame with filtered rows
+ * @throws {Error} If `queryString` is not a string or its condition cannot
+ *   be compiled
+ *
+ * @example
+ * df.query('SELECT name, age WHERE age > 30 ORDER BY age DESC LIMIT 10')
+ * // Bare condition, no SELECT
+ * df.query('age > 30')
+ */
+export function query(df, queryString) {
+  if (typeof queryString !== 'string') {
+    throw new Error('Query must be a string');
+  }
+
+  // Parse SQL-like query
+  const parsedQuery = parseQuery(queryString);
+
+  // `SELECT *` keeps every column; otherwise keep only the requested names
+  // that exist in the DataFrame
+  const columnsToInclude =
+    parsedQuery.columns[0] === '*'
+      ? df.columns
+      : parsedQuery.columns.filter((col) => df.columns.includes(col));
+
+  // Get data from DataFrame
+  let rows = df.toArray();
+
+  // Apply WHERE condition if present
+  if (parsedQuery.whereClause) {
+    const evaluateQuery = createQueryEvaluator(parsedQuery.whereClause);
+    rows = rows.filter((row) => {
+      try {
+        return evaluateQuery(row);
+      } catch (e) {
+        throw new Error(`Error evaluating query for row: ${e.message}`);
+      }
+    });
+  }
+
+  // Apply ORDER BY sorting if present
+  if (parsedQuery.orderBy) {
+    const { column, direction } = parsedQuery.orderBy;
+    // +1 keeps the natural order (ASC); -1 flips every comparison (DESC)
+    const sign = direction === 'ASC' ? 1 : -1;
+    // Sort a copy so the array handed out by df.toArray() is never reordered
+    rows = [...rows].sort((a, b) => {
+      const valueA = a[column];
+      const valueB = b[column];
+
+      if (valueA === valueB) return 0;
+      return valueA < valueB ? -sign : sign;
+    });
+  }
+
+  // Apply LIMIT restriction if present
+  if (parsedQuery.limit !== null) {
+    rows = rows.slice(0, parsedQuery.limit);
+  }
+
+  return buildQueryResult(df, rows, columnsToInclude);
+}
+
+/**
+ * Builds the DataFrame returned by `query` from the surviving row objects.
+ *
+ * Works for zero rows too: every listed column is still created, just empty,
+ * so no separate "empty result" code path is needed.
+ *
+ * @param {Object} df - Source DataFrame; supplies the constructor, options and
+ *   the original column storage used to detect typed arrays
+ * @param {Object[]} rows - Row objects to copy, already filtered/sorted/limited
+ * @param {string[]} columns - Names of the columns to copy; each must exist in `df`
+ * @returns {Object} - New DataFrame containing `columns` in the given order
+ * @private
+ */
+function buildQueryResult(df, rows, columns) {
+  // Create a new DataFrame instance with the same options as the original
+  const result = new df.constructor({}, df._options);
+
+  for (const col of columns) {
+    // Original column storage, inspected only to find out its array type
+    const originalArray = df._columns[col].vector.__data;
+
+    // Extract values for this column from the rows
+    const values = rows.map((row) => row[col]);
+
+    // Typed arrays (but not DataView) are recreated with the same class
+    const isTypedArray =
+      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);
+    let data = values;
+    if (isTypedArray) {
+      data = new originalArray.constructor(values);
+    }
+
+    result._columns[col] = createTypedSeries(data, col, df);
+
+    // Add to column order
+    if (!result._order.includes(col)) {
+      result._order.push(col);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Parses an SQL-like query string into its components.
+ *
+ * Clauses are peeled off from the end of the string: first `LIMIT n`, then
+ * `ORDER BY col [ASC|DESC]`, and finally `SELECT cols [WHERE condition]`.
+ * A string without a `SELECT` keyword is treated as a bare condition.
+ *
+ * @param {string} queryString - SQL-like query string
+ * @returns {Object} - `{ columns, whereClause, orderBy, limit }`; `columns`
+ *   defaults to `['*']` and every clause that is absent is `null`
+ * @private
+ */
+function parseQuery(queryString) {
+  const result = {
+    columns: ['*'],
+    whereClause: null,
+    orderBy: null,
+    limit: null,
+  };
+  let remaining = queryString;
+
+  // `(?:^|\s+)` (rather than `\s+`) also recognises a trailing clause when
+  // nothing precedes it, e.g. the ORDER BY in "ORDER BY age LIMIT 3"
+  const limitMatch = remaining.match(/(?:^|\s+)LIMIT\s+(\d+)\s*$/i);
+  if (limitMatch) {
+    result.limit = Number.parseInt(limitMatch[1], 10);
+    remaining = remaining.slice(0, limitMatch.index);
+  }
+
+  const orderByMatch = remaining.match(/(?:^|\s+)ORDER\s+BY\s+([\w.]+)(?:\s+(ASC|DESC))?\s*$/i);
+  if (orderByMatch) {
+    result.orderBy = {
+      column: orderByMatch[1],
+      direction: (orderByMatch[2] || 'ASC').toUpperCase(),
+    };
+    remaining = remaining.slice(0, orderByMatch.index);
+  }
+
+  const selectMatch = remaining.match(/^\s*SELECT\s+(.+?)(?:\s+WHERE\s+(.+))?\s*$/i);
+  if (!selectMatch) {
+    // No SELECT keyword: whatever is left is the condition (null if nothing is)
+    result.whereClause = remaining.trim() || null;
+    return result;
+  }
+  const columnsStr = selectMatch[1].trim();
+  if (columnsStr !== '*') {
+    result.columns = columnsStr.split(',').map((col) => col.trim());
+  }
+  if (selectMatch[2]) {
+    result.whereClause = selectMatch[2].trim();
+  }
+  return result;
+}
+
+/**
+ * Creates a function to evaluate a WHERE clause.
+ *
+ * The clause is translated to a JavaScript expression with plain text
+ * substitutions (LIKE, BETWEEN, IN, AND/OR/NOT, `=`) and compiled with
+ * `new Function`, so it must come from trusted input. The substitutions do
+ * not understand quoting: keywords or `=` inside string literals are
+ * rewritten as well.
+ *
+ * @param {string} whereClause - WHERE clause from SQL-like query
+ * @returns {Function} - Function evaluating the clause for a row; it yields
+ *   `false` when evaluation throws
+ * @throws {Error} If the translated clause is not valid JavaScript
+ * @private
+ */
+function createQueryEvaluator(whereClause) {
+  if (!whereClause) {
+    return () => true; // No WHERE clause means all rows match
+  }
+
+  const jsQuery = whereClause
+    // LIKE '%x%' (contains) has to run before the one-sided patterns
+    .replace(/([\w.]+)\s+LIKE\s+['"]%(.+?)%['"]\s*/gi, '$1.toString().includes("$2")')
+    // LIKE 'x%' (starts with)
+    .replace(/([\w.]+)\s+LIKE\s+['"](.+?)%['"]\s*/gi, '$1.toString().startsWith("$2")')
+    // LIKE '%x' (ends with)
+    .replace(/([\w.]+)\s+LIKE\s+['"]%(.+?)['"]\s*/gi, '$1.toString().endsWith("$2")')
+    // BETWEEN a AND b has to be rewritten while its AND is still spelled
+    // "AND", i.e. before the logical operators below turn every AND into &&
+    .replace(
+      /([\w.]+)\s+BETWEEN\s+(\S+)\s+AND\s+(\S+)/gi,
+      '($1 >= $2 && $1 <= $3)',
+    )
+    // col IN (a, b, c)
+    .replace(/([\w.]+)\s+IN\s+\(([^)]+)\)/gi, (match, col, values) => {
+      const cleanValues = values.split(',').map((v) => v.trim()).join(', ');
+      return `[${cleanValues}].includes(${col})`;
+    })
+    // Logical operators
+    .replace(/\bAND\b/gi, '&&')
+    .replace(/\bOR\b/gi, '||')
+    .replace(/\bNOT\b/gi, '!')
+    // Every stand-alone `=` becomes `==`. The neighbouring characters are
+    // checked so that `==`, `!=`, `<=` and `>=` stay untouched, and all
+    // occurrences are handled, not only the first one in the clause.
+    .replace(/(^|[^=!<>])=(?!=)/g, '$1==');
+
+  try {
+    return new Function(
+      'row',
+      `
+      try {
+        with (row) {
+          return ${jsQuery};
+        }
+      } catch (e) {
+        return false;
+      }
+    `,
+    );
+  } catch (e) {
+    throw new Error(`Invalid query syntax: ${e.message}`);
+  }
+}
+
+// Export object with method for the pool
+export default { query };
diff --git a/packages/core/src/methods/dataframe/filtering/sample.js b/packages/core/src/methods/dataframe/filtering/sample.js
new file mode 100644
index 0000000..8dc3e50
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/sample.js
@@ -0,0 +1,117 @@
+/* -------------------------------------------------------------- *
+ | DataFrame → filtering · sample() |
+ * -------------------------------------------------------------- */
+
+/**
+ * Returns a random sample of rows from a DataFrame.
+ * `df.sample(10)` → returns a new DataFrame with 10 randomly selected rows.
+ * `df.sample({ fraction: 0.1 })` → returns a sample of 10% of rows.
+ *
+ * Without `n` and without `fraction` a single row is sampled. An empty
+ * DataFrame always yields an empty result, before any argument is validated.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number|Object} n - Number of rows to sample or options object
+ * @param {Object} [options] - Additional options
+ * @param {number} [options.seed] - Seed for random number generator
+ * @param {boolean} [options.replace=false] - Sample with replacement
+ * @param {number} [options.fraction] - Fraction of rows to sample (0 < fraction <= 1)
+ * @returns {DataFrame} - New DataFrame with sampled rows
+ * @throws {Error} If sampling parameters are invalid
+ */
+export function sample(df, n, options = {}) {
+  // Accept `sample(df, { ... })`. `null` is excluded explicitly because
+  // `typeof null` is also 'object' and it is not a usable options bag.
+  const optionsGiven = n !== null && typeof n === 'object';
+  const opts = (optionsGiven ? n : options) ?? {};
+  const requested = optionsGiven ? undefined : n;
+
+  // Call fromRecords as a method of the constructor so that a static method
+  // relying on `this` keeps working; detaching it first would lose `this`.
+  const build = (records) =>
+    typeof df.constructor.fromRecords === 'function'
+      ? df.constructor.fromRecords(records)
+      : new df.constructor(records);
+
+  // Get data from DataFrame
+  const rows = df.toArray();
+  if (rows.length === 0) {
+    return build([]);
+  }
+
+  // Determine sample size
+  let sampleSize;
+  if (opts.fraction !== undefined) {
+    if (opts.fraction <= 0 || opts.fraction > 1) {
+      throw new Error('Fraction must be in the range (0, 1]');
+    }
+    sampleSize = Math.round(rows.length * opts.fraction);
+  } else {
+    sampleSize = requested !== undefined ? requested : 1;
+  }
+
+  // Validate sample size
+  if (sampleSize <= 0) {
+    throw new Error('Number of rows to sample must be a positive integer');
+  }
+
+  // Check that sample size is an integer
+  if (!Number.isInteger(sampleSize)) {
+    throw new Error('Number of rows to sample must be an integer');
+  }
+
+  // If sampling without replacement, the sample cannot exceed the row count
+  if (!opts.replace && sampleSize > rows.length) {
+    throw new Error(
+      `Sample size (${sampleSize}) cannot be greater than number of rows (${rows.length})`,
+    );
+  }
+
+  // Create random number generator with seed if specified
+  const random =
+    opts.seed !== undefined ? createSeededRandom(opts.seed) : Math.random;
+
+  // Sample rows
+  const sampledRows = [];
+  if (opts.replace) {
+    // Sampling with replacement: every draw is independent
+    for (let i = 0; i < sampleSize; i++) {
+      const index = Math.floor(random() * rows.length);
+      sampledRows.push(rows[index]);
+    }
+  } else {
+    // Sampling without replacement (Fisher-Yates shuffle of the row indices,
+    // then take the first `sampleSize` of them)
+    const indices = Array.from({ length: rows.length }, (_, i) => i);
+    for (let i = indices.length - 1; i > 0; i--) {
+      const j = Math.floor(random() * (i + 1));
+      [indices[i], indices[j]] = [indices[j], indices[i]];
+    }
+    for (let i = 0; i < sampleSize; i++) {
+      sampledRows.push(rows[indices[i]]);
+    }
+  }
+
+  // Create a new DataFrame from the sampled rows
+  return build(sampledRows);
+}
+
+/**
+ * Creates a seeded random number generator (linear congruential).
+ * @param {number} seed - Seed for the random number generator
+ * @returns {Function} - Function that returns a pseudo-random number in the range [0, 1)
+ * @private
+ */
+function createSeededRandom(seed) {
+  return function () {
+    // `%` keeps the sign of its left operand, so wrap negative states back into range
+    const next = (seed * 9301 + 49297) % 233280;
+    seed = next < 0 ? next + 233280 : next;
+    return seed / 233280;
+  };
+}
+
+/* -------------------------------------------------------------- *
+ | Pool for extendDataFrame |
+ * -------------------------------------------------------------- */
+export default { sample };
diff --git a/packages/core/src/methods/dataframe/filtering/sample.js.new b/packages/core/src/methods/dataframe/filtering/sample.js.new
new file mode 100644
index 0000000..acea678
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/sample.js.new
@@ -0,0 +1,151 @@
+/**
+ * Filtering method: sample
+ *
+ * This file provides the sample method for selecting a random sample of rows from a DataFrame
+ *
+ * @module methods/dataframe/filtering/sample
+ */
+
+import { createTypedSeries, createEmptyTypedSeries } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Selects a random sample of rows from DataFrame
+ *
+ * The result is created with the same constructor and options as `df` and
+ * filled column by column through `createTypedSeries` (or
+ * `createEmptyTypedSeries` when `df` has no rows).
+ *
+ * Without `n` and without `fraction` a single row is sampled. An empty
+ * DataFrame always yields an empty result, before any argument is validated.
+ *
+ * @param {Object} df - DataFrame instance
+ * @param {number|Object} n - Number of rows to sample or options object
+ * @param {Object} [options] - Additional options
+ * @param {number} [options.seed] - Seed for random number generator
+ * @param {boolean} [options.replace=false] - Sampling with replacement
+ * @param {number} [options.fraction] - Fraction of rows to sample (0 < fraction <= 1)
+ * @returns {Object} - New DataFrame with sampled rows
+ * @throws {Error} If the sample size or fraction is invalid
+ */
+export function sample(df, n, options = {}) {
+  // Accept `sample(df, { ... })`. `null` is excluded explicitly because
+  // `typeof null` is also 'object' and it is not a usable options bag.
+  const optionsGiven = n !== null && typeof n === 'object';
+  const opts = (optionsGiven ? n : options) ?? {};
+  const requested = optionsGiven ? undefined : n;
+
+  // Get data from DataFrame
+  const rows = df.toArray();
+  if (rows.length === 0) {
+    return emptyLike(df);
+  }
+
+  // Determine the number of rows to sample
+  let sampleSize;
+  if (opts.fraction !== undefined) {
+    if (opts.fraction <= 0 || opts.fraction > 1) {
+      throw new Error('Fraction must be in the range (0, 1]');
+    }
+    sampleSize = Math.round(rows.length * opts.fraction);
+  } else {
+    sampleSize = requested !== undefined ? requested : 1;
+  }
+
+  // Check the validity of the number of rows
+  if (sampleSize <= 0) {
+    throw new Error('Number of rows to sample must be a positive number');
+  }
+
+  // Check that the sample size is an integer
+  if (!Number.isInteger(sampleSize)) {
+    throw new Error('Number of rows to sample must be an integer');
+  }
+
+  // If sampling without replacement, the sample cannot exceed the row count
+  if (!opts.replace && sampleSize > rows.length) {
+    throw new Error(
+      `Sample size (${sampleSize}) cannot be greater than number of rows (${rows.length})`,
+    );
+  }
+
+  // Create a random number generator with seed if specified
+  const random =
+    opts.seed !== undefined ? createSeededRandom(opts.seed) : Math.random;
+
+  // Select rows
+  const sampledRows = [];
+  if (opts.replace) {
+    // Sampling with replacement: every draw is independent
+    for (let i = 0; i < sampleSize; i++) {
+      const index = Math.floor(random() * rows.length);
+      sampledRows.push(rows[index]);
+    }
+  } else {
+    // Sampling without replacement (Fisher-Yates shuffle of the row indices,
+    // then take the first `sampleSize` of them)
+    const indices = Array.from({ length: rows.length }, (_, i) => i);
+    for (let i = indices.length - 1; i > 0; i--) {
+      const j = Math.floor(random() * (i + 1));
+      [indices[i], indices[j]] = [indices[j], indices[i]];
+    }
+    for (let i = 0; i < sampleSize; i++) {
+      sampledRows.push(rows[indices[i]]);
+    }
+  }
+
+  // `sampledRows` cannot be empty here: the size was validated to be at least
+  // 1, so the result is always built from real rows.
+  const result = new df.constructor({}, df._options);
+
+  // Create Series for each column
+  for (const col of df.columns) {
+    // Extract values for this column from the sampled rows
+    const values = sampledRows.map((row) => row[col]);
+
+    // Create a new Series with the appropriate type
+    result._columns[col] = createTypedSeries(values, col, df);
+
+    // Add to order
+    if (!result._order.includes(col)) {
+      result._order.push(col);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Creates an empty DataFrame that has the same columns as `df`.
+ *
+ * @param {Object} df - DataFrame whose constructor, options and columns are reused
+ * @returns {Object} - New DataFrame with one empty Series per column of `df`
+ * @private
+ */
+function emptyLike(df) {
+  const result = new df.constructor({}, df._options);
+  for (const col of df.columns) {
+    // Create an empty Series with the same type as the original
+    result._columns[col] = createEmptyTypedSeries(col, df);
+    // Add to order
+    if (!result._order.includes(col)) {
+      result._order.push(col);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Creates a random number generator with seed (linear congruential).
+ * @param {number} seed - Seed for random number generator
+ * @returns {Function} - Function returning pseudorandom number in range [0, 1)
+ * @private
+ */
+function createSeededRandom(seed) {
+  return function () {
+    // `%` keeps the sign of its left operand, so wrap negative states back into range
+    const next = (seed * 9301 + 49297) % 233280;
+    seed = next < 0 ? next + 233280 : next;
+    return seed / 233280;
+  };
+}
diff --git a/packages/core/src/methods/dataframe/filtering/select.js b/packages/core/src/methods/dataframe/filtering/select.js
new file mode 100644
index 0000000..75d248a
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/select.js
@@ -0,0 +1,62 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame -› filtering · select() |
+ | |
+ | df.select(['age', 'name']) → new DataFrame with only the specified |
+ | columns. |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Builds a new DataFrame that keeps only the requested columns.
+ * `df.select(['name', 'age'])` → DataFrame holding just the 'name' and 'age' columns.
+ *
+ * Column type definitions found in `df._options.columns` are carried over for
+ * the selected columns; all other options are copied unchanged.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {Array} columns - Array of column names to select
+ * @returns {DataFrame} - New DataFrame with only the specified columns
+ * @throws {Error} If any column does not exist or if columns is empty
+ */
+export function select(df, columns) {
+  // Guard clauses: argument shape first
+  if (!Array.isArray(columns)) {
+    throw new Error('Columns must be an array');
+  }
+  if (columns.length === 0) {
+    throw new Error('Column list cannot be empty');
+  }
+
+  // Report the first requested column that the DataFrame does not have
+  const missingAt = columns.findIndex((name) => !df.columns.includes(name));
+  if (missingAt !== -1) {
+    throw new Error(`Column '${columns[missingAt]}' not found`);
+  }
+
+  // Copies the entries of `source` named in `columns` into a fresh object,
+  // skipping values rejected by `keep`
+  const pick = (source, keep) =>
+    columns.reduce((picked, name) => {
+      const value = source[name];
+      if (keep(value)) {
+        picked[name] = value;
+      }
+      return picked;
+    }, {});
+
+  // Project every row onto the selected columns (every value is kept)
+  const records = df.toArray().map((row) => pick(row, () => true));
+
+  // Shallow-copy the options so the original DataFrame is left untouched
+  const newOptions = { ...df._options };
+  // Narrow the column type definitions to the selected columns, dropping
+  // entries that are missing or falsy
+  if (newOptions.columns) {
+    newOptions.columns = pick(newOptions.columns, Boolean);
+  }
+
+  // Create new DataFrame from records with preserved column types
+  return df.constructor.fromRecords(records, newOptions);
+}
+
+// Export object with method for the pool
+export default { select };
diff --git a/packages/core/src/methods/dataframe/filtering/selectByPattern.js b/packages/core/src/methods/dataframe/filtering/selectByPattern.js
new file mode 100644
index 0000000..254580e
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/selectByPattern.js
@@ -0,0 +1,65 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame -› filtering · selectByPattern() |
+ | |
+ | df.selectByPattern(/^price/) → new DataFrame with only columns |
+ | whose names match the regular expression. |
+ *-------------------------------------------------------------------------*/
+
+/**
+ * Returns a new DataFrame with only columns whose names match the pattern.
+ * `df.selectByPattern(/^price/)` → returns a new DataFrame with columns that start with 'price'.
+ *
+ * Matching runs on a private copy of the pattern with `lastIndex` reset for each
+ * column, so `g`/`y` flags cannot make it skip names or mutate the caller's RegExp.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {RegExp|string} pattern - Regular expression or string pattern to match
+ * @returns {DataFrame} - New DataFrame with only the matched columns
+ * @throws {Error} If no columns match the pattern
+ * @throws {TypeError} If pattern is not a string or regular expression
+ */
+export function selectByPattern(df, pattern) {
+  if (typeof pattern !== 'string' && !(pattern instanceof RegExp)) {
+    throw new TypeError('Pattern must be a string or regular expression');
+  }
+
+  // `new RegExp(x)` compiles a string and clones a RegExp together with its flags
+  const regex = new RegExp(pattern);
+  const matchedColumns = df.columns.filter((column) => {
+    // With `g`/`y`, test() resumes at lastIndex and would skip names otherwise
+    regex.lastIndex = 0;
+    return regex.test(column);
+  });
+
+  if (matchedColumns.length === 0) {
+    throw new Error('No columns match the pattern');
+  }
+
+  // Create records with only the matched columns
+  const records = df.toArray().map((row) => {
+    const newRow = {};
+    for (const col of matchedColumns) {
+      newRow[col] = row[col];
+    }
+    return newRow;
+  });
+
+  // Copy the options and narrow the column type definitions to the matches
+  const newOptions = { ...df._options };
+  if (newOptions.columns) {
+    const filteredColumns = {};
+    for (const col of matchedColumns) {
+      if (newOptions.columns[col]) {
+        filteredColumns[col] = newOptions.columns[col];
+      }
+    }
+    newOptions.columns = filteredColumns;
+  }
+
+  return df.constructor.fromRecords(records, newOptions);
+}
+
+/* -------------------------------------------------------------- *
+ | Pool for extendDataFrame |
+ * -------------------------------------------------------------- */
+export default { selectByPattern };
diff --git a/packages/core/src/methods/dataframe/filtering/stratifiedSample.js b/packages/core/src/methods/dataframe/filtering/stratifiedSample.js
new file mode 100644
index 0000000..04a1b37
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/stratifiedSample.js
@@ -0,0 +1,180 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame -› filtering · stratifiedSample() |
+ | |
+ | df.stratifiedSample('category', 100) → sample of 100 rows preserving |
+ | category proportions. |
+ | df.stratifiedSample('category', { frac: 0.1 }) → sample of 10% rows. |
+ *-------------------------------------------------------------------------*/
+
+import { createTypedSeries } from '../../../data/utils/createTypedArray.js';
+
+/**
+ * Selects a stratified sample from a DataFrame, preserving category proportions.
+ *
+ * Rows are grouped by their value in `stratifyColumn`. From every group about
+ * `group size × fraction` rows are drawn at random, but never fewer than one,
+ * so the result can hold more rows than the requested count.
+ *
+ * @param {Object} df - DataFrame instance
+ * @param {string} stratifyColumn - Column name to stratify by
+ * @param {number|Object} nOrOptions - Number of rows to sample or options object with frac property
+ * @param {Object} [options] - Additional options
+ * @param {number} [options.seed] - Seed for random number generator
+ * @returns {Object} - New DataFrame with sampled rows
+ * @throws {Error} If the DataFrame is empty, the column does not exist, or the
+ *   requested count/fraction is invalid
+ */
+export function stratifiedSample(df, stratifyColumn, nOrOptions, options = {}) {
+  // Check that DataFrame is not empty
+  if (df.rowCount === 0) {
+    throw new Error('DataFrame is empty');
+  }
+
+  // Check if the stratify column exists
+  if (!df.columns.includes(stratifyColumn)) {
+    throw new Error("Column not found");
+  }
+
+  // Resolve the sampling fraction and the effective options from either
+  // calling convention: options object with `frac`, or a plain row count
+  let fraction;
+  let opts = options;
+
+  if (typeof nOrOptions === 'object' && nOrOptions !== null) {
+    // Use options object with frac property
+    fraction = nOrOptions.frac;
+    if (fraction === undefined) {
+      throw new Error('When using options object, frac property must be specified');
+    }
+    if (fraction <= 0 || fraction > 1) {
+      throw new Error('Fraction must be in the range (0, 1]');
+    }
+    // Merge options
+    opts = { ...nOrOptions, ...options };
+  } else {
+    // Use n (count) directly
+    const n = nOrOptions;
+    if (typeof n !== 'number') {
+      throw new Error('Number of rows to sample must be a number');
+    }
+    if (n < 0) {
+      throw new Error('Number of rows to sample must be a positive number');
+    }
+    if (!Number.isInteger(n)) {
+      throw new Error('Number of rows to sample must be an integer');
+    }
+    if (n > df.rowCount) {
+      throw new Error(`Sample size (${n}) cannot be greater than number of rows (${df.rowCount})`);
+    }
+    // Calculate fraction based on n
+    fraction = n / df.rowCount;
+  }
+
+  // Group rows by categories
+  const categories = groupRowsByCategory(df.toArray(), stratifyColumn);
+
+  // Create random number generator with seed if specified
+  const random =
+    opts.seed !== undefined ? createSeededRandom(opts.seed) : Math.random;
+
+  // Sample rows from each category, preserving proportions
+  const sampledRows = [];
+  for (const categoryRows of Object.values(categories)) {
+    // Proportional share, but at least one row and at most the whole category
+    const share = Math.max(1, Math.round(categoryRows.length * fraction));
+    const sampleSize = Math.min(categoryRows.length, share);
+
+    // Pushed one by one: spreading a very large array into push() can exceed
+    // the engine's argument limit
+    for (const row of shuffledCopy(categoryRows, random).slice(0, sampleSize)) {
+      sampledRows.push(row);
+    }
+  }
+
+  // Create a new DataFrame instance with the same options as the original
+  const result = new df.constructor({}, df._options);
+
+  // For each column, create a Series with the appropriate type
+  for (const col of df.columns) {
+    // Get the original column data to determine its type
+    const originalArray = df._columns[col].vector.__data;
+
+    // Extract values for this column from the sampled rows
+    const values = sampledRows.map((row) => row[col]);
+
+    // Typed arrays (but not DataView) are recreated with the same class
+    const isTypedArray =
+      ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView);
+    let data = values;
+    if (isTypedArray) {
+      data = new originalArray.constructor(values);
+    }
+
+    result._columns[col] = createTypedSeries(data, col, df);
+
+    // Add to column order
+    if (!result._order.includes(col)) {
+      result._order.push(col);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Groups row objects by the value they hold in one column.
+ *
+ * @param {Object[]} rows - Row objects to group
+ * @param {string} column - Name of the column whose value is the group key
+ * @returns {Object} - Prototype-less object mapping each key (coerced to a
+ *   string) to the array of rows carrying it
+ * @private
+ */
+function groupRowsByCategory(rows, column) {
+  // No prototype: category values such as "constructor" or "toString" must not
+  // be mistaken for already existing groups inherited from Object.prototype
+  const groups = Object.create(null);
+  for (const row of rows) {
+    const key = row[column];
+    if (!groups[key]) {
+      groups[key] = [];
+    }
+    groups[key].push(row);
+  }
+  return groups;
+}
+
+/**
+ * Returns a uniformly shuffled copy of an array (Fisher-Yates).
+ *
+ * @param {Array} items - Items to shuffle; the input array is not modified
+ * @param {Function} random - Source of numbers in [0, 1), e.g. `Math.random`
+ * @returns {Array} - New array with the same items in random order
+ * @private
+ */
+function shuffledCopy(items, random) {
+  const copy = [...items];
+  for (let i = copy.length - 1; i > 0; i--) {
+    const j = Math.floor(random() * (i + 1));
+    [copy[i], copy[j]] = [copy[j], copy[i]];
+  }
+  return copy;
+}
+
+/**
+ * Creates a seeded random number generator (linear congruential).
+ * @param {number} seed - Seed for random number generator
+ * @returns {Function} - Function returning a pseudo-random number in range [0, 1)
+ * @private
+ */
+function createSeededRandom(seed) {
+  return function () {
+    // `%` keeps the sign of its left operand, so wrap negative states back into range
+    const next = (seed * 9301 + 49297) % 233280;
+    seed = next < 0 ? next + 233280 : next;
+    return seed / 233280;
+  };
+}
+
+// Export object with method for the pool
+export default { stratifiedSample };
diff --git a/packages/core/src/methods/dataframe/filtering/tail.js b/packages/core/src/methods/dataframe/filtering/tail.js
new file mode 100644
index 0000000..dbe5ffe
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/tail.js
@@ -0,0 +1,45 @@
+/* -------------------------------------------------------------- *
+ | DataFrame → filtering · tail() |
+ * -------------------------------------------------------------- */
+
+/**
+ * Returns the last n rows of a DataFrame.
+ * `df.tail(5)` → returns a new DataFrame with the last 5 rows.
+ * Similar to pandas' tail() function.
+ *
+ * The result is built with `df.constructor.fromRecords` (receiving a copy of
+ * `df._options`) when that static exists, otherwise with `new df.constructor(rows)`.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {number} [n=5] - Number of rows to return
+ * @param {Object} [options] - Additional options
+ * @param {boolean} [options.print=false] - Accepted for compatibility with other libraries; not read here
+ * @returns {DataFrame} - New DataFrame with the last n rows
+ * @throws {Error} If n is not a positive integer
+ */
+export function tail(df, n = 5, options = { print: false }) {
+  // Sign is checked first, so negative values get the "positive integer" message
+  if (n <= 0) {
+    throw new Error('Number of rows must be a positive integer');
+  }
+  if (!Number.isInteger(n)) {
+    throw new Error('Number of rows must be an integer');
+  }
+
+  // slice(-n) clamps: when there are fewer than n rows, all of them are returned
+  const selectedRows = df.toArray().slice(-n);
+
+  // Call fromRecords on the constructor itself; a detached reference would lose `this`.
+  const DataFrameClass = df.constructor;
+  if (typeof DataFrameClass.fromRecords === 'function') {
+    return DataFrameClass.fromRecords(selectedRows, { ...df._options });
+  }
+
+  return new DataFrameClass(selectedRows);
+}
+
+/* -------------------------------------------------------------- *
+ | Pool for extendDataFrame |
+ * -------------------------------------------------------------- */
+export default { tail };
+
diff --git a/packages/core/src/methods/dataframe/filtering/where.js b/packages/core/src/methods/dataframe/filtering/where.js
new file mode 100644
index 0000000..37dd417
--- /dev/null
+++ b/packages/core/src/methods/dataframe/filtering/where.js
@@ -0,0 +1,62 @@
+/*-------------------------------------------------------------------------*
+ | DataFrame -› filtering · where() |
+ | |
+ | df.where('price', '>', 100) → new DataFrame with only rows where the |
+ | 'price' column values are greater than 100. |
+ *-------------------------------------------------------------------------*/
+import { validateColumn } from '../../../data/utils/validators.js';
+
+/** Operator → predicate map; each predicate receives (cellValue, comparisonValue) */
+const OPS = {
+  '==': (a, b) => a == b, // eslint-disable-line eqeqeq
+  '===': (a, b) => a === b,
+  '!=': (a, b) => a != b, // eslint-disable-line eqeqeq
+  '!==': (a, b) => a !== b,
+  '>': (a, b) => a > b,
+  '>=': (a, b) => a >= b,
+  '<': (a, b) => a < b,
+  '<=': (a, b) => a <= b,
+  in: (a, b) => Array.isArray(b) && b.includes(a),
+  contains: (a, b) => String(a).includes(String(b)),
+  startsWith: (a, b) => String(a).startsWith(String(b)),
+  startswith: (a, b) => String(a).startsWith(String(b)),
+  endsWith: (a, b) => String(a).endsWith(String(b)),
+  endswith: (a, b) => String(a).endsWith(String(b)),
+  // Test a fresh RegExp each call: a caller's /g or /y regex would carry lastIndex across rows
+  matches: (a, b) => new RegExp(b).test(String(a)),
+};
+
+/**
+ * Returns a new DataFrame with only rows that match the condition.
+ * `df.where('price', '>', 100)` → returns a new DataFrame with rows where price > 100.
+ *
+ * @param {import('../../../data/model/DataFrame.js').DataFrame} df
+ * @param {string} column - Column name to filter on
+ * @param {keyof typeof OPS} operator - Comparison operator (must be an own key of OPS)
+ * @param {*} value - Value to compare against
+ * @returns {DataFrame} - New DataFrame with only matching rows
+ * @throws {Error} If column doesn't exist or operator is not supported
+ */
+export function where(df, column, operator, value) {
+  validateColumn(df, column);
+
+  // Own-property lookup only: inherited keys such as 'constructor' or 'toString'
+  // are truthy on a plain object and would otherwise be accepted as operators.
+  if (!Object.prototype.hasOwnProperty.call(OPS, operator)) {
+    throw new Error(`Unsupported operator: '${operator}'`);
+  }
+  const pred = OPS[operator];
+
+  const colVals = df.col(column).toArray(); // safer than vector.get
+  const srcRows = df.toArray();
+  const outRows = [];
+  for (let i = 0; i < colVals.length; i++) {
+    if (pred(colVals[i], value)) outRows.push(srcRows[i]);
+  }
+
+  // Copy the options so the rebuilt DataFrame keeps its column type information
+  return df.constructor.fromRecords(outRows, { ...df._options });
+}
+
+// Pool export, as in the sibling filtering modules; `where` is already a named export above
+export default { where };
\ No newline at end of file
diff --git a/tests/core/methods/dataframe/filtering/at.test.js b/tests/core/methods/dataframe/filtering/at.test.js
new file mode 100644
index 0000000..3e03d9a
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/at.test.js
@@ -0,0 +1,95 @@
+/**
+ * Unit tests for at method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { at } from '../../../../../packages/core/src/methods/dataframe/filtering/at.js';
+
+// Shared fixture: three records with name, age, city and salary fields
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('At Method', () => {
+ // Attach at() to DataFrame.prototype so the tests can call df.at(index)
+ DataFrame.prototype.at = function(index) {
+ return at(this, index);
+ };
+
+ describe('with standard storage', () => {
+ // DataFrame built from the shared fixture via fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should return row at specified index', () => {
+ const result = df.at(1);
+
+ // Index 1 is the second fixture record (Bob)
+ expect(result).toEqual({
+ name: 'Bob',
+ age: 30,
+ city: 'San Francisco',
+ salary: 85000,
+ });
+ });
+
+ test('should handle index 0', () => {
+ const result = df.at(0);
+
+ expect(result).toEqual({
+ name: 'Alice',
+ age: 25,
+ city: 'New York',
+ salary: 70000,
+ });
+ });
+
+ test('should handle last index', () => {
+ const result = df.at(2);
+
+ expect(result).toEqual({
+ name: 'Charlie',
+ age: 35,
+ city: 'Chicago',
+ salary: 90000,
+ });
+ });
+
+ test('should throw error for negative index', () => {
+ expect(() => df.at(-1)).toThrow('Index out of bounds: -1 is negative');
+ });
+
+ test('should throw error for index >= rowCount', () => {
+ expect(() => df.at(3)).toThrow('Index out of bounds: 3 >= 3');
+ });
+
+ test('should throw error for non-integer index', () => {
+ expect(() => df.at(1.5)).toThrow('Index must be an integer');
+ });
+
+ test('should handle typed arrays correctly', () => {
+ // Build a DataFrame whose age/salary columns are declared as int32/float64
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Read the second record back out of the typed DataFrame
+ const result = typedDf.at(1);
+
+ // Values must come back as the plain numbers from the fixture
+ expect(result.age).toBe(30);
+ expect(result.salary).toBe(85000);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.at(0)).toThrow('Index out of bounds: DataFrame is empty');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/drop.test.js b/tests/core/methods/dataframe/filtering/drop.test.js
new file mode 100644
index 0000000..4cf3b91
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/drop.test.js
@@ -0,0 +1,103 @@
+/**
+ * Unit tests for drop method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { drop } from '../../../../../packages/core/src/methods/dataframe/filtering/drop.js';
+
+// Shared fixture: three records with name, age, city and salary fields
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Drop Method', () => {
+ // Attach drop() to DataFrame.prototype so the tests can call df.drop(columns)
+ DataFrame.prototype.drop = function(columns) {
+ return drop(this, columns);
+ };
+
+ describe('with standard storage', () => {
+ // DataFrame built from the shared fixture via fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should drop specified columns', () => {
+ const result = df.drop(['city', 'salary']);
+
+ // Only 'name' and 'age' remain; both sides are sorted so column order is ignored
+ expect(result.columns.sort()).toEqual(['age', 'name'].sort());
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25 },
+ { name: 'Bob', age: 30 },
+ { name: 'Charlie', age: 35 },
+ ]);
+ });
+
+ test('should handle single column as string', () => {
+ const result = df.drop('name');
+
+ // A bare string is accepted in place of an array; every other column is kept
+ expect(result.columns.sort()).toEqual(['age', 'city', 'salary'].sort());
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([
+ { age: 25, city: 'New York', salary: 70000 },
+ { age: 30, city: 'San Francisco', salary: 85000 },
+ { age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should throw error for non-existent column', () => {
+ expect(() => df.drop(['name', 'nonexistent'])).toThrow('Column not found: \'nonexistent\'');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.drop(['city', 'salary']);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Build a DataFrame whose age/salary columns are declared as int32/float64
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Drop the untyped 'city' column
+ const result = typedDf.drop(['city']);
+
+ // The three remaining columns must all be present
+ expect(result.columns.sort()).toEqual(['age', 'name', 'salary'].sort());
+
+ // Only values are checked here (public API); the storage type itself is not asserted
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+ expect(ageCol.toArray()).toEqual([25, 30, 35]);
+ expect(salaryCol.toArray()).toEqual([70000, 85000, 90000]);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.drop(['name'])).toThrow('Column not found: \'name\'');
+ });
+
+ test('should handle empty column list', () => {
+ const result = df.drop([]);
+
+ // Expect a copy. NOTE(review): .sort() sorts in place — confirm df.columns returns a fresh array
+ expect(result.columns.sort()).toEqual(df.columns.sort());
+ expect(result.rowCount).toBe(df.rowCount);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should throw error when dropping all columns', () => {
+ expect(() => df.drop(['name', 'age', 'city', 'salary'])).toThrow('Cannot drop all columns');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/expr$.test.js b/tests/core/methods/dataframe/filtering/expr$.test.js
new file mode 100644
index 0000000..298d04c
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/expr$.test.js
@@ -0,0 +1,114 @@
+/**
+ * Unit tests for expr$ method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { expr$ } from '../../../../../packages/core/src/methods/dataframe/filtering/expr$.js';
+
+// Shared fixture: three records with name, age, city and salary fields
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Expr$ Method', () => {
+ // Attach expr$() to DataFrame.prototype so the tests can call df.expr$(expression)
+ DataFrame.prototype.expr$ = function(expression) {
+ return expr$(this, expression);
+ };
+
+ describe('with standard storage', () => {
+ // DataFrame built from the shared fixture via fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should filter rows based on a simple expression', () => {
+ const result = df.expr$('age > 25');
+
+ // Bob (30) and Charlie (35) pass; Alice (25) does not
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle complex expressions with logical operators', () => {
+ const result = df.expr$('age > 25 && salary > 85000');
+
+ // Only Charlie satisfies both conditions (Bob's salary is exactly 85000)
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle string methods', () => {
+ const result = df.expr$('city.includes("Francisco")');
+
+ // Only Bob's city contains "Francisco"
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ ]);
+ });
+
+ test('should return empty DataFrame when no rows match', () => {
+ const result = df.expr$('age > 100');
+
+ // Should have all columns but no rows
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+ expect(result.rowCount).toBe(0);
+ });
+
+ test('should throw error for invalid expression', () => {
+ expect(() => df.expr$('age >< 25')).toThrow();
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.expr$('age > 25');
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Build a DataFrame whose age/salary columns are declared as int32/float64
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Filter the typed DataFrame
+ const result = typedDf.expr$('age > 25');
+
+ // Inspect the backing storage through private fields to confirm the array types
+ expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array);
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+ const result = emptyDf.expr$('age > 25');
+
+ expect(result.rowCount).toBe(0);
+ expect(result.columns).toEqual([]);
+ });
+
+ test('should handle expressions with variables', () => {
+ const minAge = 30;
+ const result = df.expr$(`age >= ${minAge}`);
+
+ // minAge is interpolated into the expression text, so this evaluates 'age >= 30'
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/filter.test.js b/tests/core/methods/dataframe/filtering/filter.test.js
new file mode 100644
index 0000000..26bb529
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/filter.test.js
@@ -0,0 +1,92 @@
+/**
+ * Unit tests for filter method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { filter } from '../../../../../packages/core/src/methods/dataframe/filtering/filter.js';
+
+// Shared fixture: three records with name, age, city and salary fields
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Filter Method', () => {
+ // Attach filter() to DataFrame.prototype so the tests can call df.filter(predicate)
+ DataFrame.prototype.filter = function(predicate) {
+ return filter(this, predicate);
+ };
+
+ describe('with standard storage', () => {
+ // DataFrame built from the shared fixture via fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should filter rows based on predicate function', () => {
+ const result = df.filter(row => row.age > 25);
+
+ // Bob (30) and Charlie (35) pass; Alice (25) does not
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle complex predicates', () => {
+ const result = df.filter(row => row.age > 25 && row.salary > 85000);
+
+ // Only Charlie satisfies both conditions (Bob's salary is exactly 85000)
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should return empty DataFrame when no rows match', () => {
+ const result = df.filter(row => row.age > 100);
+
+ // Should have all columns but no rows
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+ expect(result.rowCount).toBe(0);
+ });
+
+ test('should throw error for non-function predicate', () => {
+ expect(() => df.filter('not a function')).toThrow('Predicate must be a function');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.filter(row => row.age > 25);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Build a DataFrame whose age/salary columns are declared as int32/float64
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Filter the typed DataFrame
+ const result = typedDf.filter(row => row.age > 25);
+
+ // Inspect the backing storage through private fields to confirm the array types
+ expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array);
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+ const result = emptyDf.filter(row => true);
+
+ expect(result.rowCount).toBe(0);
+ expect(result.columns).toEqual([]);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/head.test.js b/tests/core/methods/dataframe/filtering/head.test.js
new file mode 100644
index 0000000..9e00083
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/head.test.js
@@ -0,0 +1,96 @@
+/**
+ * Unit tests for head method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { head } from '../../../../../packages/core/src/methods/dataframe/filtering/head.js';
+
+// Shared fixture: seven records, more than the default head() size of 5
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+ { name: 'Frank', age: 50, city: 'Denver', salary: 105000 },
+ { name: 'Grace', age: 55, city: 'Miami', salary: 110000 },
+];
+
+describe('Head Method', () => {
+ // Attach head() to DataFrame.prototype so the tests can call df.head(n, options)
+ DataFrame.prototype.head = function(n, options) {
+ return head(this, n, options);
+ };
+
+ describe('with standard storage', () => {
+ // DataFrame built from the shared fixture via fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should return first 5 rows by default', () => {
+ const result = df.head();
+
+ // No argument: expect the first five fixture records
+ expect(result.rowCount).toBe(5);
+ expect(result.toArray()).toEqual(testData.slice(0, 5));
+ });
+
+ test('should return specified number of rows', () => {
+ const result = df.head(3);
+
+ // Expect the first three fixture records
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual(testData.slice(0, 3));
+ });
+
+ test('should handle n greater than number of rows', () => {
+ const result = df.head(10);
+
+ // Asking for more rows than exist returns every row
+ expect(result.rowCount).toBe(testData.length);
+ expect(result.toArray()).toEqual(testData);
+ });
+
+ test('should throw error for negative n', () => {
+ expect(() => df.head(-1)).toThrow('Number of rows must be a positive integer');
+ });
+
+ test('should throw error for non-integer n', () => {
+ expect(() => df.head(2.5)).toThrow('Number of rows must be an integer');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.head(3);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Build a DataFrame whose age/salary columns are declared as int32/float64
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Take the first three rows of the typed DataFrame
+ const result = typedDf.head(3);
+
+ // All four columns must still be present
+ expect(result.columns.sort()).toEqual(['age', 'city', 'name', 'salary'].sort());
+
+ // Only values are checked here (public API); the storage type itself is not asserted
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+ expect(ageCol.toArray()).toEqual([25, 30, 35]);
+ expect(salaryCol.toArray()).toEqual([70000, 85000, 90000]);
+ });
+
+ test('should accept options object', () => {
+ // The print option is for API compatibility and doesn't affect the result
+ const result = df.head(3, { print: true });
+ expect(result.rowCount).toBe(3);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/iloc.test.js b/tests/core/methods/dataframe/filtering/iloc.test.js
new file mode 100644
index 0000000..9c19b37
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/iloc.test.js
@@ -0,0 +1,145 @@
+/**
+ * Unit tests for iloc method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { iloc } from '../../../../../packages/core/src/methods/dataframe/filtering/iloc.js';
+
+// Shared fixture: five records; column positions are name=0, age=1, city=2, salary=3
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+];
+
+describe('Iloc Method', () => {
+ // Attach iloc() to DataFrame.prototype so the tests can call df.iloc(rows, columns)
+ DataFrame.prototype.iloc = function(rowSelector, columnSelector) {
+ return iloc(this, rowSelector, columnSelector);
+ };
+
+ describe('with standard storage', () => {
+ // DataFrame built from the shared fixture via fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should select rows by integer index', () => {
+ const result = df.iloc(1);
+
+ // A single integer still yields a DataFrame, holding just that one row
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([testData[1]]);
+ });
+
+ test('should select rows by array of indices', () => {
+ const result = df.iloc([0, 2, 4]);
+
+ // Rows come back in the order the indices were given
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([
+ testData[0],
+ testData[2],
+ testData[4],
+ ]);
+ });
+
+ test('should select rows by predicate function', () => {
+ const result = df.iloc((i) => i % 2 === 0);
+
+ // The predicate receives the row index: even indices 0, 2, 4 are kept
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([
+ testData[0],
+ testData[2],
+ testData[4],
+ ]);
+ });
+
+ test('should select columns by integer index', () => {
+ const result = df.iloc(null, 1);
+
+ // Column position 1 is 'age'; a null row selector keeps every row
+ expect(result.columns).toEqual(['age']);
+ expect(result.rowCount).toBe(5);
+ expect(result.col('age').toArray()).toEqual([25, 30, 35, 40, 45]);
+ });
+
+ test('should select columns by array of indices', () => {
+ const result = df.iloc(null, [0, 2]);
+
+ // Column positions 0 and 2 are 'name' and 'city'
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.rowCount).toBe(5);
+ });
+
+ test('should select rows and columns by indices', () => {
+ const result = df.iloc([1, 3], [0, 2]);
+
+ // Rows 1 and 3 (Bob, David) restricted to columns 'name' and 'city'
+ expect(result.rowCount).toBe(2);
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', city: 'San Francisco' },
+ { name: 'David', city: 'Boston' },
+ ]);
+ });
+
+ test('should handle null for rows to select all rows', () => {
+ const result = df.iloc(null, 1);
+
+ // Should select all rows, but only the 'age' column
+ expect(result.rowCount).toBe(5);
+ expect(result.columns).toEqual(['age']);
+ });
+
+ test('should handle null for columns to select all columns', () => {
+ const result = df.iloc(2, null);
+
+ // Should select row 2, all columns
+ expect(result.rowCount).toBe(1);
+ expect(result.columns.sort()).toEqual(['age', 'city', 'name', 'salary'].sort());
+ expect(result.toArray()).toEqual([testData[2]]);
+ });
+
+ test('should throw error for out of bounds row index', () => {
+ expect(() => df.iloc(10)).toThrow('Row index out of bounds');
+ });
+
+ test('should throw error for out of bounds column index', () => {
+ expect(() => df.iloc(null, 10)).toThrow('Column index out of bounds');
+ });
+
+ test('should throw error for invalid row selector type', () => {
+ expect(() => df.iloc('invalid')).toThrow('Invalid row selector type');
+ });
+
+ test('should throw error for invalid column selector type', () => {
+ expect(() => df.iloc(null, 'invalid')).toThrow('Invalid column selector type');
+ });
+
+ test('should preserve typed arrays', () => {
+ // Build a DataFrame whose age/salary columns are declared as int32/float64
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Rows 1 and 3, column positions 1 and 3 ('age' and 'salary')
+ const result = typedDf.iloc([1, 3], [1, 3]);
+
+ // Inspect the backing storage through private fields to confirm the array types
+ expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array);
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.iloc(0)).toThrow('Row index out of bounds');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/loc.test.js b/tests/core/methods/dataframe/filtering/loc.test.js
new file mode 100644
index 0000000..159cb50
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/loc.test.js
@@ -0,0 +1,151 @@
+/**
+ * Unit tests for loc method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { loc } from '../../../../../packages/core/src/methods/dataframe/filtering/loc.js';
+
+// Shared fixture: five records, each with a string 'id' used below as the row label
+const testData = [
+ { id: 'a1', name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { id: 'b2', name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { id: 'c3', name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { id: 'd4', name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { id: 'e5', name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+];
+
+describe('Loc Method', () => {
+ // Attach loc() to DataFrame.prototype so the tests can call df.loc(rows, columns)
+ DataFrame.prototype.loc = function(rowSelector, columnSelector) {
+ return loc(this, rowSelector, columnSelector);
+ };
+
+ describe('with standard storage', () => {
+ // Build the DataFrame from the fixture; the index is assigned in the next step
+ const df = DataFrame.fromRecords(testData);
+
+ // Use the 'id' column as the row index, so rows are addressed as 'a1' … 'e5'
+ df.setIndex('id');
+
+ test('should select rows by label', () => {
+ const result = df.loc('b2');
+
+ // A single label still yields a DataFrame, holding just that one row
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()[0].name).toBe('Bob');
+ });
+
+ test('should select rows by array of labels', () => {
+ const result = df.loc(['a1', 'c3', 'e5']);
+
+ // Rows come back in the order the labels were given
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray().map(r => r.name)).toEqual(['Alice', 'Charlie', 'Eve']);
+ });
+
+ test('should select rows by predicate function', () => {
+ const result = df.loc((row) => row.age > 30);
+
+ // The predicate receives the row object: Charlie, David and Eve are older than 30
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray().map(r => r.name)).toEqual(['Charlie', 'David', 'Eve']);
+ });
+
+ test('should select rows by condition object', () => {
+ const result = df.loc({ city: 'Chicago' });
+
+ // Should select rows where city is Chicago
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()[0].name).toBe('Charlie');
+ });
+
+ test('should select columns by name', () => {
+ const result = df.loc(null, 'age');
+
+ // Should select the 'age' column for all rows
+ expect(result.columns).toEqual(['age']);
+ expect(result.rowCount).toBe(5);
+ expect(result.col('age').toArray()).toEqual([25, 30, 35, 40, 45]);
+ });
+
+ test('should select columns by array of names', () => {
+ const result = df.loc(null, ['name', 'city']);
+
+ // Should select the 'name' and 'city' columns
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.rowCount).toBe(5);
+ });
+
+ test('should select rows and columns by labels', () => {
+ const result = df.loc(['b2', 'd4'], ['name', 'city']);
+
+ // Should select rows with ids 'b2' and 'd4', columns 'name' and 'city'
+ expect(result.rowCount).toBe(2);
+ expect(result.columns.sort()).toEqual(['city', 'name'].sort());
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', city: 'San Francisco' },
+ { name: 'David', city: 'Boston' },
+ ]);
+ });
+
+ test('should handle null for rows to select all rows', () => {
+ const result = df.loc(null, 'age');
+
+ // Should select all rows, but only the 'age' column
+ expect(result.rowCount).toBe(5);
+ expect(result.columns).toEqual(['age']);
+ });
+
+ test('should handle null for columns to select all columns', () => {
+ const result = df.loc('c3', null);
+
+ // Should select row with id 'c3', all columns
+ expect(result.rowCount).toBe(1);
+ expect(result.columns.length).toBe(5); // id, name, age, city, salary
+ expect(result.toArray()[0].name).toBe('Charlie');
+ });
+
+ test('should throw error for non-existent row label', () => {
+ expect(() => df.loc('z9')).toThrow('Row label not found');
+ });
+
+ test('should throw error for non-existent column label', () => {
+ expect(() => df.loc(null, 'country')).toThrow('Column not found');
+ });
+
+ test('should preserve typed arrays', () => {
+ // Build a DataFrame whose age/salary columns are declared as int32/float64
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+ typedDf.setIndex('id');
+
+ // Rows labelled 'b2' and 'd4', restricted to the two typed columns
+ const result = typedDf.loc(['b2', 'd4'], ['age', 'salary']);
+
+ // Inspect the backing storage through private fields to confirm the array types
+ expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array);
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+ emptyDf.setIndex('id');
+
+ expect(() => emptyDf.loc('a1')).toThrow('Row label not found');
+ });
+
+ test('should handle DataFrame without index', () => {
+ const dfNoIndex = DataFrame.fromRecords(testData);
+
+ // With no index set, the numeric row position (2 → Charlie) acts as the label
+ const result = dfNoIndex.loc(2);
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()[0].name).toBe('Charlie');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/query$.test.js b/tests/core/methods/dataframe/filtering/query$.test.js
new file mode 100644
index 0000000..604efa3
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/query$.test.js
@@ -0,0 +1,132 @@
+/**
+ * Unit tests for query$ method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { query$ } from '../../../../../packages/core/src/methods/dataframe/filtering/query$.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Query$ Method', () => {
+ // Add query$ method to DataFrame prototype
+ DataFrame.prototype.query$ = function(strings, ...values) {
+ return query$(this, strings, ...values);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should filter rows based on a simple condition', () => {
+ const result = df.query$`age > 25`;
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle complex conditions with logical operators', () => {
+ const result = df.query$`age > 25 && salary > 85000`;
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle string methods with _includes syntax', () => {
+ const result = df.query$`city_includes("Francisco")`;
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ ]);
+ });
+
+ test('should handle string methods with _startsWith syntax', () => {
+ const result = df.query$`city_startsWith("Chi")`;
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle string methods with _endsWith syntax', () => {
+ const result = df.query$`city_endsWith("York")`;
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ ]);
+ });
+
+ test('should return empty DataFrame when no rows match', () => {
+ const result = df.query$`age > 100`;
+
+ // Should have all columns but no rows
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+ expect(result.rowCount).toBe(0);
+ });
+
+ test('should throw error for invalid expression', () => {
+ expect(() => df.query$`age >< 25`).toThrow();
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.query$`age > 25`;
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedData = [
+ { name: 'Alice', age: 25, salary: 70000 },
+ { name: 'Bob', age: 30, salary: 85000 },
+ { name: 'Charlie', age: 35, salary: 90000 },
+ ];
+
+ // Use Int32Array for age and Float64Array for salary
+ const typedDf = DataFrame.fromRecords(typedData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Filter the data
+ const result = typedDf.query$`age > 25`;
+
+ // Check that the result contains Float64Array for salary
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle template literal interpolation', () => {
+ const minAge = 30;
+ const result = df.query$`age >= ${minAge}`;
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/query.test.js b/tests/core/methods/dataframe/filtering/query.test.js
new file mode 100644
index 0000000..17629e7
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/query.test.js
@@ -0,0 +1,137 @@
+/**
+ * Unit tests for query method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { query } from '../../../../../packages/core/src/methods/dataframe/filtering/query.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Query Method', () => {
+ // Add query method to DataFrame prototype
+ DataFrame.prototype.query = function(queryString) {
+ return query(this, queryString);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should filter rows based on a simple SQL-like query', () => {
+ const result = df.query('SELECT * WHERE age > 25');
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle complex conditions with logical operators', () => {
+ const result = df.query('SELECT * WHERE age > 25 AND salary > 85000');
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ ]);
+ });
+
+ test('should handle string operations', () => {
+ const result = df.query("SELECT * WHERE city LIKE '%Francisco%'");
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ ]);
+ });
+
+ test('should handle column selection', () => {
+ const result = df.query('SELECT name, age WHERE age > 25');
+
+ // Check that the filtered data is correct
+ expect(result.rowCount).toBe(2);
+ expect(result.columns.sort()).toEqual(['age', 'name'].sort());
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30 },
+ { name: 'Charlie', age: 35 },
+ ]);
+ });
+
+ test('should handle ORDER BY clause', () => {
+ const result = df.query('SELECT * ORDER BY age DESC');
+
+ // Check that the data is sorted correctly
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ ]);
+ });
+
+ test('should handle LIMIT clause', () => {
+ const result = df.query('SELECT * ORDER BY age DESC LIMIT 2');
+
+ // Check that the result is limited correctly
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ ]);
+ });
+
+ test('should return empty DataFrame when no rows match', () => {
+ const result = df.query('SELECT * WHERE age > 100');
+
+ // Should have all columns but no rows
+ expect(result.columns.sort()).toEqual(
+ ['age', 'city', 'name', 'salary'].sort(),
+ );
+ expect(result.rowCount).toBe(0);
+ });
+
+ test('should throw error for invalid query', () => {
+ expect(() => df.query('INVALID QUERY')).toThrow();
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.query('SELECT * WHERE age > 25');
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Filter the data
+ const result = typedDf.query('SELECT * WHERE age > 25');
+
+ // Check that the result contains typed arrays
+ expect(result._columns.age.vector.__data).toBeInstanceOf(Int32Array);
+ expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+ const result = emptyDf.query('SELECT * WHERE age > 25');
+
+ expect(result.rowCount).toBe(0);
+ expect(result.columns).toEqual([]);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/sample.test.js b/tests/core/methods/dataframe/filtering/sample.test.js
new file mode 100644
index 0000000..ca4398f
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/sample.test.js
@@ -0,0 +1,157 @@
+/**
+ * Unit tests for sample method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { sample } from '../../../../../packages/core/src/methods/dataframe/filtering/sample.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+ { name: 'Frank', age: 50, city: 'Denver', salary: 105000 },
+ { name: 'Grace', age: 55, city: 'Miami', salary: 110000 },
+];
+
+describe('Sample Method', () => {
+ // Add sample method to DataFrame prototype
+ DataFrame.prototype.sample = function(n, options) {
+ return sample(this, n, options);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should sample 1 row by default', () => {
+ const result = df.sample();
+
+ // Check that the result has 1 row
+ expect(result.rowCount).toBe(1);
+ // The row should be one of the original rows
+ const resultRow = result.toArray()[0];
+ expect(testData.some(row =>
+ row.name === resultRow.name &&
+ row.age === resultRow.age &&
+ row.city === resultRow.city &&
+ row.salary === resultRow.salary
+ )).toBe(true);
+ });
+
+ test('should sample specified number of rows', () => {
+ const result = df.sample(3);
+
+ // Check that the result has 3 rows
+ expect(result.rowCount).toBe(3);
+
+ // Each row should be one of the original rows
+ const resultRows = result.toArray();
+ for (const resultRow of resultRows) {
+ expect(testData.some(row =>
+ row.name === resultRow.name &&
+ row.age === resultRow.age &&
+ row.city === resultRow.city &&
+ row.salary === resultRow.salary
+ )).toBe(true);
+ }
+ });
+
+ test('should sample by fraction', () => {
+ const result = df.sample({ fraction: 0.5 });
+
+ // Check that the result has approximately half the rows
+ // Due to rounding, it might be 3 or 4 rows for 7 total rows
+ expect(result.rowCount).toBeGreaterThanOrEqual(3);
+ expect(result.rowCount).toBeLessThanOrEqual(4);
+ });
+
+ test('should throw error for invalid fraction', () => {
+ expect(() => df.sample({ fraction: 0 })).toThrow('Fraction must be in the range (0, 1]');
+ expect(() => df.sample({ fraction: 1.5 })).toThrow('Fraction must be in the range (0, 1]');
+ });
+
+ test('should throw error for negative n', () => {
+ expect(() => df.sample(-1)).toThrow('Number of rows to sample must be a positive integer');
+ });
+
+ test('should throw error for non-integer n', () => {
+ expect(() => df.sample(2.5)).toThrow('Number of rows to sample must be an integer');
+ });
+
+ test('should throw error when sampling without replacement and n > rows', () => {
+ expect(() => df.sample(10)).toThrow('Sample size (10) cannot be greater than number of rows (7)');
+ });
+
+ test('should allow sampling with replacement and n > rows', () => {
+ const result = df.sample(10, { replace: true });
+ expect(result.rowCount).toBe(10);
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.sample(3);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Sample the data with a fixed seed for deterministic results
+ const result = typedDf.sample(3, { seed: 42 });
+
+ // Check that the result has the correct columns
+ expect(result.columns.sort()).toEqual(['age', 'city', 'name', 'salary'].sort());
+
+ // Check that the data is preserved correctly (using the public API)
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+
+ // We can't check exact values since they depend on the random seed implementation
+ // But we can check that the arrays have the right length and are of the right type
+ expect(ageCol.toArray().length).toBe(3);
+ expect(salaryCol.toArray().length).toBe(3);
+
+ // Check that all values are from the original dataset
+ const originalAges = testData.map(row => row.age);
+ const originalSalaries = testData.map(row => row.salary);
+
+ ageCol.toArray().forEach(value => {
+ expect(originalAges).toContain(value);
+ });
+
+ salaryCol.toArray().forEach(value => {
+ expect(originalSalaries).toContain(value);
+ });
+ });
+
+ test('should produce deterministic results with seed', () => {
+ // Sample with the same seed should produce the same results
+ const sample1 = df.sample(3, { seed: 42 });
+ const sample2 = df.sample(3, { seed: 42 });
+
+ // Compare the sampled rows
+ const rows1 = sample1.toArray();
+ const rows2 = sample2.toArray();
+
+ expect(rows1).toEqual(rows2);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+ const result = emptyDf.sample();
+
+ expect(result.rowCount).toBe(0);
+ expect(result.columns).toEqual([]);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/select.test.js b/tests/core/methods/dataframe/filtering/select.test.js
new file mode 100644
index 0000000..74e2636
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/select.test.js
@@ -0,0 +1,88 @@
+/**
+ * Unit tests for select method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { select } from '../../../../../packages/core/src/methods/dataframe/filtering/select.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Select Method', () => {
+ // Add select method to DataFrame prototype
+ DataFrame.prototype.select = function(columns) {
+ return select(this, columns);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should select specified columns', () => {
+ const result = df.select(['name', 'age']);
+
+ // Check that the result has only the selected columns
+ expect(result.columns.sort()).toEqual(['age', 'name'].sort());
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25 },
+ { name: 'Bob', age: 30 },
+ { name: 'Charlie', age: 35 },
+ ]);
+ });
+
+ test('should handle single column as string', () => {
+ // Метод select должен принимать только массив
+ expect(() => df.select('name')).toThrow('Columns must be an array');
+ });
+
+ test('should throw error for non-existent column', () => {
+ expect(() => df.select(['name', 'nonexistent'])).toThrow("Column 'nonexistent' not found");
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.select(['name', 'age']);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Select columns
+ const result = typedDf.select(['name', 'age', 'salary']);
+
+ // Check that data is preserved correctly
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+ expect(ageCol.toArray()).toEqual([25, 30, 35]);
+ expect(salaryCol.toArray()).toEqual([70000, 85000, 90000]);
+
+ // Verify that the column types are preserved by checking the column options
+ // This is an indirect way to verify the typed arrays are preserved
+ expect(result._options.columns.age.type).toBe('int32');
+ expect(result._options.columns.salary.type).toBe('float64');
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.select(['name'])).toThrow("Column 'name' not found");
+ });
+
+ test('should handle empty column list', () => {
+ expect(() => df.select([])).toThrow('Column list cannot be empty');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/selectByPattern.test.js b/tests/core/methods/dataframe/filtering/selectByPattern.test.js
new file mode 100644
index 0000000..9c3df10
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/selectByPattern.test.js
@@ -0,0 +1,96 @@
+/**
+ * Unit tests for selectByPattern method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { selectByPattern } from '../../../../../packages/core/src/methods/dataframe/filtering/selectByPattern.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city_name: 'New York', salary_usd: 70000, user_id: 1 },
+ { name: 'Bob', age: 30, city_name: 'San Francisco', salary_usd: 85000, user_id: 2 },
+ { name: 'Charlie', age: 35, city_name: 'Chicago', salary_usd: 90000, user_id: 3 },
+];
+
+describe('SelectByPattern Method', () => {
+ // Add selectByPattern method to DataFrame prototype
+ DataFrame.prototype.selectByPattern = function(pattern) {
+ return selectByPattern(this, pattern);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should select columns matching a string pattern', () => {
+ const result = df.selectByPattern('city');
+
+ // Check that the result has only the matching columns
+ expect(result.columns).toEqual(['city_name']);
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual([
+ { city_name: 'New York' },
+ { city_name: 'San Francisco' },
+ { city_name: 'Chicago' },
+ ]);
+ });
+
+ test('should select columns matching a regular expression', () => {
+ const result = df.selectByPattern(/^.+_name$/);
+
+ // Check that the result has only the matching columns
+ expect(result.columns).toEqual(['city_name']);
+ expect(result.rowCount).toBe(3);
+ });
+
+ test('should select multiple columns matching a pattern', () => {
+ const result = df.selectByPattern(/^.+_/);
+
+ // Check that the result has all matching columns
+ expect(result.columns.sort()).toEqual(['city_name', 'salary_usd', 'user_id'].sort());
+ expect(result.rowCount).toBe(3);
+ });
+
+ test('should return empty DataFrame when no columns match', () => {
+ expect(() => df.selectByPattern('nonexistent')).toThrow('No columns match the pattern');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.selectByPattern('city');
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary_usd: { type: 'float64' },
+ user_id: { type: 'int32' },
+ },
+ });
+
+ // Select columns by pattern
+ const result = typedDf.selectByPattern(/^.+_/);
+
+ // Check that data is preserved correctly
+ const salaryCol = result.col('salary_usd');
+ const userIdCol = result.col('user_id');
+ expect(salaryCol.toArray()).toEqual([70000, 85000, 90000]);
+ expect(userIdCol.toArray()).toEqual([1, 2, 3]);
+
+ // Verify that the column types are preserved by checking the column options
+ // This is an indirect way to verify the typed arrays are preserved
+ expect(result._options.columns.salary_usd.type).toBe('float64');
+ expect(result._options.columns.user_id.type).toBe('int32');
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.selectByPattern('city')).toThrow('No columns match the pattern');
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/stratifiedSample.test.js b/tests/core/methods/dataframe/filtering/stratifiedSample.test.js
new file mode 100644
index 0000000..a233c06
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/stratifiedSample.test.js
@@ -0,0 +1,132 @@
+/**
+ * Unit tests for stratifiedSample method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { stratifiedSample } from '../../../../../packages/core/src/methods/dataframe/filtering/stratifiedSample.js';
+
+// Test data for use in all tests
+const testData = [
+ { category: 'A', value: 1 },
+ { category: 'A', value: 2 },
+ { category: 'A', value: 3 },
+ { category: 'A', value: 4 },
+ { category: 'B', value: 5 },
+ { category: 'B', value: 6 },
+ { category: 'C', value: 7 },
+ { category: 'C', value: 8 },
+ { category: 'C', value: 9 },
+];
+
+describe('StratifiedSample Method', () => {
+ // Add stratifiedSample method to DataFrame prototype
+ DataFrame.prototype.stratifiedSample = function(column, n, options) {
+ return stratifiedSample(this, column, n, options);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should sample proportionally from each category', () => {
+ const result = df.stratifiedSample('category', 4);
+
+ // Check that the result has 4 rows
+ expect(result.rowCount).toBe(4);
+
+ // Check that each category is represented proportionally
+ const categoryCounts = {};
+ result.toArray().forEach(row => {
+ categoryCounts[row.category] = (categoryCounts[row.category] || 0) + 1;
+ });
+
+ // Category A should have ~2 rows (4/9 * 4 ~= 1.78)
+ // Category B should have ~1 row (2/9 * 4 ~= 0.89)
+ // Category C should have ~1 row (3/9 * 4 ~= 1.33)
+ // Due to rounding and randomness, we can't check exact counts,
+ // but we can check that all categories are represented
+ expect(Object.keys(categoryCounts).sort()).toEqual(['A', 'B', 'C']);
+ });
+
+ test('should sample with fixed seed for deterministic results', () => {
+ const sample1 = df.stratifiedSample('category', 4, { seed: 42 });
+ const sample2 = df.stratifiedSample('category', 4, { seed: 42 });
+
+ // Compare the sampled rows
+ const rows1 = sample1.toArray();
+ const rows2 = sample2.toArray();
+
+ expect(rows1).toEqual(rows2);
+ });
+
+ test('should throw error for non-existent column', () => {
+ expect(() => df.stratifiedSample('nonexistent', 4)).toThrow('Column not found');
+ });
+
+ test('should throw error for negative n', () => {
+ expect(() => df.stratifiedSample('category', -1)).toThrow('Number of rows to sample must be a positive number');
+ });
+
+ test('should throw error for non-integer n', () => {
+ expect(() => df.stratifiedSample('category', 2.5)).toThrow('Number of rows to sample must be an integer');
+ });
+
+ test('should throw error when n > rows', () => {
+ expect(() => df.stratifiedSample('category', 10)).toThrow('Sample size (10) cannot be greater than number of rows (9)');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.stratifiedSample('category', 4);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ value: { type: 'int32' },
+ },
+ });
+
+ // Sample the data with a fixed seed for deterministic results
+ const result = typedDf.stratifiedSample('category', 4, { seed: 42 });
+
+ // Check that the result contains typed arrays
+ expect(result._columns.value.vector.__data).toBeInstanceOf(Int32Array);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.stratifiedSample('category', 1)).toThrow('DataFrame is empty');
+ });
+
+ test('should handle DataFrame with single category', () => {
+ const singleCategoryData = [
+ { category: 'A', value: 1 },
+ { category: 'A', value: 2 },
+ { category: 'A', value: 3 },
+ ];
+ const singleCategoryDf = DataFrame.fromRecords(singleCategoryData);
+
+ const result = singleCategoryDf.stratifiedSample('category', 2);
+
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray().every(row => row.category === 'A')).toBe(true);
+ });
+
+ test('should handle frac option instead of n', () => {
+ const result = df.stratifiedSample('category', { frac: 0.5 });
+
+ // Should sample approximately half the rows
+ expect(result.rowCount).toBeGreaterThanOrEqual(4);
+ expect(result.rowCount).toBeLessThanOrEqual(5);
+
+ // All categories should be represented
+ const categories = new Set(result.toArray().map(row => row.category));
+ expect([...categories].sort()).toEqual(['A', 'B', 'C']);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/tail.test.js b/tests/core/methods/dataframe/filtering/tail.test.js
new file mode 100644
index 0000000..84520b3
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/tail.test.js
@@ -0,0 +1,96 @@
+/**
+ * Unit tests for tail method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { tail } from '../../../../../packages/core/src/methods/dataframe/filtering/tail.js';
+
+// Test data for use in all tests
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+ { name: 'David', age: 40, city: 'Boston', salary: 95000 },
+ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 },
+ { name: 'Frank', age: 50, city: 'Denver', salary: 105000 },
+ { name: 'Grace', age: 55, city: 'Miami', salary: 110000 },
+];
+
+describe('Tail Method', () => {
+ // Add tail method to DataFrame prototype
+ DataFrame.prototype.tail = function(n, options) {
+ return tail(this, n, options);
+ };
+
+ describe('with standard storage', () => {
+ // Create DataFrame using fromRecords
+ const df = DataFrame.fromRecords(testData);
+
+ test('should return last 5 rows by default', () => {
+ const result = df.tail();
+
+ // Check that the result has 5 rows
+ expect(result.rowCount).toBe(5);
+ expect(result.toArray()).toEqual(testData.slice(-5));
+ });
+
+ test('should return specified number of rows from the end', () => {
+ const result = df.tail(3);
+
+ // Check that the result has 3 rows
+ expect(result.rowCount).toBe(3);
+ expect(result.toArray()).toEqual(testData.slice(-3));
+ });
+
+ test('should handle n greater than number of rows', () => {
+ const result = df.tail(10);
+
+ // Should return all rows
+ expect(result.rowCount).toBe(testData.length);
+ expect(result.toArray()).toEqual(testData);
+ });
+
+ test('should throw error for negative n', () => {
+ expect(() => df.tail(-1)).toThrow('Number of rows must be a positive integer');
+ });
+
+ test('should throw error for non-integer n', () => {
+ expect(() => df.tail(2.5)).toThrow('Number of rows must be an integer');
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.tail(3);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Get tail of the data
+ const result = typedDf.tail(3);
+
+ // Check that the result has the correct columns and data
+ expect(result.columns.sort()).toEqual(['age', 'city', 'name', 'salary'].sort());
+
+ // Check that the data is preserved correctly (using the public API)
+ const ageCol = result.col('age');
+ const salaryCol = result.col('salary');
+ expect(ageCol.toArray()).toEqual([45, 50, 55]);
+ expect(salaryCol.toArray()).toEqual([100000, 105000, 110000]);
+ });
+
+ test('should accept options object', () => {
+ // The print option is for API compatibility and doesn't affect the result
+ const result = df.tail(3, { print: true });
+ expect(result.rowCount).toBe(3);
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/where.debug.test.js b/tests/core/methods/dataframe/filtering/where.debug.test.js
new file mode 100644
index 0000000..1554df7
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/where.debug.test.js
@@ -0,0 +1,46 @@
+/**
+ * Debug test for the where method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { where } from '../../../../../packages/core/src/methods/dataframe/filtering/where.js';
+
+// Test data
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Where Method - Debug Test', () => {
+ // Add where method to DataFrame prototype
+ DataFrame.prototype.where = function(column, operator, value) {
+ return where(this, column, operator, value);
+ };
+
+ test('should debug where method behavior', () => {
+ // Create DataFrame and log its shape (diagnostic output only, nothing asserted)
+ const df = DataFrame.fromRecords(testData);
+ console.log('Original DataFrame columns:', df.columns);
+ console.log('Original DataFrame row count:', df.rowCount);
+
+ // Filter with where() on age === 30 and log the shape of the result
+ const result = df.where('age', '===', 30);
+ console.log('Result DataFrame columns:', result.columns);
+ console.log('Result DataFrame row count:', result.rowCount);
+
+ // Log the filtered rows as pretty-printed JSON
+ const resultArray = result.toArray();
+ console.log('Result array:', JSON.stringify(resultArray, null, 2));
+
+ // The only assertion in this test: exactly one row is expected to match
+ expect(resultArray.length).toBe(1);
+ console.log('First row keys:', Object.keys(resultArray[0]));
+
+ // Exercise a filter that matches nothing (values are only logged; nothing is asserted on them)
+ const emptyResult = df.where('age', '>', 100);
+ console.log('Empty result columns:', emptyResult.columns);
+ console.log('Empty result row count:', emptyResult.rowCount);
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/where.fixed.test.js b/tests/core/methods/dataframe/filtering/where.fixed.test.js
new file mode 100644
index 0000000..711e9c7
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/where.fixed.test.js
@@ -0,0 +1,220 @@
+/**
+ * Unit tests for where method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { where } from '../../../../../packages/core/src/methods/dataframe/filtering/where.js';
+
+// Shared fixture: three records with distinct ages (25, 30, 35) plus string and array columns
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+];
+
+describe('Where Method', () => {
+ // Expose where() as an instance method so the tests can call df.where(...)
+ DataFrame.prototype.where = function(column, operator, value) {
+ return where(this, column, operator, value);
+ };
+
+ describe('with standard storage', () => {
+ // One DataFrame built with fromRecords and shared by every test in this group
+ const df = DataFrame.fromRecords(testData);
+
+ test('should filter rows based on equality', () => {
+ const result = df.where('age', '===', 30);
+
+ // Only Bob has age 30
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on loose equality', () => {
+ const result = df.where('age', '==', '30');
+
+ // Expect the string operand to match numeric age 30 (Bob)
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on inequality', () => {
+ const result = df.where('age', '!==', 30);
+
+ // Everyone except Bob (age 30) remains
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on loose inequality', () => {
+ const result = df.where('age', '!=', '30');
+
+ // Expect the string operand to exclude numeric age 30, leaving Alice and Charlie
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on greater than', () => {
+ const result = df.where('age', '>', 25);
+
+ // Ages 30 and 35 are above 25; the boundary value is excluded
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on greater than or equal', () => {
+ const result = df.where('age', '>=', 30);
+
+ // Ages 30 and 35 satisfy >= 30; the boundary value is included
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on less than', () => {
+ const result = df.where('age', '<', 30);
+
+ // Only age 25 is below 30; the boundary value is excluded
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ ]);
+ });
+
+ test('should filter rows based on less than or equal', () => {
+ const result = df.where('age', '<=', 30);
+
+ // Ages 25 and 30 satisfy <= 30; the boundary value is included
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on in operator', () => {
+ const result = df.where('age', 'in', [25, 35]);
+
+ // Ages 25 and 35 are in the list; 30 is not
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on contains operator for strings', () => {
+ const result = df.where('city', 'contains', 'Francisco');
+
+ // Only San Francisco contains the substring
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on startsWith operator for strings', () => {
+ const result = df.where('city', 'startsWith', 'San');
+
+ // Only San Francisco starts with the prefix
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on endsWith operator for strings', () => {
+ const result = df.where('city', 'endsWith', 'York');
+
+ // Only New York ends with the suffix
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ ]);
+ });
+
+ test('should filter rows based on matches operator for strings', () => {
+ const result = df.where('city', 'matches', /^C/);
+
+ // Only Chicago begins with an uppercase C
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on array contains', () => {
+ const result = df.where('tags', 'contains', 'js');
+
+ // Only Alice has the js tag in her tags array
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ ]);
+ });
+
+ test('should return empty DataFrame when no rows match', () => {
+ const result = df.where('age', '>', 100);
+
+ // Should be empty with no rows
+ expect(result.rowCount).toBe(0);
+ // In the new implementation, an empty DataFrame does not retain the column structure,
+ // which is the normal behavior for fromRecords([])
+ });
+
+ test('should throw error for non-existent column', () => {
+ expect(() => df.where('nonexistent', '===', 30)).toThrow("Column 'nonexistent' not found");
+ });
+
+ test('should throw error for invalid operator', () => {
+ expect(() => df.where('age', 'invalid', 30)).toThrow("Unsupported operator: 'invalid'");
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.where('age', '>', 25);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Filter the data
+ const result = typedDf.where('age', '>', 25);
+
+ // Check that the result contains typed arrays
+ expect(ArrayBuffer.isView(result._columns.age.vector.__data)).toBe(true);
+ expect(ArrayBuffer.isView(result._columns.salary.vector.__data)).toBe(true);
+ // Only check that typed arrays are present, without checking their concrete types.
+ // The types may differ depending on the where method implementation
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.where('age', '===', 30)).toThrow("Column 'age' not found");
+ });
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/where.simple.test.js b/tests/core/methods/dataframe/filtering/where.simple.test.js
new file mode 100644
index 0000000..5ab52c6
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/where.simple.test.js
@@ -0,0 +1,55 @@
+/**
+ * Simple test for the where method
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { where } from '../../../../../packages/core/src/methods/dataframe/filtering/where.js';
+
+// Shared fixture: three records with distinct ages (25, 30, 35), cities and salaries
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000 },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000 },
+];
+
+describe('Where Method - Simple Test', () => {
+ // Add where method to DataFrame prototype
+ DataFrame.prototype.where = function(column, operator, value) {
+ return where(this, column, operator, value);
+ };
+
+ // Create DataFrame
+ const df = DataFrame.fromRecords(testData);
+
+ test('should filter rows based on equality', () => {
+ const result = df.where('age', '===', 30);
+
+ // Check row count
+ expect(result.rowCount).toBe(1);
+
+ // Check that the result contains the correct data
+ const resultArray = result.toArray();
+ expect(resultArray.length).toBe(1);
+ expect(resultArray[0].name).toBe('Bob');
+ expect(resultArray[0].age).toBe(30);
+ });
+
+ test('should return empty DataFrame when no rows match', () => {
+ const result = df.where('age', '>', 100);
+
+ // Check that the result is empty
+ expect(result.rowCount).toBe(0);
+
+ // In the new implementation, an empty DataFrame does not save the column structure
+ // which is normal behavior for fromRecords([])
+ });
+
+ test('should throw error for non-existent column', () => {
+ expect(() => df.where('nonexistent', '===', 30)).toThrow("Column 'nonexistent' not found");
+ });
+
+ test('should throw error for invalid operator', () => {
+ expect(() => df.where('age', 'invalid', 30)).toThrow("Unsupported operator: 'invalid'");
+ });
+});
diff --git a/tests/core/methods/dataframe/filtering/where.test.js b/tests/core/methods/dataframe/filtering/where.test.js
new file mode 100644
index 0000000..8a77cfb
--- /dev/null
+++ b/tests/core/methods/dataframe/filtering/where.test.js
@@ -0,0 +1,253 @@
+/**
+ * Unit tests for the where method
+ * Tests filtering DataFrame rows based on conditions applied to specific columns
+ */
+
+import { describe, test, expect } from 'vitest';
+import { DataFrame } from '../../../../../packages/core/src/data/model/DataFrame.js';
+import { where } from '../../../../../packages/core/src/methods/dataframe/filtering/where.js';
+
+// Shared fixture: three records with distinct ages (25, 30, 35) plus string and array columns
+const testData = [
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+];
+
+describe('Where Method', () => {
+ // Expose where() as an instance method so the tests can call df.where(...)
+ DataFrame.prototype.where = function(column, operator, value) {
+ return where(this, column, operator, value);
+ };
+
+ describe('with standard storage', () => {
+ // One DataFrame built with fromRecords and shared by every test in this group
+ const df = DataFrame.fromRecords(testData);
+
+ test('should filter rows based on strict equality (===)', () => {
+ const result = df.where('age', '===', 30);
+
+ // Only Bob has age 30
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on loose equality (==)', () => {
+ const result = df.where('age', '==', '30');
+
+ // Expect the string operand to match numeric age 30 (Bob)
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on strict inequality (!==)', () => {
+ const result = df.where('age', '!==', 30);
+
+ // Everyone except Bob (age 30) remains
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on loose inequality (!=)', () => {
+ const result = df.where('age', '!=', '35');
+
+ // Expect the string operand to exclude numeric age 35, leaving Alice and Bob
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on greater than (>)', () => {
+ const result = df.where('age', '>', 25);
+
+ // Ages 30 and 35 are above 25; the boundary value is excluded
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on greater than or equal (>=)', () => {
+ const result = df.where('age', '>=', 30);
+
+ // Ages 30 and 35 satisfy >= 30; the boundary value is included
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on less than (<)', () => {
+ const result = df.where('age', '<', 30);
+
+ // Only age 25 is below 30; the boundary value is excluded
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ ]);
+ });
+
+ test('should filter rows based on less than or equal (<=)', () => {
+ const result = df.where('age', '<=', 30);
+
+ // Ages 25 and 30 satisfy <= 30; the boundary value is included
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on in operator', () => {
+ const result = df.where('age', 'in', [25, 35]);
+
+ // Ages 25 and 35 are in the list; 30 is not
+ expect(result.rowCount).toBe(2);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on contains operator for strings', () => {
+ const result = df.where('city', 'contains', 'York');
+
+ // Only New York contains the substring
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ ]);
+ });
+
+ test('should filter rows based on startsWith operator for strings', () => {
+ const result = df.where('city', 'startsWith', 'San');
+
+ // Only San Francisco starts with the prefix
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000, tags: ['dev', 'python'] },
+ ]);
+ });
+
+ test('should filter rows based on endsWith operator for strings', () => {
+ const result = df.where('city', 'endsWith', 'York');
+
+ // Only New York ends with the suffix
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ ]);
+ });
+
+ test('should filter rows based on matches operator for strings', () => {
+ const result = df.where('city', 'matches', /^C/);
+
+ // Only Chicago begins with an uppercase C
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Charlie', age: 35, city: 'Chicago', salary: 90000, tags: ['manager'] },
+ ]);
+ });
+
+ test('should filter rows based on array contains', () => {
+ const result = df.where('tags', 'contains', 'js');
+
+ // Only Alice has the js tag in her tags array
+ expect(result.rowCount).toBe(1);
+ expect(result.toArray()).toEqual([
+ { name: 'Alice', age: 25, city: 'New York', salary: 70000, tags: ['dev', 'js'] },
+ ]);
+ });
+
+ test('should return empty DataFrame when no rows match', () => {
+ const result = df.where('age', '>', 100);
+
+ // Should be empty with no rows
+ expect(result.rowCount).toBe(0);
+ // In the new implementation, an empty DataFrame does not save the column structure
+ // which is normal behavior for fromRecords([])
+ });
+
+ test('should throw error for non-existent column', () => {
+ expect(() => df.where('nonexistent', '===', 30)).toThrow("Column 'nonexistent' not found");
+ });
+
+ test('should throw error for invalid operator', () => {
+ expect(() => df.where('age', 'invalid', 30)).toThrow("Unsupported operator: 'invalid'");
+ });
+
+ test('should return a new DataFrame instance', () => {
+ const result = df.where('age', '>', 25);
+ expect(result).toBeInstanceOf(DataFrame);
+ expect(result).not.toBe(df); // Should be a new instance
+ });
+
+ test('should preserve typed arrays', () => {
+ // Create DataFrame with typed arrays
+ const typedDf = DataFrame.fromRecords(testData, {
+ columns: {
+ age: { type: 'int32' },
+ salary: { type: 'float64' },
+ },
+ });
+
+ // Filter the data
+ const result = typedDf.where('age', '>', 25);
+
+ // Only check that the backing storage is a typed-array view, not its concrete type
+ expect(ArrayBuffer.isView(result._columns.age.vector.__data)).toBe(true);
+ expect(ArrayBuffer.isView(result._columns.salary.vector.__data)).toBe(true);
+ });
+
+ test('should handle empty DataFrame', () => {
+ const emptyDf = DataFrame.fromRecords([]);
+
+ expect(() => emptyDf.where('age', '===', 30)).toThrow("Column 'age' not found");
+ });
+ });
+
+ describe('with filtered columns', () => {
+ // Create DataFrame with only specific columns
+ const df = DataFrame.fromRecords(testData, { columns: ['name', 'city', 'tags'] });
+
+ test('should filter rows based on string columns', () => {
+ const result = df.where('city', 'contains', 'Chicago');
+
+ // Only Charlie lives in Chicago
+ expect(result.rowCount).toBe(1);
+ // Check only the presence of the necessary data, since the where implementation saves all columns
+ const resultArray = result.toArray();
+ expect(resultArray.length).toBe(1);
+ expect(resultArray[0].name).toBe('Charlie');
+ expect(resultArray[0].city).toBe('Chicago');
+ expect(resultArray[0].tags).toEqual(['manager']);
+ });
+
+ test('should filter rows based on array columns', () => {
+ const result = df.where('tags', 'contains', 'dev');
+
+ // Alice and Bob both carry the dev tag
+ expect(result.rowCount).toBe(2);
+ // Check only the presence of the necessary data, since the where implementation saves all columns
+ const resultArray = result.toArray();
+ expect(resultArray.length).toBe(2);
+ expect(resultArray[0].name).toBe('Alice');
+ expect(resultArray[0].city).toBe('New York');
+ expect(resultArray[0].tags).toEqual(['dev', 'js']);
+ expect(resultArray[1].name).toBe('Bob');
+ expect(resultArray[1].city).toBe('San Francisco');
+ expect(resultArray[1].tags).toEqual(['dev', 'python']);
+ });
+ });
+});