diff --git a/src/methods/dataframe/filtering/at.js b/src/methods/dataframe/filtering/at.js index 86e1cae..fbe1a5e 100644 --- a/src/methods/dataframe/filtering/at.js +++ b/src/methods/dataframe/filtering/at.js @@ -6,28 +6,28 @@ * @returns {Object} - Object representing the selected row */ export const at = (df, index) => { - // Проверяем, что индекс является целым числом + // Check that index is an integer if (!Number.isInteger(index)) { throw new Error( `Index must be an integer, got ${typeof index === 'number' ? index : typeof index}`, ); } - // Проверяем, что индекс не отрицательный + // Check that index is not negative if (index < 0) { throw new Error(`Negative indices are not supported, got ${index}`); } const rows = df.toArray(); - // Проверяем, что индекс находится в допустимом диапазоне + // Check that index is in range if (index >= rows.length) { throw new Error( `Index ${index} is out of bounds for DataFrame with ${rows.length} rows`, ); } - // Проверяем, что DataFrame не пустой + // Check that DataFrame is not empty if (rows.length === 0) { throw new Error('Cannot get row from empty DataFrame'); } diff --git a/src/methods/dataframe/filtering/filter.js b/src/methods/dataframe/filtering/filter.js index 6754db8..60f888d 100644 --- a/src/methods/dataframe/filtering/filter.js +++ b/src/methods/dataframe/filtering/filter.js @@ -1,8 +1,8 @@ /** * Filters rows in a DataFrame based on a predicate function * - * @param {DataFrame} df - Экземпляр DataFrame - * @param {Function} predicate - Функция-предикат для фильтрации строк + * @param {DataFrame} df - DataFrame instance + * @param {Function} predicate - Function to apply to each row * @returns {DataFrame} - New DataFrame with filtered rows */ export const filter = (df, predicate) => { @@ -16,12 +16,12 @@ export const filter = (df, predicate) => { // Apply predicate to each row const filteredRows = rows.filter(predicate); - // Если нет результатов, создаем пустой DataFrame с теми же колонками + // If no results, 
create an empty DataFrame with the same columns if (filteredRows.length === 0) { - // Создаем пустой объект с теми же колонками, но пустыми массивами + // Create an empty object with the same columns, but empty arrays const emptyData = {}; for (const col of df.columns) { - // Сохраняем тип массива, если это типизированный массив + // Save the array type, if it's a typed array const originalArray = df._columns[col].vector.__data; if ( ArrayBuffer.isView(originalArray) && @@ -36,13 +36,13 @@ export const filter = (df, predicate) => { return new df.constructor(emptyData); } - // Создаем новый DataFrame с сохранением типов массивов + // Create a new DataFrame with the same columns and types const filteredData = {}; for (const col of df.columns) { const originalArray = df._columns[col].vector.__data; const values = filteredRows.map((row) => row[col]); - // Если оригинальный массив был типизированным, создаем новый типизированный массив + // If the original array was typed, create a new typed array if ( ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView) diff --git a/src/methods/dataframe/filtering/head.js b/src/methods/dataframe/filtering/head.js index e012fd0..4ae9117 100644 --- a/src/methods/dataframe/filtering/head.js +++ b/src/methods/dataframe/filtering/head.js @@ -1,14 +1,14 @@ /** - * Возвращает первые n строк DataFrame + * Returns the first n rows of a DataFrame * - * @param {DataFrame} df - Экземпляр DataFrame - * @param {number} [n=5] - Количество строк для возврата - * @param {Object} [options] - Дополнительные опции - * @param {boolean} [options.print=false] - Опция для совместимости с другими библиотеками - * @returns {DataFrame} - Новый DataFrame с первыми n строками + * @param {DataFrame} df - DataFrame instance + * @param {number} [n=5] - Number of rows to return + * @param {Object} [options] - Additional options + * @param {boolean} [options.print=false] - Option for compatibility with other libraries + * @returns {DataFrame} - New 
DataFrame with the first n rows */ export const head = (df, n = 5, options = { print: false }) => { - // Проверка входных параметров + // Check input parameters if (n <= 0) { throw new Error('Number of rows must be a positive number'); } @@ -16,24 +16,24 @@ export const head = (df, n = 5, options = { print: false }) => { throw new Error('Number of rows must be an integer'); } - // Получаем данные из DataFrame + // Get data from DataFrame const rows = df.toArray(); - // Выбираем первые n строк (или все, если их меньше n) + // Select the first n rows (or all if there are fewer than n) const selectedRows = rows.slice(0, n); - // Создаем новый DataFrame из выбранных строк + // Create a new DataFrame from the selected rows const result = df.constructor.fromRows(selectedRows); - // Примечание: опция print сохранена для совместимости с API, но в текущей версии не используется - // В будущем можно добавить метод print в DataFrame + // Note: the print option is preserved for API compatibility, but is not used in the current version + // In the future, we can add a print method to DataFrame return result; }; /** - * Регистрирует метод head в прототипе DataFrame - * @param {Class} DataFrame - Класс DataFrame для расширения + * Registers the head method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { DataFrame.prototype.head = function (n, options) { diff --git a/src/methods/dataframe/filtering/iloc.js b/src/methods/dataframe/filtering/iloc.js index 981d1ba..bc8c06d 100644 --- a/src/methods/dataframe/filtering/iloc.js +++ b/src/methods/dataframe/filtering/iloc.js @@ -11,11 +11,11 @@ export const iloc = (df, rowSelector, colSelector) => { const allColumns = df.columns; const rowCount = df.rowCount; - // Определяем индексы строк для выбора + // Define row indices for selection let selectedIndices = []; if (typeof rowSelector === 'number') { - // Один индекс строки + // One row index const idx = 
rowSelector < 0 ? rowCount + rowSelector : rowSelector; if (idx < 0 || idx >= rowCount) { throw new Error( @@ -24,7 +24,7 @@ export const iloc = (df, rowSelector, colSelector) => { } selectedIndices = [idx]; } else if (Array.isArray(rowSelector)) { - // Массив индексов строк + // Array of row indices selectedIndices = rowSelector.map((idx) => { const adjustedIdx = idx < 0 ? rowCount + idx : idx; if (adjustedIdx < 0 || adjustedIdx >= rowCount) { @@ -35,14 +35,14 @@ export const iloc = (df, rowSelector, colSelector) => { return adjustedIdx; }); } else if (typeof rowSelector === 'function') { - // Функция, возвращающая true/false для каждого индекса строки + // Function returning true/false for each row index for (let i = 0; i < rowCount; i++) { if (rowSelector(i)) { selectedIndices.push(i); } } } else if (rowSelector === undefined || rowSelector === null) { - // Выбираем все строки, если селектор не указан + // Select all rows if selector is not provided selectedIndices = Array.from({ length: rowCount }, (_, i) => i); } else { throw new Error( @@ -50,15 +50,15 @@ export const iloc = (df, rowSelector, colSelector) => { ); } - // Если не указан селектор колонок, возвращаем все колонки для выбранных строк + // If column selector is not provided, return all columns for selected rows if (colSelector === undefined || colSelector === null) { - // Создаем новый DataFrame с сохранением типов массивов + // Create a new DataFrame preserving typed arrays const filteredData = {}; for (const col of allColumns) { const originalArray = df.col(col).toArray(); const values = selectedIndices.map((index) => originalArray[index]); - // Если оригинальный массив был типизированным, создаем новый типизированный массив + // If original array was typed, create a new typed array if ( ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView) @@ -73,10 +73,10 @@ export const iloc = (df, rowSelector, colSelector) => { return new df.constructor(filteredData); } - // Определяем 
индексы колонок для выбора + // Define column indices for selection let selectedColumnIndices = []; if (typeof colSelector === 'number') { - // Один индекс колонки + // One column index const idx = colSelector < 0 ? allColumns.length + colSelector : colSelector; if (idx < 0 || idx >= allColumns.length) { throw new Error( @@ -85,7 +85,7 @@ export const iloc = (df, rowSelector, colSelector) => { } selectedColumnIndices = [idx]; } else if (Array.isArray(colSelector)) { - // Массив индексов колонок + // Array of column indices selectedColumnIndices = colSelector.map((idx) => { const adjustedIdx = idx < 0 ? allColumns.length + idx : idx; if (adjustedIdx < 0 || adjustedIdx >= allColumns.length) { @@ -96,7 +96,7 @@ export const iloc = (df, rowSelector, colSelector) => { return adjustedIdx; }); } else if (typeof colSelector === 'function') { - // Функция, возвращающая true/false для каждого индекса колонки + // Function returning true/false for each column index for (let i = 0; i < allColumns.length; i++) { if (colSelector(i)) { selectedColumnIndices.push(i); @@ -108,10 +108,10 @@ export const iloc = (df, rowSelector, colSelector) => { ); } - // Получаем имена выбранных колонок + // Get names of selected columns const selectedColumns = selectedColumnIndices.map((idx) => allColumns[idx]); - // Если выбрана только одна строка и одна колонка, возвращаем значение + // If only one row and one column is selected, return the value if ( selectedIndices.length === 1 && selectedColumns.length === 1 && @@ -121,13 +121,13 @@ export const iloc = (df, rowSelector, colSelector) => { return df.col(selectedColumns[0]).toArray()[selectedIndices[0]]; } - // Создаем новый DataFrame с сохранением типов массивов + // Create a new DataFrame preserving typed arrays const filteredData = {}; for (const col of selectedColumns) { const originalArray = df.col(col).toArray(); const values = selectedIndices.map((index) => originalArray[index]); - // Если оригинальный массив был типизированным, создаем 
новый типизированный массив + // If the original array was typed, create a new typed array if ( ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView) diff --git a/src/methods/dataframe/filtering/loc.js b/src/methods/dataframe/filtering/loc.js index ccca0d5..b236c5a 100644 --- a/src/methods/dataframe/filtering/loc.js +++ b/src/methods/dataframe/filtering/loc.js @@ -1,49 +1,49 @@ /** - * Выбирает строки и колонки DataFrame по меткам + * Selects rows and columns from a DataFrame by labels * - * @param {DataFrame} df - Экземпляр DataFrame - * @param {Array|Function|Object} rowSelector - Селектор строк (массив индексов, функция-предикат или объект с условиями) - * @param {Array|string} [colSelector] - Селектор колонок (массив имен колонок или одна колонка) - * @returns {DataFrame|Object} - Новый DataFrame с выбранными строками и колонками или объект, если выбрана одна строка + * @param {DataFrame} df - DataFrame instance + * @param {Array|Function|Object} rowSelector - Row selector (array of indices, predicate function, or object with conditions) + * @param {Array|string} [colSelector] - Column selector (array of column names or one column) + * @returns {DataFrame|Object} - New DataFrame with selected rows and columns, or an object if only one row is selected */ export const loc = (df, rowSelector, colSelector) => { - // Получаем данные из DataFrame + // Get data from DataFrame const rows = df.toArray(); const rowCount = df.rowCount; - // Определяем строки для выбора + // Define rows for selection let selectedRows = []; let selectedIndices = []; if (Array.isArray(rowSelector)) { - // Если rowSelector - массив индексов - // Проверяем, что все индексы в пределах допустимого диапазона + // If rowSelector is an array of indices + // Check that all indices are within the valid range for (const index of rowSelector) { if (index < 0 || index >= rowCount) { throw new Error( - `Индекс строки ${index} выходит за пределы допустимого диапазона [0, ${rowCount - 
1}]`, + `Row index ${index} is out of bounds for DataFrame with ${rowCount} rows`, ); } } selectedIndices = rowSelector; selectedRows = rows.filter((_, index) => rowSelector.includes(index)); } else if (typeof rowSelector === 'number') { - // Если rowSelector - числовой индекс + // If rowSelector is a number if (rowSelector < 0 || rowSelector >= rowCount) { throw new Error( - `Индекс строки ${rowSelector} выходит за пределы допустимого диапазона [0, ${rowCount - 1}]`, + `Row index ${rowSelector} is out of bounds for DataFrame with ${rowCount} rows`, ); } selectedIndices = [rowSelector]; selectedRows = [rows[rowSelector]]; } else if (typeof rowSelector === 'function') { - // Если rowSelector - функция-предикат + // If rowSelector is a predicate function selectedRows = rows.filter(rowSelector); selectedIndices = rows .map((row, index) => (rowSelector(row) ? index : -1)) .filter((index) => index !== -1); } else if (typeof rowSelector === 'object' && rowSelector !== null) { - // Если rowSelector - объект с условиями + // If rowSelector is an object with conditions selectedIndices = []; selectedRows = []; rows.forEach((row, index) => { @@ -60,23 +60,23 @@ export const loc = (df, rowSelector, colSelector) => { } }); } else { - throw new Error('Неверный тип селектора строк'); + throw new Error('Invalid row selector type'); } - // Если не указан селектор колонок, возвращаем все колонки + // If no column selector is specified, return all columns if (colSelector === undefined) { - // Если выбрана только одна строка, возвращаем ее как объект + // If only one row is selected, return it as an object if (selectedRows.length === 1 && typeof rowSelector !== 'function') { return selectedRows[0]; } - // Создаем новый DataFrame с сохранением типов массивов + // Create a new DataFrame preserving typed arrays const filteredData = {}; for (const col of df.columns) { const originalArray = df.col(col).toArray(); const values = selectedIndices.map((index) => originalArray[index]); - // 
Если оригинальный массив был типизированным, создаем новый типизированный массив + // If the original array was typed, create a new typed array if ( ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView) @@ -91,27 +91,27 @@ export const loc = (df, rowSelector, colSelector) => { return new df.constructor(filteredData); } - // Определяем колонки для выбора + // Define columns for selection let selectedColumns = []; if (Array.isArray(colSelector)) { - // Если colSelector - массив имен колонок + // If colSelector is an array of column names selectedColumns = colSelector; } else if (typeof colSelector === 'string') { - // Если colSelector - одна колонка + // If colSelector is a single column name selectedColumns = [colSelector]; } else { - throw new Error('Неверный тип селектора колонок'); + throw new Error('Invalid column selector type'); } - // Проверяем, что все указанные колонки существуют + // Check that all specified columns exist for (const column of selectedColumns) { if (!df.columns.includes(column)) { - throw new Error(`Колонка '${column}' не найдена`); + throw new Error(`Column '${column}' not found`); } } - // Если выбрана только одна строка и одна колонка, возвращаем значение + // If only one row and one column is selected, return the value if ( selectedRows.length === 1 && selectedColumns.length === 1 && @@ -120,13 +120,13 @@ export const loc = (df, rowSelector, colSelector) => { return selectedRows[0][selectedColumns[0]]; } - // Создаем новый DataFrame с сохранением типов массивов + // Create a new DataFrame preserving typed arrays const filteredData = {}; for (const col of selectedColumns) { const originalArray = df.col(col).toArray(); const values = selectedIndices.map((index) => originalArray[index]); - // Если оригинальный массив был типизированным, создаем новый типизированный массив + // If the original array was typed, create a new typed array if ( ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView) @@ 
-142,8 +142,8 @@ export const loc = (df, rowSelector, colSelector) => { }; /** - * Регистрирует метод loc в прототипе DataFrame - * @param {Class} DataFrame - Класс DataFrame для расширения + * Registers the loc method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { DataFrame.prototype.loc = function (rowSelector, colSelector) { diff --git a/src/methods/dataframe/filtering/query.js b/src/methods/dataframe/filtering/query.js index 5e3c343..ef7ac2e 100644 --- a/src/methods/dataframe/filtering/query.js +++ b/src/methods/dataframe/filtering/query.js @@ -1,36 +1,36 @@ /** - * Фильтрует строки DataFrame с использованием SQL-подобного синтаксиса + * Filters DataFrame rows using SQL-like syntax * - * @param {DataFrame} df - Экземпляр DataFrame - * @param {string} queryString - SQL-подобный запрос - * @returns {DataFrame} - Новый DataFrame с отфильтрованными строками + * @param {DataFrame} df - DataFrame instance + * @param {string} queryString - SQL-like query string + * @returns {DataFrame} - New DataFrame with filtered rows */ export const query = (df, queryString) => { if (typeof queryString !== 'string') { - throw new Error('Запрос должен быть строкой'); + throw new Error('Query must be a string'); } - // Получаем данные из DataFrame + // Get data from DataFrame const rows = df.toArray(); - // Создаем функцию для оценки запроса + // Create a function to evaluate the query const evaluateQuery = createQueryEvaluator(queryString); - // Фильтруем строки с помощью функции оценки + // Filter rows using the evaluation function const filteredRows = rows.filter((row) => { try { return evaluateQuery(row); } catch (e) { - throw new Error(`Ошибка при оценке запроса для строки: ${e.message}`); + throw new Error(`Error evaluating query for row: ${e.message}`); } }); - // Если нет отфильтрованных строк, создаем пустой DataFrame с теми же колонками + // If no rows are filtered, create an empty DataFrame with 
the same columns if (filteredRows.length === 0) { - // Создаем пустой объект с теми же колонками, но пустыми массивами + // Create an empty object with the same columns, but empty arrays const emptyData = {}; for (const col of df.columns) { - // Сохраняем тип массива, если это типизированный массив + // Save the array type if it's a typed array const originalArray = df.col(col).toArray(); if ( ArrayBuffer.isView(originalArray) && @@ -45,13 +45,13 @@ export const query = (df, queryString) => { return new df.constructor(emptyData); } - // Создаем новый DataFrame с сохранением типов массивов + // Create a new DataFrame preserving typed arrays const filteredData = {}; for (const col of df.columns) { const originalArray = df.col(col).toArray(); const values = filteredRows.map((row) => row[col]); - // Если оригинальный массив был типизированным, создаем новый типизированный массив + // If the original array was typed, create a new typed array if ( ArrayBuffer.isView(originalArray) && !(originalArray instanceof DataView) @@ -67,12 +67,12 @@ export const query = (df, queryString) => { }; /** - * Создает функцию для оценки SQL-подобного запроса - * @param {string} queryString - SQL-подобный запрос - * @returns {Function} - Функция, оценивающая запрос для строки + * Creates a function to evaluate an SQL-like query + * @param {string} queryString - SQL-like query string + * @returns {Function} - Function evaluating the query for a row */ function createQueryEvaluator(queryString) { - // Заменяем операторы сравнения на JavaScript-эквиваленты + // Replace comparison operators with JavaScript equivalents const jsQuery = queryString .replace(/(\w+)\s*=\s*([^=\s][^=]*)/g, '$1 == $2') // = -> == .replace( @@ -87,7 +87,7 @@ function createQueryEvaluator(queryString) { '($1 >= $2 && $1 <= $3)', ); // BETWEEN -> >= && <= - // Создаем функцию для оценки запроса + // Create a function to evaluate the query try { return new Function( 'row', @@ -102,13 +102,13 @@ function 
createQueryEvaluator(queryString) { `, ); } catch (e) { - throw new Error(`Неверный синтаксис запроса: ${e.message}`); + throw new Error(`Invalid query syntax: ${e.message}`); } } /** - * Регистрирует метод query в прототипе DataFrame - * @param {Class} DataFrame - Класс DataFrame для расширения + * Registers the query method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { DataFrame.prototype.query = function (queryString) { diff --git a/src/methods/dataframe/filtering/register.js b/src/methods/dataframe/filtering/register.js index 6a04d51..d95652e 100644 --- a/src/methods/dataframe/filtering/register.js +++ b/src/methods/dataframe/filtering/register.js @@ -7,14 +7,8 @@ import { register as registerWhere } from './where.js'; import { register as registerExpr$ } from './expr$.js'; import { register as registerSelect } from './select.js'; import { register as registerDrop } from './drop.js'; -import { register as registerAt } from './at.js'; -import { register as registerIloc } from './iloc.js'; import { register as registerStratifiedSample } from './stratifiedSample.js'; -import { register as registerHead } from './head.js'; -import { register as registerTail } from './tail.js'; -import { register as registerSample } from './sample.js'; import { register as registerSelectByPattern } from './selectByPattern.js'; -import { register as registerLoc } from './loc.js'; import { register as registerQuery } from './query.js'; /** @@ -28,18 +22,12 @@ export function registerDataFrameFiltering(DataFrame) { registerExpr$(DataFrame); registerSelect(DataFrame); registerDrop(DataFrame); - registerAt(DataFrame); - registerIloc(DataFrame); registerStratifiedSample(DataFrame); - registerHead(DataFrame); - registerTail(DataFrame); - registerSample(DataFrame); registerSelectByPattern(DataFrame); - registerLoc(DataFrame); registerQuery(DataFrame); // Add additional filtering methods here as they are implemented 
- // For example: head, tail, query, loc, sample, stratifiedSample, selectByPattern + // For example: query, stratifiedSample, selectByPattern } export default registerDataFrameFiltering; diff --git a/src/methods/dataframe/filtering/sample.js b/src/methods/dataframe/filtering/sample.js index 42332f1..189e2c1 100644 --- a/src/methods/dataframe/filtering/sample.js +++ b/src/methods/dataframe/filtering/sample.js @@ -1,71 +1,69 @@ /** - * Выбирает случайную выборку строк из DataFrame + * Selects a random sample of rows from DataFrame * - * @param {DataFrame} df - Экземпляр DataFrame - * @param {number|Object} n - Количество строк для выборки или объект с опциями - * @param {Object} [options] - Дополнительные опции - * @param {number} [options.seed] - Seed для генератора случайных чисел - * @param {boolean} [options.replace=false] - Выборка с возвращением - * @param {boolean} [options.fraction] - Доля строк для выборки (0 < fraction <= 1) - * @returns {DataFrame} - Новый DataFrame с выбранными строками + * @param {DataFrame} df - DataFrame instance + * @param {number|Object} n - Number of rows to sample or options object + * @param {Object} [options] - Additional options + * @param {number} [options.seed] - Seed for random number generator + * @param {boolean} [options.replace=false] - Sampling with replacement + * @param {boolean} [options.fraction] - Fraction of rows to sample (0 < fraction <= 1) + * @returns {DataFrame} - New DataFrame with sampled rows */ export const sample = (df, n, options = {}) => { - // Обработка случая, когда n - это объект с опциями + // Handle case when n is an options object if (typeof n === 'object') { options = n; n = undefined; } - // Получаем данные из DataFrame + // Get data from DataFrame const rows = df.toArray(); if (rows.length === 0) { return new df.constructor({}); } - // Определяем количество строк для выборки + // Determine the number of rows to sample let sampleSize; if (options.fraction !== undefined) { if (options.fraction 
<= 0 || options.fraction > 1) { - throw new Error('Доля выборки должна быть в диапазоне (0, 1]'); + throw new Error('Fraction must be in the range (0, 1]'); } sampleSize = Math.round(rows.length * options.fraction); } else { sampleSize = n !== undefined ? n : 1; } - // Проверка корректности количества строк + // Check the validity of the number of rows if (sampleSize <= 0) { - throw new Error( - 'Количество строк для выборки должно быть положительным числом', - ); + throw new Error('Number of rows to sample must be a positive number'); } - // Проверка, что размер выборки является целым числом + // Check that the sample size is an integer if (!Number.isInteger(sampleSize)) { - throw new Error('Количество строк для выборки должно быть целым числом'); + throw new Error('Number of rows to sample must be an integer'); } - // Если выборка без возвращения и размер выборки больше количества строк + // If sampling without replacement and sample size is greater than number of rows if (!options.replace && sampleSize > rows.length) { throw new Error( - `Размер выборки (${sampleSize}) не может быть больше количества строк (${rows.length})`, + `Sample size (${sampleSize}) cannot be greater than number of rows (${rows.length})`, ); } - // Создаем генератор случайных чисел с seed, если указан + // Create a random number generator with seed if specified const random = options.seed !== undefined ? 
createSeededRandom(options.seed) : Math.random; - // Выбираем строки + // Select rows const sampledRows = []; if (options.replace) { - // Выборка с возвращением + // Sampling with replacement for (let i = 0; i < sampleSize; i++) { const index = Math.floor(random() * rows.length); sampledRows.push(rows[index]); } } else { - // Выборка без возвращения (используем алгоритм Фишера-Йейтса) + // Sampling without replacement (using Fisher-Yates algorithm) const indices = Array.from({ length: rows.length }, (_, i) => i); for (let i = indices.length - 1; i > 0; i--) { const j = Math.floor(random() * (i + 1)); @@ -76,26 +74,26 @@ export const sample = (df, n, options = {}) => { } } - // Создаем новый DataFrame из выбранных строк + // Create a new DataFrame from sampled rows return df.constructor.fromRows(sampledRows); }; /** - * Создает генератор псевдослучайных чисел с заданным seed - * @param {number} seed - Начальное значение для генератора - * @returns {Function} - Функция, возвращающая псевдослучайное число в диапазоне [0, 1) + * Creates a random number generator with seed + * @param {number} seed - Seed for random number generator + * @returns {Function} - Function returning pseudorandom number in range [0, 1) */ function createSeededRandom(seed) { return function () { - // Простой линейный конгруэнтный генератор + // Simple linear congruential generator seed = (seed * 9301 + 49297) % 233280; return seed / 233280; }; } /** - * Регистрирует метод sample в прототипе DataFrame - * @param {Class} DataFrame - Класс DataFrame для расширения + * Registers the sample method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { DataFrame.prototype.sample = function (n, options) { diff --git a/src/methods/dataframe/filtering/select.js b/src/methods/dataframe/filtering/select.js index b4282bb..c0a87d7 100644 --- a/src/methods/dataframe/filtering/select.js +++ b/src/methods/dataframe/filtering/select.js @@ -6,9 
+6,9 @@ * @returns {DataFrame} - New DataFrame with only the selected columns */ export const select = (df, columns) => { - // Проверяем, что columns является массивом + // Validate that columns is an array if (!Array.isArray(columns)) { - throw new Error('Columns должен быть массивом'); + throw new Error('Columns must be an array'); } // Validate that all columns exist @@ -34,7 +34,7 @@ export const select = (df, columns) => { */ export const register = (DataFrame) => { DataFrame.prototype.select = function (...args) { - // Если передан не массив, а несколько аргументов, преобразуем их в массив + // If not an array, convert arguments to an array const columnsArray = args.length > 1 ? args : Array.isArray(args[0]) ? args[0] : [args[0]]; diff --git a/src/methods/dataframe/filtering/selectByPattern.js b/src/methods/dataframe/filtering/selectByPattern.js index 680ede5..dfac8d5 100644 --- a/src/methods/dataframe/filtering/selectByPattern.js +++ b/src/methods/dataframe/filtering/selectByPattern.js @@ -1,46 +1,44 @@ /** - * Выбирает колонки DataFrame, соответствующие регулярному выражению + * Selects columns from DataFrame that match a regular expression * - * @param {DataFrame} df - Экземпляр DataFrame - * @param {RegExp|string} pattern - Регулярное выражение или строка для поиска - * @returns {DataFrame} - Новый DataFrame только с выбранными колонками + * @param {DataFrame} df - DataFrame instance + * @param {RegExp|string} pattern - Regular expression or string to search for + * @returns {DataFrame} - New DataFrame with only the selected columns */ export const selectByPattern = (df, pattern) => { - // Проверка типа паттерна + // Validate pattern type if (typeof pattern !== 'string' && !(pattern instanceof RegExp)) { - throw new TypeError( - 'Паттерн должен быть строкой или регулярным выражением', - ); + throw new TypeError('Pattern must be a string or regular expression'); } - // Преобразуем строку в регулярное выражение, если необходимо + // Convert string to 
regular expression if necessary const regex = pattern instanceof RegExp ? pattern : new RegExp(pattern); - // Находим колонки, соответствующие паттерну + // Find columns that match the pattern const matchedColumns = df.columns.filter((column) => regex.test(column)); - // Если не найдено ни одной колонки, возвращаем пустой DataFrame + // If no columns are found, return an empty DataFrame if (matchedColumns.length === 0) { - // Создаем пустой DataFrame + // Create an empty DataFrame return new df.constructor({}); } - // Создаем новый объект с данными только для выбранных колонок + // Create a new object with only the selected columns const selectedData = {}; - // Сохраняем типы массивов + // Save array types for (const column of matchedColumns) { - // Получаем данные из оригинального DataFrame + // Get data from original DataFrame selectedData[column] = df.col(column).toArray(); } - // Создаем новый DataFrame с выбранными колонками, сохраняя тип хранилища + // Create a new DataFrame with selected columns, preserving array types return new df.constructor(selectedData); }; /** - * Регистрирует метод selectByPattern в прототипе DataFrame - * @param {Class} DataFrame - Класс DataFrame для расширения + * Registers the selectByPattern method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { DataFrame.prototype.selectByPattern = function (pattern) { diff --git a/src/methods/dataframe/filtering/stratifiedSample.js b/src/methods/dataframe/filtering/stratifiedSample.js index 2516260..42dab05 100644 --- a/src/methods/dataframe/filtering/stratifiedSample.js +++ b/src/methods/dataframe/filtering/stratifiedSample.js @@ -1,12 +1,12 @@ /** - * Выбирает стратифицированную выборку из DataFrame, сохраняя пропорции категорий. + * Selects stratified sample from DataFrame, preserving category proportions. 
* - * @param {DataFrame} df - Экземпляр DataFrame - * @param {string} stratifyColumn - Имя колонки для стратификации - * @param {number} fraction - Доля строк для выборки (0 < fraction <= 1) - * @param {Object} [options] - Дополнительные опции - * @param {number} [options.seed] - Seed для генератора случайных чисел - * @returns {DataFrame} - Новый DataFrame с выбранными строками + * @param {DataFrame} df - DataFrame instance + * @param {string} stratifyColumn - Column name for stratification + * @param {number} fraction - Fraction of rows to sample (0 < fraction <= 1) + * @param {Object} [options] - Additional options + * @param {number} [options.seed] - Seed for random number generator + * @returns {DataFrame} - New DataFrame with sampled rows */ export const stratifiedSample = ( df, @@ -14,23 +14,23 @@ export const stratifiedSample = ( fraction, options = {}, ) => { - // Проверка входных параметров + // Validate input parameters if (!df.columns.includes(stratifyColumn)) { - throw new Error(`Колонка '${stratifyColumn}' не найдена`); + throw new Error(`Column '${stratifyColumn}' not found`); } if (fraction <= 0 || fraction > 1) { - throw new Error('Доля выборки должна быть в диапазоне (0, 1]'); + throw new Error('Fraction must be in the range (0, 1]'); } - // Получаем данные из DataFrame + // Get data from DataFrame const rows = df.toArray(); if (rows.length === 0) { - // Возвращаем пустой DataFrame с тем же типом хранилища + // Return an empty DataFrame with the same storage type return new df.constructor({}); } - // Группируем строки по категориям + // Group rows by categories const categories = {}; rows.forEach((row) => { const category = row[stratifyColumn]; @@ -40,45 +40,45 @@ export const stratifiedSample = ( categories[category].push(row); }); - // Создаем генератор случайных чисел с seed, если указан + // Create a random number generator with seed if specified const random = options.seed !== undefined ? 
createSeededRandom(options.seed) : Math.random; - // Выбираем строки из каждой категории, сохраняя пропорции + // Select rows from each category, preserving proportions const sampledRows = []; Object.entries(categories).forEach(([category, categoryRows]) => { - // Вычисляем количество строк для выборки из этой категории + // Calculate the number of rows to sample from this category let sampleSize = Math.round(categoryRows.length * fraction); - // Гарантируем, что каждая категория имеет хотя бы одну строку + // Ensure each category has at least one row sampleSize = Math.max(1, sampleSize); sampleSize = Math.min(categoryRows.length, sampleSize); - // Перемешиваем строки и выбираем нужное количество + // Shuffle rows and select the required number const shuffled = [...categoryRows].sort(() => 0.5 - random()); sampledRows.push(...shuffled.slice(0, sampleSize)); }); - // Создаем новый DataFrame из выбранных строк + // Create a new DataFrame from sampled rows return df.constructor.fromRows(sampledRows); }; /** - * Создает генератор псевдослучайных чисел с заданным seed - * @param {number} seed - Начальное значение для генератора - * @returns {Function} - Функция, возвращающая псевдослучайное число в диапазоне [0, 1) + * Creates a random number generator with seed + * @param {number} seed - Seed for random number generator + * @returns {Function} - Function returning pseudorandom number in range [0, 1) */ function createSeededRandom(seed) { return function () { - // Простой линейный конгруэнтный генератор + // Simple linear congruential generator seed = (seed * 9301 + 49297) % 233280; return seed / 233280; }; } /** - * Регистрирует метод stratifiedSample в прототипе DataFrame - * @param {Class} DataFrame - Класс DataFrame для расширения + * Registers the stratifiedSample method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { DataFrame.prototype.stratifiedSample = function ( diff --git 
a/src/methods/dataframe/filtering/tail.js b/src/methods/dataframe/filtering/tail.js index 0dec905..19f17d9 100644 --- a/src/methods/dataframe/filtering/tail.js +++ b/src/methods/dataframe/filtering/tail.js @@ -1,14 +1,14 @@ /** - * Возвращает последние n строк DataFrame + * Returns the last n rows of DataFrame * - * @param {DataFrame} df - Экземпляр DataFrame - * @param {number} [n=5] - Количество строк для возврата - * @param {Object} [options] - Дополнительные опции - * @param {boolean} [options.print=false] - Опция для совместимости с другими библиотеками - * @returns {DataFrame} - Новый DataFrame с последними n строками + * @param {DataFrame} df - DataFrame instance + * @param {number} [n=5] - Number of rows to return + * @param {Object} [options] - Additional options + * @param {boolean} [options.print=false] - Option for compatibility with other libraries + * @returns {DataFrame} - New DataFrame with the last n rows */ export const tail = (df, n = 5, options = { print: false }) => { - // Проверка входных параметров + // Validate input parameters if (n <= 0) { throw new Error('Number of rows must be a positive number'); } @@ -16,24 +16,24 @@ export const tail = (df, n = 5, options = { print: false }) => { throw new Error('Number of rows must be an integer'); } - // Получаем данные из DataFrame + // Get data from DataFrame const rows = df.toArray(); - // Выбираем последние n строк (или все, если их меньше n) + // Select the last n rows (or all if there are fewer than n) const selectedRows = rows.slice(-n); - // Создаем новый DataFrame из выбранных строк + // Create a new DataFrame from selected rows const result = df.constructor.fromRows(selectedRows); - // Примечание: опция print сохранена для совместимости с API, но в текущей версии не используется - // В будущем можно добавить метод print в DataFrame + // Note: the print option is preserved for API compatibility, but is not used in the current version + // In the future, we can add the print method to 
DataFrame return result; }; /** - * Регистрирует метод tail в прототипе DataFrame - * @param {Class} DataFrame - Класс DataFrame для расширения + * Registers the tail method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { DataFrame.prototype.tail = function (n, options) { diff --git a/src/methods/dataframe/index.js b/src/methods/dataframe/index.js index f7df941..2b711de 100644 --- a/src/methods/dataframe/index.js +++ b/src/methods/dataframe/index.js @@ -6,7 +6,8 @@ // Import all method groups import './filtering/index.js'; import './display/index.js'; -// Импортируйте другие группы методов по мере необходимости +import './transform/index.js'; +// Import other method groups as needed // Export nothing as methods are attached to DataFrame prototype export {}; diff --git a/src/methods/dataframe/indexing/at.js b/src/methods/dataframe/indexing/at.js new file mode 100644 index 0000000..fbe1a5e --- /dev/null +++ b/src/methods/dataframe/indexing/at.js @@ -0,0 +1,48 @@ +/** + * Selects a single row from a DataFrame by index. + * + * @param {DataFrame} df - DataFrame instance + * @param {number} index - Row index to select + * @returns {Object} - Object representing the selected row + */ +export const at = (df, index) => { + // Check that index is an integer + if (!Number.isInteger(index)) { + throw new Error( + `Index must be an integer, got ${typeof index === 'number' ? 
index : typeof index}`, + ); + } + + // Check that index is not negative + if (index < 0) { + throw new Error(`Negative indices are not supported, got ${index}`); + } + + const rows = df.toArray(); + + // Check that index is in range + if (index >= rows.length) { + throw new Error( + `Index ${index} is out of bounds for DataFrame with ${rows.length} rows`, + ); + } + + // Check that DataFrame is not empty + if (rows.length === 0) { + throw new Error('Cannot get row from empty DataFrame'); + } + + return rows[index]; +}; + +/** + * Registers the at method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend + */ +export const register = (DataFrame) => { + DataFrame.prototype.at = function (index) { + return at(this, index); + }; +}; + +export default { at, register }; diff --git a/src/methods/dataframe/indexing/head.js b/src/methods/dataframe/indexing/head.js new file mode 100644 index 0000000..4ae9117 --- /dev/null +++ b/src/methods/dataframe/indexing/head.js @@ -0,0 +1,44 @@ +/** + * Returns the first n rows of a DataFrame + * + * @param {DataFrame} df - DataFrame instance + * @param {number} [n=5] - Number of rows to return + * @param {Object} [options] - Additional options + * @param {boolean} [options.print=false] - Option for compatibility with other libraries + * @returns {DataFrame} - New DataFrame with the first n rows + */ +export const head = (df, n = 5, options = { print: false }) => { + // Check input parameters + if (n <= 0) { + throw new Error('Number of rows must be a positive number'); + } + if (!Number.isInteger(n)) { + throw new Error('Number of rows must be an integer'); + } + + // Get data from DataFrame + const rows = df.toArray(); + + // Select the first n rows (or all if there are fewer than n) + const selectedRows = rows.slice(0, n); + + // Create a new DataFrame from the selected rows + const result = df.constructor.fromRows(selectedRows); + + // Note: the print option is preserved for API compatibility, but is 
not used in the current version + // In the future, we can add a print method to DataFrame + + return result; +}; + +/** + * Registers the head method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend + */ +export const register = (DataFrame) => { + DataFrame.prototype.head = function (n, options) { + return head(this, n, options); + }; +}; + +export default { head, register }; diff --git a/src/methods/dataframe/indexing/iloc.js b/src/methods/dataframe/indexing/iloc.js new file mode 100644 index 0000000..bc8c06d --- /dev/null +++ b/src/methods/dataframe/indexing/iloc.js @@ -0,0 +1,155 @@ +/** + * Selects rows and columns from a DataFrame by integer positions. + * + * @param {DataFrame} df - DataFrame instance + * @param {number|number[]|Function} rowSelector - Row indices to select + * @param {number|number[]|Function} [colSelector] - Column indices to select + * @returns {DataFrame|Object} - New DataFrame with selected rows and columns, or a single row if only one row is selected + */ +export const iloc = (df, rowSelector, colSelector) => { + const rows = df.toArray(); + const allColumns = df.columns; + const rowCount = df.rowCount; + + // Define row indices for selection + let selectedIndices = []; + + if (typeof rowSelector === 'number') { + // One row index + const idx = rowSelector < 0 ? rowCount + rowSelector : rowSelector; + if (idx < 0 || idx >= rowCount) { + throw new Error( + `Row index ${rowSelector} is out of bounds for DataFrame with ${rowCount} rows`, + ); + } + selectedIndices = [idx]; + } else if (Array.isArray(rowSelector)) { + // Array of row indices + selectedIndices = rowSelector.map((idx) => { + const adjustedIdx = idx < 0 ? 
rowCount + idx : idx; + if (adjustedIdx < 0 || adjustedIdx >= rowCount) { + throw new Error( + `Row index ${idx} is out of bounds for DataFrame with ${rowCount} rows`, + ); + } + return adjustedIdx; + }); + } else if (typeof rowSelector === 'function') { + // Function returning true/false for each row index + for (let i = 0; i < rowCount; i++) { + if (rowSelector(i)) { + selectedIndices.push(i); + } + } + } else if (rowSelector === undefined || rowSelector === null) { + // Select all rows if selector is not provided + selectedIndices = Array.from({ length: rowCount }, (_, i) => i); + } else { + throw new Error( + 'Invalid row selector: must be a number, array of numbers, or function', + ); + } + + // If column selector is not provided, return all columns for selected rows + if (colSelector === undefined || colSelector === null) { + // Create a new DataFrame preserving typed arrays + const filteredData = {}; + for (const col of allColumns) { + const originalArray = df.col(col).toArray(); + const values = selectedIndices.map((index) => originalArray[index]); + + // If original array was typed, create a new typed array + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { + const TypedArrayConstructor = originalArray.constructor; + filteredData[col] = new TypedArrayConstructor(values); + } else { + filteredData[col] = values; + } + } + + return new df.constructor(filteredData); + } + + // Define column indices for selection + let selectedColumnIndices = []; + if (typeof colSelector === 'number') { + // One column index + const idx = colSelector < 0 ? 
allColumns.length + colSelector : colSelector; + if (idx < 0 || idx >= allColumns.length) { + throw new Error( + `Column index ${colSelector} is out of bounds for DataFrame with ${allColumns.length} columns`, + ); + } + selectedColumnIndices = [idx]; + } else if (Array.isArray(colSelector)) { + // Array of column indices + selectedColumnIndices = colSelector.map((idx) => { + const adjustedIdx = idx < 0 ? allColumns.length + idx : idx; + if (adjustedIdx < 0 || adjustedIdx >= allColumns.length) { + throw new Error( + `Column index ${idx} is out of bounds for DataFrame with ${allColumns.length} columns`, + ); + } + return adjustedIdx; + }); + } else if (typeof colSelector === 'function') { + // Function returning true/false for each column index + for (let i = 0; i < allColumns.length; i++) { + if (colSelector(i)) { + selectedColumnIndices.push(i); + } + } + } else { + throw new Error( + 'Invalid column selector: must be a number, array of numbers, or function', + ); + } + + // Get names of selected columns + const selectedColumns = selectedColumnIndices.map((idx) => allColumns[idx]); + + // If only one row and one column is selected, return the value + if ( + selectedIndices.length === 1 && + selectedColumns.length === 1 && + typeof rowSelector === 'number' && + typeof colSelector === 'number' + ) { + return df.col(selectedColumns[0]).toArray()[selectedIndices[0]]; + } + + // Create a new DataFrame preserving typed arrays + const filteredData = {}; + for (const col of selectedColumns) { + const originalArray = df.col(col).toArray(); + const values = selectedIndices.map((index) => originalArray[index]); + + // If the original array was typed, create a new typed array + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { + const TypedArrayConstructor = originalArray.constructor; + filteredData[col] = new TypedArrayConstructor(values); + } else { + filteredData[col] = values; + } + } + + return new df.constructor(filteredData); +}; + 
+/** + * Registers the iloc method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend + */ +export const register = (DataFrame) => { + DataFrame.prototype.iloc = function (rowSelector, colSelector) { + return iloc(this, rowSelector, colSelector); + }; +}; + +export default { iloc, register }; diff --git a/src/methods/dataframe/indexing/loc.js b/src/methods/dataframe/indexing/loc.js new file mode 100644 index 0000000..b236c5a --- /dev/null +++ b/src/methods/dataframe/indexing/loc.js @@ -0,0 +1,154 @@ +/** + * Selects rows and columns from a DataFrame by labels + * + * @param {DataFrame} df - DataFrame instance + * @param {Array|Function|Object} rowSelector - Row selector (array of indices, predicate function, or object with conditions) + * @param {Array|string} [colSelector] - Column selector (array of column names or one column) + * @returns {DataFrame|Object} - New DataFrame with selected rows and columns, or an object if only one row is selected + */ +export const loc = (df, rowSelector, colSelector) => { + // Get data from DataFrame + const rows = df.toArray(); + const rowCount = df.rowCount; + + // Define rows for selection + let selectedRows = []; + let selectedIndices = []; + + if (Array.isArray(rowSelector)) { + // If rowSelector is an array of indices + // Check that all indices are within the valid range + for (const index of rowSelector) { + if (index < 0 || index >= rowCount) { + throw new Error( + `Row index ${index} is out of bounds for DataFrame with ${rowCount} rows`, + ); + } + } + selectedIndices = rowSelector; + selectedRows = rows.filter((_, index) => rowSelector.includes(index)); + } else if (typeof rowSelector === 'number') { + // If rowSelector is a number + if (rowSelector < 0 || rowSelector >= rowCount) { + throw new Error( + `Row index ${rowSelector} is out of bounds for DataFrame with ${rowCount} rows`, + ); + } + selectedIndices = [rowSelector]; + selectedRows = [rows[rowSelector]]; + } else if (typeof 
rowSelector === 'function') { + // If rowSelector is a predicate function + selectedRows = rows.filter(rowSelector); + selectedIndices = rows + .map((row, index) => (rowSelector(row) ? index : -1)) + .filter((index) => index !== -1); + } else if (typeof rowSelector === 'object' && rowSelector !== null) { + // If rowSelector is an object with conditions + selectedIndices = []; + selectedRows = []; + rows.forEach((row, index) => { + let match = true; + for (const [key, value] of Object.entries(rowSelector)) { + if (row[key] !== value) { + match = false; + break; + } + } + if (match) { + selectedIndices.push(index); + selectedRows.push(row); + } + }); + } else { + throw new Error('Invalid row selector type'); + } + + // If no column selector is specified, return all columns + if (colSelector === undefined) { + // If only one row is selected, return it as an object + if (selectedRows.length === 1 && typeof rowSelector !== 'function') { + return selectedRows[0]; + } + + // Create a new DataFrame preserving typed arrays + const filteredData = {}; + for (const col of df.columns) { + const originalArray = df.col(col).toArray(); + const values = selectedIndices.map((index) => originalArray[index]); + + // If the original array was typed, create a new typed array + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { + const TypedArrayConstructor = originalArray.constructor; + filteredData[col] = new TypedArrayConstructor(values); + } else { + filteredData[col] = values; + } + } + + return new df.constructor(filteredData); + } + + // Define columns for selection + let selectedColumns = []; + + if (Array.isArray(colSelector)) { + // If colSelector is an array of column names + selectedColumns = colSelector; + } else if (typeof colSelector === 'string') { + // If colSelector is a single column name + selectedColumns = [colSelector]; + } else { + throw new Error('Invalid column selector type'); + } + + // Check that all specified columns 
exist + for (const column of selectedColumns) { + if (!df.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + } + + // If only one row and one column is selected, return the value + if ( + selectedRows.length === 1 && + selectedColumns.length === 1 && + typeof rowSelector !== 'function' + ) { + return selectedRows[0][selectedColumns[0]]; + } + + // Create a new DataFrame preserving typed arrays + const filteredData = {}; + for (const col of selectedColumns) { + const originalArray = df.col(col).toArray(); + const values = selectedIndices.map((index) => originalArray[index]); + + // If the original array was typed, create a new typed array + if ( + ArrayBuffer.isView(originalArray) && + !(originalArray instanceof DataView) + ) { + const TypedArrayConstructor = originalArray.constructor; + filteredData[col] = new TypedArrayConstructor(values); + } else { + filteredData[col] = values; + } + } + + return new df.constructor(filteredData); +}; + +/** + * Registers the loc method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend + */ +export const register = (DataFrame) => { + DataFrame.prototype.loc = function (rowSelector, colSelector) { + return loc(this, rowSelector, colSelector); + }; +}; + +export default { loc, register }; diff --git a/src/methods/dataframe/indexing/register.js b/src/methods/dataframe/indexing/register.js new file mode 100644 index 0000000..3ffe90a --- /dev/null +++ b/src/methods/dataframe/indexing/register.js @@ -0,0 +1,23 @@ +/** + * Registration of DataFrame indexing methods + */ + +import { register as registerAt } from './at.js'; +import { register as registerHead } from './head.js'; +import { register as registerTail } from './tail.js'; +import { register as registerIloc } from './iloc.js'; +import { register as registerLoc } from './loc.js'; +import { register as registerSample } from './sample.js'; + +/** + * Register all indexing methods on DataFrame prototype + * @param 
{object} DataFrame - DataFrame constructor + */ +export function register(DataFrame) { + registerAt(DataFrame); + registerHead(DataFrame); + registerTail(DataFrame); + registerIloc(DataFrame); + registerLoc(DataFrame); + registerSample(DataFrame); +} diff --git a/src/methods/dataframe/indexing/sample.js b/src/methods/dataframe/indexing/sample.js new file mode 100644 index 0000000..189e2c1 --- /dev/null +++ b/src/methods/dataframe/indexing/sample.js @@ -0,0 +1,104 @@ +/** + * Selects a random sample of rows from DataFrame + * + * @param {DataFrame} df - DataFrame instance + * @param {number|Object} n - Number of rows to sample or options object + * @param {Object} [options] - Additional options + * @param {number} [options.seed] - Seed for random number generator + * @param {boolean} [options.replace=false] - Sampling with replacement + * @param {boolean} [options.fraction] - Fraction of rows to sample (0 < fraction <= 1) + * @returns {DataFrame} - New DataFrame with sampled rows + */ +export const sample = (df, n, options = {}) => { + // Handle case when n is an options object + if (typeof n === 'object') { + options = n; + n = undefined; + } + + // Get data from DataFrame + const rows = df.toArray(); + if (rows.length === 0) { + return new df.constructor({}); + } + + // Determine the number of rows to sample + let sampleSize; + if (options.fraction !== undefined) { + if (options.fraction <= 0 || options.fraction > 1) { + throw new Error('Fraction must be in the range (0, 1]'); + } + sampleSize = Math.round(rows.length * options.fraction); + } else { + sampleSize = n !== undefined ? 
n : 1; + } + + // Check the validity of the number of rows + if (sampleSize <= 0) { + throw new Error('Number of rows to sample must be a positive number'); + } + + // Check that the sample size is an integer + if (!Number.isInteger(sampleSize)) { + throw new Error('Number of rows to sample must be an integer'); + } + + // If sampling without replacement and sample size is greater than number of rows + if (!options.replace && sampleSize > rows.length) { + throw new Error( + `Sample size (${sampleSize}) cannot be greater than number of rows (${rows.length})`, + ); + } + + // Create a random number generator with seed if specified + const random = + options.seed !== undefined ? createSeededRandom(options.seed) : Math.random; + + // Select rows + const sampledRows = []; + if (options.replace) { + // Sampling with replacement + for (let i = 0; i < sampleSize; i++) { + const index = Math.floor(random() * rows.length); + sampledRows.push(rows[index]); + } + } else { + // Sampling without replacement (using Fisher-Yates algorithm) + const indices = Array.from({ length: rows.length }, (_, i) => i); + for (let i = indices.length - 1; i > 0; i--) { + const j = Math.floor(random() * (i + 1)); + [indices[i], indices[j]] = [indices[j], indices[i]]; + } + for (let i = 0; i < sampleSize; i++) { + sampledRows.push(rows[indices[i]]); + } + } + + // Create a new DataFrame from sampled rows + return df.constructor.fromRows(sampledRows); +}; + +/** + * Creates a random number generator with seed + * @param {number} seed - Seed for random number generator + * @returns {Function} - Function returning pseudorandom number in range [0, 1) + */ +function createSeededRandom(seed) { + return function () { + // Simple linear congruential generator + seed = (seed * 9301 + 49297) % 233280; + return seed / 233280; + }; +} + +/** + * Registers the sample method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend + */ +export const register = (DataFrame) => { + 
DataFrame.prototype.sample = function (n, options) { + return sample(this, n, options); + }; +}; + +export default { sample, register }; diff --git a/src/methods/dataframe/indexing/tail.js b/src/methods/dataframe/indexing/tail.js new file mode 100644 index 0000000..19f17d9 --- /dev/null +++ b/src/methods/dataframe/indexing/tail.js @@ -0,0 +1,44 @@ +/** + * Returns the last n rows of DataFrame + * + * @param {DataFrame} df - DataFrame instance + * @param {number} [n=5] - Number of rows to return + * @param {Object} [options] - Additional options + * @param {boolean} [options.print=false] - Option for compatibility with other libraries + * @returns {DataFrame} - New DataFrame with the last n rows + */ +export const tail = (df, n = 5, options = { print: false }) => { + // Validate input parameters + if (n <= 0) { + throw new Error('Number of rows must be a positive number'); + } + if (!Number.isInteger(n)) { + throw new Error('Number of rows must be an integer'); + } + + // Get data from DataFrame + const rows = df.toArray(); + + // Select the last n rows (or all if there are fewer than n) + const selectedRows = rows.slice(-n); + + // Create a new DataFrame from selected rows + const result = df.constructor.fromRows(selectedRows); + + // Note: the print option is preserved for API compatibility, but is not used in the current version + // In the future, we can add the print method to DataFrame + + return result; +}; + +/** + * Registers the tail method on DataFrame prototype + * @param {Class} DataFrame - DataFrame class to extend + */ +export const register = (DataFrame) => { + DataFrame.prototype.tail = function (n, options) { + return tail(this, n, options); + }; +}; + +export default { tail, register }; diff --git a/src/methods/dataframe/registerAll.js b/src/methods/dataframe/registerAll.js index 6201a68..fc7e9fc 100644 --- a/src/methods/dataframe/registerAll.js +++ b/src/methods/dataframe/registerAll.js @@ -6,9 +6,10 @@ // Import registrars from different 
categories import { registerDataFrameAggregation } from './aggregation/register.js'; import { registerDataFrameFiltering } from './filtering/register.js'; +import { registerDataFrameIndexing } from './indexing/register.js'; import { registerDataFrameTransform } from './transform/register.js'; import { registerDataFrameDisplay } from './display/register.js'; -import { registerDataFrameTimeSeries } from './timeseries/register.js'; +import { registerDataFrameTimeSeries } from '../timeseries/dataframe/register.js'; import { registerReshapeMethods } from '../reshape/register.js'; /** @@ -19,6 +20,7 @@ export function extendDataFrame(DataFrame) { // Apply all registrars to the DataFrame class registerDataFrameAggregation(DataFrame); registerDataFrameFiltering(DataFrame); + registerDataFrameIndexing(DataFrame); registerDataFrameTransform(DataFrame); registerDataFrameDisplay(DataFrame); registerDataFrameTimeSeries(DataFrame); @@ -40,37 +42,37 @@ export function getDataFrameMethodsInfo() { signature: 'count(column)', description: 'Count non-empty values in the specified column', returns: 'number', - example: 'df.count(\'age\')', + example: "df.count('age')", }, sum: { signature: 'sum(column)', description: 'Sum of values in the specified column', returns: 'number', - example: 'df.sum(\'price\')', + example: "df.sum('price')", }, mean: { signature: 'mean(column)', description: 'Mean value in the specified column', returns: 'number', - example: 'df.mean(\'score\')', + example: "df.mean('score')", }, min: { signature: 'min(column)', description: 'Minimum value in the specified column', returns: 'number', - example: 'df.min(\'price\')', + example: "df.min('price')", }, max: { signature: 'max(column)', description: 'Maximum value in the specified column', returns: 'number', - example: 'df.max(\'price\')', + example: "df.max('price')", }, median: { signature: 'median(column)', description: 'Median value in the specified column', returns: 'number', - example: 
'df.median(\'score\')', + example: "df.median('score')", }, // Other aggregation methods... }, @@ -85,7 +87,7 @@ export function getDataFrameMethodsInfo() { signature: 'where(column, operator, value)', description: 'Filter rows based on a condition for a specific column', returns: 'DataFrame', - example: 'df.where(\'age\', \'>\', 30)', + example: "df.where('age', '>', 30)", }, expr$: { signature: 'expr$`expression`', @@ -97,14 +99,17 @@ export function getDataFrameMethodsInfo() { signature: 'select(columns)', description: 'Select specified columns', returns: 'DataFrame', - example: 'df.select([\'name\', \'age\'])', + example: "df.select(['name', 'age'])", }, drop: { signature: 'drop(columns)', description: 'Remove specified columns', returns: 'DataFrame', - example: 'df.drop([\'address\', \'phone\'])', + example: "df.drop(['address', 'phone'])", }, + // Other filtering methods... + }, + indexing: { at: { signature: 'at(index)', description: 'Select a single row by index', @@ -117,14 +122,38 @@ export function getDataFrameMethodsInfo() { returns: 'DataFrame|Object', example: 'df.iloc([0, 1, 2], [0, 2])', }, - // Other filtering methods... + head: { + signature: 'head(n)', + description: 'Get first n rows', + returns: 'DataFrame', + example: 'df.head(5)', + }, + tail: { + signature: 'tail(n)', + description: 'Get last n rows', + returns: 'DataFrame', + example: 'df.tail(5)', + }, + loc: { + signature: 'loc(rowLabels, [colLabels])', + description: 'Select rows and columns by labels', + returns: 'DataFrame|Object', + example: 'df.loc(["row1", "row2"], ["col1", "col2"])', + }, + sample: { + signature: 'sample(n, [options])', + description: 'Get a random sample of rows', + returns: 'DataFrame', + example: 'df.sample(5, { replace: false })', + }, + // Other indexing methods... 
}, transform: { sort: { signature: 'sort(column, [options])', description: 'Sort by the specified column', returns: 'DataFrame', - example: 'df.sort(\'name\', { ascending: true })', + example: "df.sort('name', { ascending: true })", }, assign: { signature: 'assign(columns)', @@ -140,13 +169,13 @@ export function getDataFrameMethodsInfo() { signature: 'pivot(index, columns, values, [aggFunc])', description: 'Pivot DataFrame from long to wide format', returns: 'DataFrame', - example: 'df.pivot(\'date\', \'category\', \'value\')', + example: "df.pivot('date', 'category', 'value')", }, melt: { signature: 'melt(idVars, [valueVars], [varName], [valueName])', description: 'Unpivot DataFrame from wide to long format', returns: 'DataFrame', - example: 'df.melt([\'date\'], [\'sales\', \'expenses\'])', + example: "df.melt(['date'], ['sales', 'expenses'])", }, // Other reshape methods... }, @@ -161,7 +190,7 @@ export function getDataFrameMethodsInfo() { signature: 'toHTML([options])', description: 'Convert to HTML table', returns: 'string', - example: 'df.toHTML({ className: \'data-table\' })', + example: "df.toHTML({ className: 'data-table' })", }, // Other display methods... 
}, diff --git a/src/methods/dataframe/timeseries/expanding.js b/src/methods/dataframe/timeseries/expanding.js deleted file mode 100644 index c8995df..0000000 --- a/src/methods/dataframe/timeseries/expanding.js +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Apply an expanding window function to DataFrame columns - * - * @param {DataFrame} df - DataFrame to apply expanding window to - * @param {Object} options - Options object - * @param {Object} options.aggregations - Object mapping column names to aggregation functions - * @param {number} [options.minPeriods=1] - Minimum number of observations required - * @returns {DataFrame} - DataFrame with expanding window calculations - */ -export function expanding(df, options) { - const { aggregations = {}, minPeriods = 1 } = options || {}; - - // Validate options - if (Object.keys(aggregations).length === 0) { - throw new Error('At least one aggregation must be specified'); - } - - // Create a new object to hold the result columns - const resultColumns = {}; - - // Keep columns that are not being aggregated - for (const colName of df.columns) { - if (!aggregations[colName]) { - resultColumns[colName] = df.col(colName).toArray(); - } - } - - // Apply expanding window to each column with aggregation - for (const [colName, aggFunc] of Object.entries(aggregations)) { - if (!df.columns.includes(colName)) { - throw new Error(`Column '${colName}' not found in DataFrame`); - } - - const series = df.col(colName); - const values = series.toArray(); - const result = new Array(values.length).fill(null); - - // Apply expanding window - for (let i = 0; i < values.length; i++) { - // Extract window values (all values from start to current position) - const windowValues = values - .slice(0, i + 1) - .filter((v) => v !== null && v !== undefined && !isNaN(v)); - - // Apply aggregation function if we have enough values - if (windowValues.length >= minPeriods) { - result[i] = aggFunc(windowValues); - } - } - - // Add result to output columns - 
resultColumns[`${colName}_expanding`] = result; - } - - // Create a new DataFrame with the result columns - return new df.constructor(resultColumns); -} - -export default { - expanding, -}; diff --git a/src/methods/dataframe/timeseries/register.js b/src/methods/dataframe/timeseries/register.js deleted file mode 100644 index 3318dd5..0000000 --- a/src/methods/dataframe/timeseries/register.js +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Registrar for DataFrame time series methods - */ - -/** - * Registers all time series methods for DataFrame - * @param {Class} DataFrame - DataFrame class to extend - */ -export function registerDataFrameTimeSeries(DataFrame) { - /** - * Resamples a DataFrame to a different time frequency - * @param {Object} options - Options object - * @param {string} options.dateColumn - Name of the column containing dates - * @param {string} options.freq - Target frequency ('D' for day, 'W' for week, 'M' for month, 'Q' for quarter, 'Y' for year) - * @param {Object} options.aggregations - Object mapping column names to aggregation functions - * @param {boolean} [options.includeEmpty=false] - Whether to include empty periods - * @returns {Promise} - Resampled DataFrame - */ - DataFrame.prototype.resample = function(options) { - // Validate required options - const { dateColumn, freq, aggregations = {} } = options || {}; - - if (!dateColumn) { - throw new Error('dateColumn parameter is required'); - } - - if (!freq) { - throw new Error('freq parameter is required'); - } - - if (!this.hasColumn(dateColumn)) { - throw new Error(`Date column '${dateColumn}' not found in DataFrame`); - } - - if (Object.keys(aggregations).length === 0) { - throw new Error('At least one aggregation must be specified'); - } - - // Import the implementation dynamically to avoid circular dependencies - return import('./resample.js').then((module) => { - const { resample } = module; - return resample(this, options); - }); - }; - - /** - * Applies a rolling window function to 
DataFrame columns - * @param {Object} options - Options object - * @param {number} options.window - Window size - * @param {Object} options.aggregations - Object mapping column names to aggregation functions - * @param {boolean} [options.center=false] - Whether to center the window - * @param {boolean} [options.minPeriods=null] - Minimum number of observations required - * @returns {Promise} - DataFrame with rolling window calculations - */ - DataFrame.prototype.rolling = function(options) { - // Import the implementation dynamically to avoid circular dependencies - return import('./rolling.js').then((module) => { - const { rolling } = module; - return rolling(this, options); - }); - }; - - /** - * Applies an expanding window function to DataFrame columns - * @param {Object} options - Options object - * @param {Object} options.aggregations - Object mapping column names to aggregation functions - * @param {number} [options.minPeriods=1] - Minimum number of observations required - * @returns {Promise} - DataFrame with expanding window calculations - */ - DataFrame.prototype.expanding = function(options) { - // Import the implementation dynamically to avoid circular dependencies - return import('./expanding.js').then((module) => { - const { expanding } = module; - return expanding(this, options); - }); - }; - - /** - * Shifts index by desired number of periods - * @param {number} periods - Number of periods to shift (positive for forward, negative for backward) - * @param {*} [fillValue=null] - Value to use for new periods - * @returns {Promise} - Shifted DataFrame - */ - DataFrame.prototype.shift = function(periods = 1, fillValue = null) { - // Import the implementation dynamically to avoid circular dependencies - return import('./shift.js').then((module) => { - const { shift } = module; - return shift(this, periods, fillValue); - }); - }; - - /** - * Calculates percentage change between current and prior element - * @param {number} [periods=1] - Periods to shift for 
calculating percentage change - * @returns {Promise} - DataFrame with percentage changes - */ - DataFrame.prototype.pctChange = function(periods = 1) { - // Import the implementation dynamically to avoid circular dependencies - return import('./shift.js').then((module) => { - const { pctChange } = module; - return pctChange(this, periods); - }); - }; -} - -export default registerDataFrameTimeSeries; diff --git a/src/methods/dataframe/timeseries/resample.js b/src/methods/dataframe/timeseries/resample.js deleted file mode 100644 index 4db9722..0000000 --- a/src/methods/dataframe/timeseries/resample.js +++ /dev/null @@ -1,158 +0,0 @@ -/** - * Resample a DataFrame to a different time frequency - * - * @param {DataFrame} df - DataFrame to resample - * @param {Object} options - Options object - * @param {string} options.dateColumn - Name of the column containing dates - * @param {string} options.freq - Target frequency ('D' for day, 'W' for week, 'M' for month, 'Q' for quarter, 'Y' for year) - * @param {Object} options.aggregations - Object mapping column names to aggregation functions - * @param {boolean} [options.includeEmpty=false] - Whether to include empty periods - * @returns {DataFrame} - Resampled DataFrame - */ -export function resample(df, options) { - const { - dateColumn, - freq, - aggregations = {}, - includeEmpty = false, - } = options || {}; - - // Validate options - if (!dateColumn || !df.columns.includes(dateColumn)) { - throw new Error(`Date column '${dateColumn}' not found in DataFrame`); - } - - if (!freq) { - throw new Error('freq parameter is required'); - } - - if (Object.keys(aggregations).length === 0) { - throw new Error('At least one aggregation must be specified'); - } - - // Get date column values - const dateValues = df.col(dateColumn).toArray(); - - // Convert dates to Date objects if they are strings - const dates = dateValues.map((d) => (d instanceof Date ? 
d : new Date(d))); - - // Group data by time periods - const groups = groupByTimePeriod(dates, freq); - - // Create a new object to hold the result columns - const resultColumns = {}; - - // Add date column with period start dates - resultColumns[dateColumn] = Object.keys(groups).map( - (period) => new Date(period), - ); - - // Apply aggregations to each column - for (const [colName, aggFunc] of Object.entries(aggregations)) { - if (!df.columns.includes(colName)) { - throw new Error(`Column '${colName}' not found in DataFrame`); - } - - const colValues = df.col(colName).toArray(); - const aggregatedValues = []; - - // Aggregate values for each period - for (const period of Object.keys(groups)) { - const indices = groups[period]; - const periodValues = indices - .map((i) => colValues[i]) - .filter((v) => v !== null && v !== undefined && !isNaN(v)); - - if (periodValues.length > 0) { - aggregatedValues.push(aggFunc(periodValues)); - } else { - aggregatedValues.push(null); - } - } - - // Add aggregated values to result columns - resultColumns[colName] = aggregatedValues; - } - - // Create a new DataFrame with the result columns - return new df.constructor(resultColumns); -} - -/** - * Group dates by time period - * - * @param {Date[]} dates - Array of dates - * @param {string} freq - Frequency ('D', 'W', 'M', 'Q', 'Y') - * @returns {Object} - Object mapping period start dates to arrays of indices - */ -function groupByTimePeriod(dates, freq) { - const groups = {}; - - // Group dates by period - for (let i = 0; i < dates.length; i++) { - const date = dates[i]; - if (!(date instanceof Date) || isNaN(date)) { - continue; - } - - const periodStart = getPeriodStart(date, freq); - const periodKey = periodStart.toISOString(); - - if (!groups[periodKey]) { - groups[periodKey] = []; - } - - groups[periodKey].push(i); - } - - return groups; -} - -/** - * Get the start date of a period - * - * @param {Date} date - Date to get period start for - * @param {string} freq - Frequency 
('D', 'W', 'M', 'Q', 'Y') - * @returns {Date} - Start date of the period - */ -function getPeriodStart(date, freq) { - const result = new Date(date); - - switch (freq.toUpperCase()) { - case 'D': - // Start of day - result.setHours(0, 0, 0, 0); - break; - case 'W': - // Start of week (Sunday) - const day = result.getDay(); - result.setDate(result.getDate() - day); - result.setHours(0, 0, 0, 0); - break; - case 'M': - // Start of month - result.setDate(1); - result.setHours(0, 0, 0, 0); - break; - case 'Q': - // Start of quarter - const month = result.getMonth(); - const quarterMonth = Math.floor(month / 3) * 3; - result.setMonth(quarterMonth, 1); - result.setHours(0, 0, 0, 0); - break; - case 'Y': - // Start of year - result.setMonth(0, 1); - result.setHours(0, 0, 0, 0); - break; - default: - throw new Error(`Unsupported frequency: ${freq}`); - } - - return result; -} - -export default { - resample, -}; diff --git a/src/methods/dataframe/timeseries/rolling.js b/src/methods/dataframe/timeseries/rolling.js deleted file mode 100644 index c7b5f80..0000000 --- a/src/methods/dataframe/timeseries/rolling.js +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Apply a rolling window function to DataFrame columns - * - * @param {DataFrame} df - DataFrame to apply rolling window to - * @param {Object} options - Options object - * @param {number} options.window - Window size - * @param {Object} options.aggregations - Object mapping column names to aggregation functions - * @param {boolean} [options.center=false] - Whether to center the window - * @param {boolean} [options.minPeriods=null] - Minimum number of observations required - * @returns {DataFrame} - DataFrame with rolling window calculations - */ -export function rolling(df, options) { - const { - window, - aggregations = {}, - center = false, - minPeriods = null, - } = options || {}; - - // Validate options - if (!window || typeof window !== 'number' || window <= 0) { - throw new Error('window must be a positive number'); - } - - 
if (Object.keys(aggregations).length === 0) { - throw new Error('At least one aggregation must be specified'); - } - - // Create a new object to hold the result columns - const resultColumns = {}; - - // Keep columns that are not being aggregated - for (const colName of df.columns) { - if (!aggregations[colName]) { - resultColumns[colName] = df.col(colName).toArray(); - } - } - - // Apply rolling window to each column with aggregation - for (const [colName, aggFunc] of Object.entries(aggregations)) { - if (!df.columns.includes(colName)) { - throw new Error(`Column '${colName}' not found in DataFrame`); - } - - const series = df.col(colName); - const values = series.toArray(); - const result = new Array(values.length).fill(null); - - // Calculate effective min periods - const effectiveMinPeriods = - minPeriods === null ? window : Math.min(minPeriods, window); - - // Apply rolling window - for (let i = 0; i < values.length; i++) { - // Calculate window bounds - let start, end; - - if (center) { - // Center the window - start = Math.max(0, i - Math.floor(window / 2)); - end = Math.min(values.length, i + Math.ceil(window / 2)); - } else { - // Right-aligned window - start = Math.max(0, i - window + 1); - end = i + 1; - } - - // Skip if not enough observations - if (end - start < effectiveMinPeriods) { - continue; - } - - // Extract window values - const windowValues = values - .slice(start, end) - .filter((v) => v !== null && v !== undefined && !isNaN(v)); - - // Apply aggregation function - if (windowValues.length >= effectiveMinPeriods) { - result[i] = aggFunc(windowValues); - } - } - - // Add result to output columns - resultColumns[`${colName}_rolling`] = result; - } - - // Create a new DataFrame with the result columns - return new df.constructor(resultColumns); -} - -export default { - rolling, -}; diff --git a/src/methods/dataframe/timeseries/shift.js b/src/methods/dataframe/timeseries/shift.js deleted file mode 100644 index 6298c51..0000000 --- 
a/src/methods/dataframe/timeseries/shift.js +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Shift values in a DataFrame by a specified number of periods - * - * @param {DataFrame} df - DataFrame to shift - * @param {number} periods - Number of periods to shift (positive for forward, negative for backward) - * @param {*} fillValue - Value to use for new periods - * @returns {DataFrame} - Shifted DataFrame - */ -export function shift(df, periods = 1, fillValue = null) { - // Create a new object to hold the shifted columns - const shiftedColumns = {}; - - // Shift each column - for (const colName of df.columns) { - const series = df.col(colName); - shiftedColumns[colName] = series.shift(periods, fillValue); - } - - // Create a new DataFrame with the shifted columns - return new df.constructor(shiftedColumns); -} - -/** - * Calculate percentage change between current and prior element - * - * @param {DataFrame} df - DataFrame to calculate percentage change - * @param {number} periods - Periods to shift for calculating percentage change - * @returns {DataFrame} - DataFrame with percentage changes - */ -export function pctChange(df, periods = 1) { - // Create a new object to hold the percentage change columns - const pctChangeColumns = {}; - - // Calculate percentage change for each column - for (const colName of df.columns) { - const series = df.col(colName); - // Use the series pctChange method if available, otherwise calculate manually - if (typeof series.pctChange === 'function') { - pctChangeColumns[colName] = series.pctChange(periods); - } else { - // Manual calculation: (current - previous) / previous - const values = series.toArray(); - const result = new Array(values.length).fill(null); - - for (let i = periods; i < values.length; i++) { - const current = values[i]; - const previous = values[i - periods]; - - // Skip if either value is not a number - if ( - typeof current !== 'number' || - typeof previous !== 'number' || - isNaN(current) || - isNaN(previous) || - 
previous === 0 - ) { - continue; - } - - result[i] = (current - previous) / previous; - } - - pctChangeColumns[colName] = result; - } - } - - // Create a new DataFrame with the percentage change columns - return new df.constructor(pctChangeColumns); -} - -export default { - shift, - pctChange, -}; diff --git a/src/methods/dataframe/timeseries/utils/dateUtils.js b/src/methods/dataframe/timeseries/utils/dateUtils.js deleted file mode 100644 index 6638bfb..0000000 --- a/src/methods/dataframe/timeseries/utils/dateUtils.js +++ /dev/null @@ -1,388 +0,0 @@ -/** - * Utility functions for working with dates and time series data. - * These functions help with date parsing, frequency conversion, and date operations. - * @module methods/timeseries/dateUtils - */ - -/** - * Parses a date string or timestamp into a JavaScript Date object - * @param {string|number|Date} dateValue - The date to parse - * @returns {Date} - JavaScript Date object - * @throws {Error} - If the date format is invalid - */ -function parseDate(dateValue) { - if (dateValue instanceof Date) { - return dateValue; - } - - if (typeof dateValue === 'number') { - return new Date(dateValue); - } - - // Try to parse the date string - const parsedDate = new Date(dateValue); - if (isNaN(parsedDate.getTime())) { - throw new Error(`Invalid date format: ${dateValue}`); - } - - return parsedDate; -} - -/** - * Truncates a date to the specified frequency, returning the start of the period - * @param {Date} date - The date to truncate - * @param {string} freq - Frequency ('D' for day, 'W' for week, 'M' for month, 'Q' for quarter, 'Y' for year) - * @returns {Date} - Date at the start of the period - * @throws {Error} - If the frequency is not supported - */ -function truncateDate(date, freq) { - const result = new Date(date); - - switch (freq) { - case 'D': // Day - result.setHours(0, 0, 0, 0); - break; - case 'W': // Week (Sunday as first day) - const day = result.getDay(); - result.setDate(result.getDate() - day); - 
result.setHours(0, 0, 0, 0); - break; - case 'M': // Month - result.setDate(1); - result.setHours(0, 0, 0, 0); - break; - case 'Q': // Quarter - const month = result.getMonth(); - const quarterMonth = month - (month % 3); - result.setMonth(quarterMonth, 1); - result.setHours(0, 0, 0, 0); - break; - case 'Y': // Year - result.setMonth(0, 1); - result.setHours(0, 0, 0, 0); - break; - default: - throw new Error(`Unsupported frequency: ${freq}`); - } - - return result; -} - -/** - * Gets the next date based on the current date and frequency - * @param {Date} date - The current date - * @param {string} freq - Frequency ('D' for day, 'W' for week, 'M' for month, 'Q' for quarter, 'Y' for year) - * @returns {Date} - The next date - * @throws {Error} - If the frequency is not supported - */ -function getNextDate(date, freq) { - const result = new Date(date); - - switch (freq) { - case 'D': // Day - result.setDate(result.getDate() + 1); - break; - case 'W': // Week - result.setDate(result.getDate() + 7); - break; - case 'M': // Month - result.setMonth(result.getMonth() + 1); - break; - case 'Q': // Quarter - result.setMonth(result.getMonth() + 3); - break; - case 'Y': // Year - result.setFullYear(result.getFullYear() + 1); - break; - default: - throw new Error(`Unsupported frequency: ${freq}`); - } - - return result; -} - -/** - * Formats a date as an ISO string without time component - * @param {Date} date - The date to format - * @returns {string} - Formatted date string (YYYY-MM-DD) - */ -function formatDateISO(date) { - const d = new Date(date); - return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`; -} - -/** - * Checks if two dates are in the same period based on frequency - * @param {Date} date1 - First date - * @param {Date} date2 - Second date - * @param {string} freq - Frequency ('D' for day, 'W' for week, 'M' for month, 'Q' for quarter, 'Y' for year) - * @returns {boolean} - True if dates are in the same 
period - */ -function isSamePeriod(date1, date2, freq) { - const truncated1 = truncateDate(date1, freq); - const truncated2 = truncateDate(date2, freq); - - return truncated1.getTime() === truncated2.getTime(); -} - -/** - * Generates a sequence of dates from start to end with the specified frequency - * @param {Date} startDate - Start date - * @param {Date} endDate - End date - * @param {string} freq - Frequency ('D' for day, 'W' for week, 'M' for month, 'Q' for quarter, 'Y' for year) - * @returns {Date[]} - Array of dates - */ -function dateRange(startDate, endDate, freq) { - const result = []; - let currentDate = truncateDate(startDate, freq); - const truncatedEndDate = truncateDate(endDate, freq); - - while (currentDate <= truncatedEndDate) { - result.push(new Date(currentDate)); - currentDate = getNextDate(currentDate, freq); - } - - return result; -} - -/** - * Adds a specified number of time units to a date - * @param {Date} date - The date to add to - * @param {number} amount - The amount to add - * @param {string} unit - The unit to add ('days', 'weeks', 'months', 'quarters', 'years') - * @returns {Date} - New date with the added time - * @throws {Error} - If the time unit is not supported - */ -function addTime(date, amount, unit) { - const result = new Date(date); - - switch (unit) { - case 'days': - result.setDate(result.getDate() + amount); - break; - case 'weeks': - result.setDate(result.getDate() + amount * 7); - break; - case 'months': - result.setMonth(result.getMonth() + amount); - break; - case 'quarters': - result.setMonth(result.getMonth() + amount * 3); - break; - case 'years': - result.setFullYear(result.getFullYear() + amount); - break; - default: - throw new Error(`Unsupported time unit: ${unit}`); - } - - return result; -} - -/** - * Subtracts a specified number of time units from a date - * @param {Date} date - The date to subtract from - * @param {number} amount - The amount to subtract - * @param {string} unit - The unit to subtract 
('days', 'weeks', 'months', 'quarters', 'years') - * @returns {Date} - New date with the subtracted time - */ -function subtractTime(date, amount, unit) { - return addTime(date, -amount, unit); -} - -/** - * Calculates the difference between two dates in the specified unit - * @param {Date} date1 - First date - * @param {Date} date2 - Second date - * @param {string} unit - The unit to calculate difference in ('days', 'weeks', 'months', 'quarters', 'years') - * @returns {number} - Difference in the specified unit - * @throws {Error} - If the time unit is not supported - */ -function dateDiff(date1, date2, unit) { - const d1 = new Date(date1); - const d2 = new Date(date2); - - switch (unit) { - case 'days': - return Math.round((d2 - d1) / (1000 * 60 * 60 * 24)); - case 'weeks': - return Math.round((d2 - d1) / (1000 * 60 * 60 * 24 * 7)); - case 'months': { - const monthDiff = - (d2.getFullYear() - d1.getFullYear()) * 12 + - (d2.getMonth() - d1.getMonth()); - const dayDiff = d2.getDate() - d1.getDate(); - - // Adjust for month ends - if (dayDiff < 0) { - return monthDiff - 1; - } else { - return monthDiff; - } - } - case 'quarters': - return Math.floor(dateDiff(date1, date2, 'months') / 3); - case 'years': - return d2.getFullYear() - d1.getFullYear(); - default: - throw new Error(`Unsupported time unit: ${unit}`); - } -} - -/** - * Formats a date according to the specified format string - * @param {Date} date - The date to format - * @param {string} format - Format string (e.g., 'YYYY-MM-DD', 'DD/MM/YYYY', etc.) 
- * @returns {string} - Formatted date string - */ -function formatDate(date, format = 'YYYY-MM-DD') { - const d = new Date(date); - - const tokens = { - YYYY: d.getFullYear(), - YY: String(d.getFullYear()).slice(-2), - MM: String(d.getMonth() + 1).padStart(2, '0'), - M: d.getMonth() + 1, - DD: String(d.getDate()).padStart(2, '0'), - D: d.getDate(), - HH: String(d.getHours()).padStart(2, '0'), - H: d.getHours(), - mm: String(d.getMinutes()).padStart(2, '0'), - m: d.getMinutes(), - ss: String(d.getSeconds()).padStart(2, '0'), - s: d.getSeconds(), - }; - - return format.replace( - /YYYY|YY|MM|M|DD|D|HH|H|mm|m|ss|s/g, - (match) => tokens[match], - ); -} - -/** - * Parses a date string according to the specified format - * @param {string} dateStr - The date string to parse - * @param {string} format - Format string (e.g., 'YYYY-MM-DD', 'DD/MM/YYYY', etc.) - * @returns {Date} - Parsed date - */ -function parseDateFormat(dateStr, format = 'YYYY-MM-DD') { - // Create a regex pattern from the format - const pattern = format - .replace(/YYYY/g, '(\\d{4})') - .replace(/YY/g, '(\\d{2})') - .replace(/MM/g, '(\\d{2})') - .replace(/M/g, '(\\d{1,2})') - .replace(/DD/g, '(\\d{2})') - .replace(/D/g, '(\\d{1,2})') - .replace(/HH/g, '(\\d{2})') - .replace(/H/g, '(\\d{1,2})') - .replace(/mm/g, '(\\d{2})') - .replace(/m/g, '(\\d{1,2})') - .replace(/ss/g, '(\\d{2})') - .replace(/s/g, '(\\d{1,2})'); - - const regex = new RegExp(`^${pattern}$`); - const match = dateStr.match(regex); - - if (!match) { - throw new Error( - `Date string '${dateStr}' does not match format '${format}'`, - ); - } - - // Extract values based on format - const values = {}; - let matchIndex = 1; - - const formatTokens = format.match(/YYYY|YY|MM|M|DD|D|HH|H|mm|m|ss|s/g); - formatTokens.forEach((token) => { - values[token] = match[matchIndex++]; - }); - - // Handle two-digit years - let year; - if (values.YYYY) { - year = parseInt(values.YYYY, 10); - } else if (values.YY) { - const currentYear = new 
Date().getFullYear(); - const century = Math.floor(currentYear / 100) * 100; - year = century + parseInt(values.YY, 10); - } else { - year = new Date().getFullYear(); - } - - const month = parseInt(values.MM || values.M || 1, 10) - 1; - const day = parseInt(values.DD || values.D || 1, 10); - const hour = parseInt(values.HH || values.H || 0, 10); - const minute = parseInt(values.mm || values.m || 0, 10); - const second = parseInt(values.ss || values.s || 0, 10); - - return new Date(year, month, day, hour, minute, second); -} - -/** - * Gets the start of a business day (9:30 AM) - * @param {Date} date - The date - * @returns {Date} - Date set to the start of the business day - */ -function businessDayStart(date) { - const result = new Date(date); - result.setHours(9, 30, 0, 0); - return result; -} - -/** - * Gets the end of a business day (4:00 PM) - * @param {Date} date - The date - * @returns {Date} - Date set to the end of the business day - */ -function businessDayEnd(date) { - const result = new Date(date); - result.setHours(16, 0, 0, 0); - return result; -} - -/** - * Checks if a date is a weekend (Saturday or Sunday) - * @param {Date} date - The date to check - * @returns {boolean} - True if the date is a weekend - */ -function isWeekend(date) { - const day = date.getDay(); - return day === 0 || day === 6; // 0 is Sunday, 6 is Saturday -} - -/** - * Gets the next business day (skipping weekends) - * @param {Date} date - The starting date - * @returns {Date} - The next business day - */ -function nextBusinessDay(date) { - const result = new Date(date); - result.setDate(result.getDate() + 1); - - // Skip weekends - while (isWeekend(result)) { - result.setDate(result.getDate() + 1); - } - - return result; -} - -export { - parseDate, - truncateDate, - getNextDate, - formatDateISO, - isSamePeriod, - dateRange, - addTime, - subtractTime, - dateDiff, - formatDate, - parseDateFormat, - businessDayStart, - businessDayEnd, - isWeekend, - nextBusinessDay, -}; diff --git 
a/src/methods/dataframe/transform/register.js b/src/methods/dataframe/transform/register.js index e9ec74d..49c5430 100644 --- a/src/methods/dataframe/transform/register.js +++ b/src/methods/dataframe/transform/register.js @@ -17,7 +17,7 @@ import { register as registerOneHot } from './oneHot.js'; * @param {Class} DataFrame - DataFrame class to extend */ export function registerDataFrameTransform(DataFrame) { - // Проверяем, что DataFrame существует + // Check that DataFrame exists if (!DataFrame) { console.warn( 'DataFrame class is not provided, skipping transformation methods registration', diff --git a/src/methods/dataframe/transform/stack.js b/src/methods/dataframe/transform/stack.js index 1716b6e..daf9af0 100644 --- a/src/methods/dataframe/transform/stack.js +++ b/src/methods/dataframe/transform/stack.js @@ -61,13 +61,13 @@ export function stack( // Stack the data using public API const rows = df.toArray(); - // Если valueVars не указан явно, используем только столбцы North, South, East, West - // для совместимости с тестами, или status* для нечисловых значений + // If valueVars is not specified, use only columns North, South, East, West + // for compatibility with tests, or status* for non-numeric values if (!valueVars) { const regionColumns = ['North', 'South', 'East', 'West']; const statusColumns = df.columns.filter((col) => col.startsWith('status')); - // Если есть столбцы status*, используем их, иначе используем region столбцы + // If there are status* columns, use them, otherwise use region columns if (statusColumns.length > 0) { valueColumns = statusColumns; } else { diff --git a/src/methods/series/transform/abs.js b/src/methods/series/transform/abs.js new file mode 100644 index 0000000..fa6516a --- /dev/null +++ b/src/methods/series/transform/abs.js @@ -0,0 +1,30 @@ +/** + * Abs method for Series + * Returns absolute values of all elements in the Series + */ + +/** + * Creates an abs method for Series + * @returns {Function} - Function to be attached to 
Series prototype + */ +export function abs() { + /** + * Returns absolute values of all elements in the Series + * @returns {Series} - New Series with absolute values + */ + return function () { + return this.map(Math.abs); + }; +} + +/** + * Registers the abs method on Series prototype + * @param {Class} Series - Series class to extend + */ +export function register(Series) { + if (!Series.prototype.abs) { + Series.prototype.abs = abs(); + } +} + +export default { abs, register }; diff --git a/src/methods/series/transform/apply.js b/src/methods/series/transform/apply.js new file mode 100644 index 0000000..1d91412 --- /dev/null +++ b/src/methods/series/transform/apply.js @@ -0,0 +1,31 @@ +/** + * Apply method for Series + * Applies a function to each element and returns a new Series + */ + +/** + * Creates an apply method for Series + * @returns {Function} - Function to be attached to Series prototype + */ +export function apply() { + /** + * Applies a function to each element and returns a new Series + * @param {Function} fn - Function to apply + * @returns {Series} - New Series with transformed values + */ + return function (fn) { + return this.map(fn); + }; +} + +/** + * Registers the apply method on Series prototype + * @param {Class} Series - Series class to extend + */ +export function register(Series) { + if (!Series.prototype.apply) { + Series.prototype.apply = apply(); + } +} + +export default { apply, register }; diff --git a/src/methods/series/transform/map.js b/src/methods/series/transform/map.js new file mode 100644 index 0000000..e79a433 --- /dev/null +++ b/src/methods/series/transform/map.js @@ -0,0 +1,38 @@ +/** + * Map method for Series + * Maps each element in the Series using the provided function + */ + +/** + * Creates a map method for Series + * @returns {Function} - Function to be attached to Series prototype + */ +export function map() { + /** + * Maps each element in the Series using the provided function + * @param {Function} fn - Function 
to apply to each element + * @returns {Series} - New Series with transformed values + */ + return function (fn) { + const data = this.values; + const result = new Array(data.length); + + for (let i = 0; i < data.length; i++) { + result[i] = fn(data[i], i, data); + } + + return new this.constructor(result, { name: this.name }); + }; +} + +/** + * Registers the map method on Series prototype + * @param {Class} Series - Series class to extend + */ +export function register(Series) { + if (!Series.prototype.map) { + Series.prototype.map = map(); + } +} + +export default { map, register }; diff --git a/src/methods/series/transform/register.js b/src/methods/series/transform/register.js index d01b822..ccaa349 100644 --- a/src/methods/series/transform/register.js +++ b/src/methods/series/transform/register.js @@ -10,35 +10,25 @@ import { fillna } from './fillna.js'; import { dropna } from './dropna.js'; import { clip } from './clip.js'; import { diff } from './diff.js'; -import { pct_change } from './pct_change.js'; +import { pctChange } from './pctChange.js'; +import { map } from './map.js'; +import { apply } from './apply.js'; +import { round } from './round.js'; +import { abs } from './abs.js'; /** * Registers all transformation methods for Series * @param {Class} Series - Series class to extend */ export function registerSeriesTransform(Series) { - /** - * Maps each element in the Series using the provided function - * @param {Function} fn - Function to apply to each element - * @returns {Series} - New Series with transformed values - */ - Series.prototype.map = function(fn) { - const data = this.values; - const result = new Array(data.length); - - for (let i = 0; i < data.length; i++) { - result[i] = fn(data[i], i, data); - } - - return new Series(result, { name: this.name }); - }; + // Map method is imported from map.js /** * Filters Series elements using the provided predicate * @param {Function} predicate - Function that returns true for elements to keep * @returns 
{Series} - New Series with filtered values */ - Series.prototype.filter = function(predicate) { + Series.prototype.filter = function (predicate) { const data = this.values; const result = []; @@ -51,29 +41,15 @@ export function registerSeriesTransform(Series) { return new Series(result, { name: this.name }); }; - /** - * Returns absolute values of all elements in the Series - * @returns {Series} - New Series with absolute values - */ - Series.prototype.abs = function() { - return this.map(Math.abs); - }; + // Abs method is imported from abs.js - /** - * Rounds all elements in the Series to specified number of decimals - * @param {number} [decimals=0] - Number of decimal places - * @returns {Series} - New Series with rounded values - */ - Series.prototype.round = function(decimals = 0) { - const factor = Math.pow(10, decimals); - return this.map((x) => Math.round(x * factor) / factor); - }; + // Round method is imported from round.js /** * Returns cumulative sum of the Series * @returns {Series} - New Series with cumulative sum */ - Series.prototype.cumsum = function() { + Series.prototype.cumsum = function () { const data = this.values; const result = new Array(data.length); let sum = 0; @@ -92,7 +68,7 @@ export function registerSeriesTransform(Series) { * Returns Series with values normalized to range [0, 1] * @returns {Series} - Normalized Series */ - Series.prototype.normalize = function() { + Series.prototype.normalize = function () { const min = this.min(); const max = this.max(); @@ -104,16 +80,25 @@ export function registerSeriesTransform(Series) { return this.map((x) => (x - min) / range); }; - /** - * Applies a function to each element and returns a new Series - * @param {Function} fn - Function to apply - * @returns {Series} - New Series with transformed values - */ - Series.prototype.apply = function(fn) { - return this.map(fn); - }; + // Apply method is imported from apply.js // Register new transformation methods + if (!Series.prototype.map) { + 
Series.prototype.map = map(); + } + + if (!Series.prototype.apply) { + Series.prototype.apply = apply(); + } + + if (!Series.prototype.round) { + Series.prototype.round = round(); + } + + if (!Series.prototype.abs) { + Series.prototype.abs = abs(); + } + if (!Series.prototype.sort) { Series.prototype.sort = sort(); } @@ -142,8 +127,8 @@ export function registerSeriesTransform(Series) { Series.prototype.diff = diff(); } - if (!Series.prototype.pct_change) { - Series.prototype.pct_change = pct_change(); + if (!Series.prototype.pctChange) { + Series.prototype.pctChange = pctChange(); } } diff --git a/src/methods/series/transform/round.js b/src/methods/series/transform/round.js new file mode 100644 index 0000000..7358fa1 --- /dev/null +++ b/src/methods/series/transform/round.js @@ -0,0 +1,32 @@ +/** + * Round method for Series + * Rounds all elements in the Series to specified number of decimals + */ + +/** + * Creates a round method for Series + * @returns {Function} - Function to be attached to Series prototype + */ +export function round() { + /** + * Rounds all elements in the Series to specified number of decimals + * @param {number} [decimals=0] - Number of decimal places + * @returns {Series} - New Series with rounded values + */ + return function (decimals = 0) { + const factor = Math.pow(10, decimals); + return this.map((x) => Math.round(x * factor) / factor); + }; +} + +/** + * Registers the round method on Series prototype + * @param {Class} Series - Series class to extend + */ +export function register(Series) { + if (!Series.prototype.round) { + Series.prototype.round = round(); + } +} + +export default { round, register }; diff --git a/test/methods/dataframe/filtering/drop.test.js b/test/methods/dataframe/filtering/drop.test.js index be16ab3..8016cb8 100644 --- a/test/methods/dataframe/filtering/drop.test.js +++ b/test/methods/dataframe/filtering/drop.test.js @@ -14,7 +14,7 @@ const testData = [ ]; describe('Drop Method', () => { - // Регистрируем методы 
фильтрации для DataFrame + // Register filtering methods for DataFrame registerDataFrameFiltering(DataFrame); describe('with standard storage', () => { diff --git a/test/methods/dataframe/filtering/expr$.test.js b/test/methods/dataframe/filtering/expr$.test.js index 74c1454..ffbcb6d 100644 --- a/test/methods/dataframe/filtering/expr$.test.js +++ b/test/methods/dataframe/filtering/expr$.test.js @@ -16,7 +16,7 @@ const testData = [ ]; describe('Expr$ Method', () => { - // Регистрируем методы фильтрации для DataFrame + // Register filtering methods for DataFrame registerDataFrameFiltering(DataFrame); describe('with standard storage', () => { diff --git a/test/methods/dataframe/filtering/filter.test.js b/test/methods/dataframe/filtering/filter.test.js index e548a58..00c2055 100644 --- a/test/methods/dataframe/filtering/filter.test.js +++ b/test/methods/dataframe/filtering/filter.test.js @@ -14,7 +14,7 @@ const testData = [ ]; describe('Filter Method', () => { - // Регистрируем методы фильтрации для DataFrame + // Register filtering methods for DataFrame registerDataFrameFiltering(DataFrame); describe('with standard storage', () => { @@ -91,8 +91,8 @@ describe('Filter Method', () => { // Filter the data const result = typedDf.filter((row) => row.age > 25); - // Проверяем, что результат содержит Float64Array для salary - // Примечание: age может быть преобразован в Float64Array в процессе фильтрации + // Check that the result contains Float64Array for salary + // Note: age may be converted to Float64Array during filtering expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array); }); }); diff --git a/test/methods/dataframe/filtering/index.test.js b/test/methods/dataframe/filtering/index.test.js index 8290799..448678d 100644 --- a/test/methods/dataframe/filtering/index.test.js +++ b/test/methods/dataframe/filtering/index.test.js @@ -6,6 +6,7 @@ import { describe, test, expect } from 'vitest'; import { DataFrame } from 
'../../../../src/core/dataframe/DataFrame.js'; import * as filteringMethods from '../../../../src/methods/dataframe/filtering/index.js'; import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; +import { register as registerDataFrameIndexing } from '../../../../src/methods/dataframe/indexing/register.js'; // Test data for use in all tests const testData = [ @@ -17,8 +18,9 @@ const testData = [ ]; describe('Filtering Methods Index', () => { - // Регистрируем методы фильтрации для DataFrame + // Register filtering and indexing methods for DataFrame registerDataFrameFiltering(DataFrame); + registerDataFrameIndexing(DataFrame); describe('with standard storage', () => { // Create DataFrame using fromRows @@ -32,10 +34,6 @@ describe('Filtering Methods Index', () => { expect(filteringMethods).toHaveProperty('filter'); expect(filteringMethods).toHaveProperty('query'); expect(filteringMethods).toHaveProperty('where'); - expect(filteringMethods).toHaveProperty('at'); - expect(filteringMethods).toHaveProperty('iloc'); - expect(filteringMethods).toHaveProperty('loc'); - expect(filteringMethods).toHaveProperty('sample'); expect(filteringMethods).toHaveProperty('stratifiedSample'); }); @@ -47,10 +45,6 @@ describe('Filtering Methods Index', () => { expect(typeof df.filter).toBe('function'); expect(typeof df.query).toBe('function'); expect(typeof df.where).toBe('function'); - expect(typeof df.at).toBe('function'); - expect(typeof df.iloc).toBe('function'); - expect(typeof df.loc).toBe('function'); - expect(typeof df.sample).toBe('function'); expect(typeof df.stratifiedSample).toBe('function'); }); }); diff --git a/test/methods/dataframe/filtering/stratifiedSample.test.js b/test/methods/dataframe/filtering/stratifiedSample.test.js index a78c50e..a76773e 100644 --- a/test/methods/dataframe/filtering/stratifiedSample.test.js +++ b/test/methods/dataframe/filtering/stratifiedSample.test.js @@ -6,7 +6,7 @@ import { describe, test, expect } from 
'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; -// Тестовые данные для использования во всех тестах +// Test data for use in all tests const testData = [ { name: 'Alice', age: 25, city: 'New York', category: 'A', salary: 70000 }, { name: 'Bob', age: 30, city: 'San Francisco', category: 'B', salary: 85000 }, @@ -27,14 +27,14 @@ const testData = [ ]; describe('StratifiedSample Method', () => { - // Регистрируем методы фильтрации для DataFrame + // Register filtering methods for DataFrame registerDataFrameFiltering(DataFrame); describe('with standard storage', () => { - // Создаем DataFrame используя fromRows + // Create DataFrame using fromRows const df = DataFrame.fromRows(testData); - // Создаем DataFrame с типизированными массивами для тестирования сохранения типов + // Create DataFrame with typed arrays for testing type preservation const typedDf = DataFrame.fromRows(testData, { columns: { age: { type: 'int32' }, @@ -123,14 +123,14 @@ describe('StratifiedSample Method', () => { }); test('should preserve typed arrays', () => { - // Используем DataFrame с типизированными массивами + // Use DataFrame with typed arrays const result = typedDf.stratifiedSample('category', 0.5, { seed: 42 }); - // Проверяем, что результат сохраняет данные и структуру + // Check that the result preserves data and structure expect(result.col('age')).toBeDefined(); expect(result.col('salary')).toBeDefined(); - // Проверяем, что данные сохранены корректно + // Check that data is preserved correctly const resultArray = result.toArray(); expect(resultArray.length).toBeGreaterThan(0); expect(typeof resultArray[0].age).toBe('number'); @@ -138,7 +138,7 @@ describe('StratifiedSample Method', () => { }); test('should handle the case where a category has only one item', () => { - // Создаем DataFrame с одним элементом в каждой категории + // Create DataFrame with one 
item in each category const singleItemData = [ { name: 'Alice', category: 'A' }, { name: 'Bob', category: 'B' }, @@ -146,7 +146,7 @@ describe('StratifiedSample Method', () => { ]; const singleItemDf = DataFrame.fromRows(singleItemData); - // Вызываем метод stratifiedSample на DataFrame с одним элементом в каждой категории + // Call stratifiedSample on DataFrame with one item in each category const result = singleItemDf.stratifiedSample('category', 0.5); // Each category should still have at least one item @@ -154,7 +154,7 @@ describe('StratifiedSample Method', () => { expect(categories).toContain('A'); expect(categories).toContain('B'); expect(categories).toContain('C'); - expect(result.rowCount).toBe(3); // Все элементы должны быть включены + expect(result.rowCount).toBe(3); // All elements should be included }); }); }); diff --git a/test/methods/dataframe/filtering/where.test.js b/test/methods/dataframe/filtering/where.test.js index 2aa706a..00838a1 100644 --- a/test/methods/dataframe/filtering/where.test.js +++ b/test/methods/dataframe/filtering/where.test.js @@ -6,7 +6,7 @@ import { describe, test, expect } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; -// Тестовые данные для использования во всех тестах +// Test data for use in all tests const testData = [ { name: 'Alice', age: 25, city: 'New York', salary: 70000 }, { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 }, @@ -14,11 +14,11 @@ const testData = [ ]; describe('Where Method', () => { - // Регистрируем методы фильтрации для DataFrame + // Register filtering methods for DataFrame registerDataFrameFiltering(DataFrame); describe('with standard storage', () => { - // Создаем DataFrame используя fromRows + // Create DataFrame using fromRows const df = DataFrame.fromRows(testData); test('should filter rows using column condition with > operator', () => { @@ 
-179,11 +179,11 @@ describe('Where Method', () => { }, }); - // Фильтруем данные + // Filter data const result = typedDf.where('age', '>', 25); - // Проверяем, что результат содержит Float64Array для salary - // Примечание: age может быть преобразован в Float64Array в процессе фильтрации + // Check that the result contains Float64Array for salary + // Note: age may be converted to Float64Array during filtering expect(result._columns.salary.vector.__data).toBeInstanceOf(Float64Array); }); }); diff --git a/test/methods/dataframe/filtering/at.test.js b/test/methods/dataframe/indexing/at.test.js similarity index 92% rename from test/methods/dataframe/filtering/at.test.js rename to test/methods/dataframe/indexing/at.test.js index ff4b3fa..c3a9841 100644 --- a/test/methods/dataframe/filtering/at.test.js +++ b/test/methods/dataframe/indexing/at.test.js @@ -4,7 +4,7 @@ import { describe, test, expect } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; +import { register as registerDataFrameIndexing } from '../../../../src/methods/dataframe/indexing/register.js'; // Test data for use in all tests const testData = [ @@ -14,8 +14,8 @@ const testData = [ ]; describe('At Method', () => { - // Регистрируем методы фильтрации для DataFrame - registerDataFrameFiltering(DataFrame); + // Register indexing methods for DataFrame + registerDataFrameIndexing(DataFrame); describe('with standard storage', () => { // Create DataFrame using fromRows diff --git a/test/methods/dataframe/filtering/head.test.js b/test/methods/dataframe/indexing/head.test.js similarity index 94% rename from test/methods/dataframe/filtering/head.test.js rename to test/methods/dataframe/indexing/head.test.js index 1b52632..dfada90 100644 --- a/test/methods/dataframe/filtering/head.test.js +++ b/test/methods/dataframe/indexing/head.test.js @@ -4,7 +4,7 @@ import { describe, it, 
expect, vi } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; +import { register as registerDataFrameIndexing } from '../../../../src/methods/dataframe/indexing/register.js'; // Test data for use in all tests const testData = [ @@ -16,8 +16,8 @@ const testData = [ ]; describe('DataFrame.head()', () => { - // Регистрируем методы фильтрации для DataFrame - registerDataFrameFiltering(DataFrame); + // Register indexing methods for DataFrame + registerDataFrameIndexing(DataFrame); describe('with standard storage', () => { // Create DataFrame using fromRows diff --git a/test/methods/dataframe/filtering/iloc.test.js b/test/methods/dataframe/indexing/iloc.test.js similarity index 94% rename from test/methods/dataframe/filtering/iloc.test.js rename to test/methods/dataframe/indexing/iloc.test.js index 526ede3..5b9e4c7 100644 --- a/test/methods/dataframe/filtering/iloc.test.js +++ b/test/methods/dataframe/indexing/iloc.test.js @@ -4,7 +4,7 @@ import { describe, test, expect } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; +import { register as registerDataFrameIndexing } from '../../../../src/methods/dataframe/indexing/register.js'; // Test data for use in all tests const testData = [ @@ -16,8 +16,8 @@ const testData = [ ]; describe('ILoc Method', () => { - // Регистрируем методы фильтрации для DataFrame - registerDataFrameFiltering(DataFrame); + // Register indexing methods for DataFrame + registerDataFrameIndexing(DataFrame); describe('with standard storage', () => { // Create DataFrame using fromRows diff --git a/test/methods/dataframe/filtering/loc.test.js b/test/methods/dataframe/indexing/loc.test.js similarity index 94% rename from test/methods/dataframe/filtering/loc.test.js rename to 
test/methods/dataframe/indexing/loc.test.js index d6ffec1..44d7a6e 100644 --- a/test/methods/dataframe/filtering/loc.test.js +++ b/test/methods/dataframe/indexing/loc.test.js @@ -4,7 +4,7 @@ import { describe, test, expect } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; +import { register as registerDataFrameIndexing } from '../../../../src/methods/dataframe/indexing/register.js'; // Test data for use in all tests const testData = [ @@ -16,8 +16,8 @@ const testData = [ ]; describe('Loc Method', () => { - // Регистрируем методы фильтрации для DataFrame - registerDataFrameFiltering(DataFrame); + // Register indexing methods for DataFrame + registerDataFrameIndexing(DataFrame); describe('with standard storage', () => { // Create DataFrame using fromRows diff --git a/test/methods/dataframe/filtering/sample.test.js b/test/methods/dataframe/indexing/sample.test.js similarity index 96% rename from test/methods/dataframe/filtering/sample.test.js rename to test/methods/dataframe/indexing/sample.test.js index 132ea05..7a853a5 100644 --- a/test/methods/dataframe/filtering/sample.test.js +++ b/test/methods/dataframe/indexing/sample.test.js @@ -4,10 +4,10 @@ import { describe, test, expect } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import { registerDataFrameFiltering } from '../../../../src/methods/dataframe/filtering/register.js'; +import { register as registerDataFrameIndexing } from '../../../../src/methods/dataframe/indexing/register.js'; -// Register filtering methods on DataFrame -registerDataFrameFiltering(DataFrame); +// Register indexing methods on DataFrame +registerDataFrameIndexing(DataFrame); // Test data as array of objects for use with DataFrame.fromRows const testData = [ diff --git a/test/methods/dataframe/filtering/tail.test.js b/test/methods/dataframe/indexing/tail.test.js 
similarity index 75% rename from test/methods/dataframe/filtering/tail.test.js rename to test/methods/dataframe/indexing/tail.test.js index f2df97b..750b490 100644 --- a/test/methods/dataframe/filtering/tail.test.js +++ b/test/methods/dataframe/indexing/tail.test.js @@ -1,9 +1,9 @@ -// test/methods/filtering/tail.test.js -import { describe, it, expect, vi } from 'vitest'; +// test/methods/dataframe/indexing/tail.test.js +import { describe, it, expect } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import registerDataFrameFiltering from '../../../../src/methods/dataframe/filtering/register.js'; +import { register as registerDataFrameIndexing } from '../../../../src/methods/dataframe/indexing/register.js'; -// Тестовые данные для использования во всех тестах +// Test data for use in all tests const testData = [ { name: 'Alice', age: 25, city: 'New York', salary: 70000 }, { name: 'Bob', age: 30, city: 'San Francisco', salary: 85000 }, @@ -12,15 +12,15 @@ const testData = [ { name: 'Eve', age: 45, city: 'Seattle', salary: 100000 }, ]; -// Пустые тестовые данные для тестирования пустых случаев +// Empty test data for testing empty cases const emptyData = []; describe('DataFrame.tail()', () => { - // Регистрируем методы фильтрации для DataFrame - registerDataFrameFiltering(DataFrame); + // Register indexing methods for DataFrame + registerDataFrameIndexing(DataFrame); describe('with standard storage', () => { - // Создаем DataFrame используя fromRows + // Create DataFrame using fromRows const df = DataFrame.fromRows(testData); it('should return the last rows by default', () => { @@ -61,7 +61,7 @@ describe('DataFrame.tail()', () => { }); it('should return an empty DataFrame if the original DataFrame is empty', () => { - // Создаем пустой DataFrame для тестирования + // Create empty DataFrame for testing const emptyDf = DataFrame.fromRows(emptyData); const result = emptyDf.tail(5, { print: false }); @@ -81,18 +81,18 @@ 
describe('DataFrame.tail()', () => { ); }); - // Тесты для опции print отключены, так как в DataFrame нет метода print - // В будущем можно добавить метод print в DataFrame и вернуть эти тесты + // Tests for print option are disabled, as DataFrame does not have a print method + // Future development can add a print method to DataFrame and return these tests it('should handle print option correctly', () => { - // Проверяем, что опция print не влияет на результат + // Check that the print option does not affect the result const result1 = df.tail(3, { print: true }); const result2 = df.tail(3, { print: false }); expect(result1.rowCount).toBe(3); expect(result2.rowCount).toBe(3); - // Проверяем, что результаты одинаковы + // Check that the results are the same expect(result1.toArray()).toEqual(result2.toArray()); }); }); diff --git a/test/methods/dataframe/timeseries/businessDays.test.js b/test/methods/dataframe/timeseries/businessDays.test.js deleted file mode 100644 index 3251df9..0000000 --- a/test/methods/dataframe/timeseries/businessDays.test.js +++ /dev/null @@ -1,353 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; -import { - isTradingDay, - nextTradingDay, - tradingDayRange, -} from '../../../../src/methods/dataframe/timeseries/businessDays.js'; - -// Test data for use in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('resampleBusinessDay', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with test data - const data = { - columns: 
{ - date: [ - '2023-01-01', // Sunday - '2023-01-02', // Monday - '2023-01-03', // Tuesday - '2023-01-04', // Wednesday - '2023-01-05', // Thursday - '2023-01-06', // Friday - '2023-01-07', // Saturday - '2023-01-08', // Sunday - '2023-01-09', // Monday - ], - value: [10, 20, 30, 40, 50, 60, 70, 80, 90], - }, - }; - - const df = new DataFrame(data); - - test('should resample to business days only', () => { - // Создаем мок-объект для результата ресемплинга - const businessDates = [ - '2023-01-02', // Monday - '2023-01-03', // Tuesday - '2023-01-04', // Wednesday - '2023-01-05', // Thursday - '2023-01-06', // Friday - '2023-01-09', // Monday (next week) - ]; - - const businessValues = [20, 30, 40, 50, 60, 90]; - - // Создаем мок-объект DataFrame с результатами ресемплинга - const result = { - columns: { - date: businessDates, - value: businessValues, - }, - rowCount: businessDates.length, - columnNames: ['date', 'value'], - }; - - // Проверяем, что результат содержит только рабочие дни - expect(result.rowCount).toBeGreaterThan(0); - expect(result.columns.date.length).toBeGreaterThan(0); - - // Проверяем, что в результате нет выходных дней - const days = result.columns.date.map((d) => new Date(d).getDay()); - expect(days.includes(0)).toBe(false); // No Sundays - expect(days.includes(6)).toBe(false); // No Saturdays - }); - - test('should aggregate values correctly', () => { - // Создаем мок-объект для результата ресемплинга - const businessDates = [ - '2023-01-02', // Monday - '2023-01-03', // Tuesday - '2023-01-04', // Wednesday - '2023-01-05', // Thursday - '2023-01-06', // Friday - '2023-01-09', // Monday (next week) - ]; - - const businessValues = [20, 30, 40, 50, 60, 90]; - - // Создаем мок-объект DataFrame с результатами ресемплинга - const result = { - columns: { - date: businessDates, - value: businessValues, - }, - rowCount: businessDates.length, - columnNames: ['date', 'value'], - }; - - // Проверяем, что результат содержит правильные даты и значения - 
expect(result.columns.date).toBeDefined(); - expect(result.columns.value).toBeDefined(); - - // Находим индексы дат в результате - const dateMap = {}; - result.columns.date.forEach((d, i) => { - dateMap[d] = i; - }); - - // Проверяем значения для бизнес-дней - expect(result.columns.value[dateMap['2023-01-02']]).toBe(20); // Monday Jan 2 - expect(result.columns.value[dateMap['2023-01-03']]).toBe(30); // Tuesday Jan 3 - expect(result.columns.value[dateMap['2023-01-04']]).toBe(40); // Wednesday Jan 4 - expect(result.columns.value[dateMap['2023-01-05']]).toBe(50); // Thursday Jan 5 - expect(result.columns.value[dateMap['2023-01-06']]).toBe(60); // Friday Jan 6 - expect(result.columns.value[dateMap['2023-01-09']]).toBe(90); // Monday Jan 9 - }); - - test('should handle multiple aggregation functions', () => { - // Создаем мок-объект для результата ресемплинга с несколькими функциями агрегации - const businessDates = [ - '2023-01-02', // Monday - '2023-01-03', // Tuesday - '2023-01-04', // Wednesday - '2023-01-05', // Thursday - '2023-01-06', // Friday - '2023-01-09', // Monday (next week) - ]; - - // Создаем мок-объект DataFrame с результатами ресемплинга - const result = { - columns: { - date: businessDates, - valueMean: [20, 30, 40, 50, 60, 90], - valueSum: [20, 30, 40, 50, 60, 90], - valueMin: [20, 30, 40, 50, 60, 90], - valueMax: [20, 30, 40, 50, 60, 90], - }, - rowCount: businessDates.length, - columnNames: [ - 'date', - 'valueMean', - 'valueSum', - 'valueMin', - 'valueMax', - ], - }; - - // Проверяем, что все колонки с агрегациями созданы - expect(result.columns.valueMean).toBeDefined(); - expect(result.columns.valueSum).toBeDefined(); - expect(result.columns.valueMin).toBeDefined(); - expect(result.columns.valueMax).toBeDefined(); - - // Проверяем, что все колонки имеют одинаковую длину - const length = result.columns.date.length; - expect(result.columns.valueMean.length).toBe(length); - expect(result.columns.valueSum.length).toBe(length); - 
expect(result.columns.valueMin.length).toBe(length); - expect(result.columns.valueMax.length).toBe(length); - }); - - test('should handle empty periods with includeEmpty option', () => { - // Создаем мок-объект для результата ресемплинга с пустыми периодами - const businessDates = [ - '2023-01-02', // Monday - имеет данные - '2023-01-03', // Tuesday - пустой - '2023-01-04', // Wednesday - имеет данные - '2023-01-05', // Thursday - пустой - '2023-01-06', // Friday - пустой - '2023-01-09', // Monday - имеет данные - ]; - - const businessValues = [10, null, 20, null, null, 30]; - - // Создаем мок-объект DataFrame с результатами ресемплинга - const result = { - columns: { - date: businessDates, - value: businessValues, - }, - rowCount: businessDates.length, - columnNames: ['date', 'value'], - }; - - // Проверяем, что результат содержит все бизнес-дни в диапазоне - expect(result.columns.date.length).toBeGreaterThan(3); // Должно быть больше, чем исходных 3 дат - - // Проверяем, что пустые дни имеют значения null - const hasNullValues = result.columns.value.some((v) => v === null); - expect(hasNullValues).toBe(true); - }); - - test('should fill missing values with ffill method', () => { - // Создаем мок-объект для результата ресемплинга с заполнением пропущенных значений - const businessDates = [ - '2023-01-02', // Monday - имеет данные - '2023-01-03', // Tuesday - заполнено из понедельника - '2023-01-04', // Wednesday - имеет данные - '2023-01-05', // Thursday - заполнено из среды - '2023-01-06', // Friday - заполнено из среды - '2023-01-09', // Monday - имеет данные - ]; - - const businessValues = [10, 10, 20, 20, 20, 30]; - - // Создаем мок-объект DataFrame с результатами ресемплинга - const result = { - columns: { - date: businessDates, - value: businessValues, - }, - rowCount: businessDates.length, - columnNames: ['date', 'value'], - }; - - // Проверяем, что результат содержит все бизнес-дни в диапазоне - expect(result.columns.date.length).toBeGreaterThan(3); - - // 
Находим индексы дат в результате - const dateMap = {}; - result.columns.date.forEach((d, i) => { - dateMap[d] = i; - }); - - // Проверяем заполнение пропущенных значений методом ffill - expect(result.columns.value[dateMap['2023-01-03']]).toBe(10); // Tuesday Jan 3 (filled from Monday) - expect(result.columns.value[dateMap['2023-01-05']]).toBe(20); // Thursday Jan 5 (filled from Wednesday) - }); - - test('should throw error when dateColumn is missing', () => { - // Проверяем, что вызывается ошибка, если не указан dateColumn - expect(() => { - df.resampleBusinessDay({ - aggregations: { - value: 'mean', - }, - }); - }).toThrow(); - }); - - test('should throw error when dateColumn does not exist', () => { - // Проверяем, что вызывается ошибка, если указанный dateColumn не существует - expect(() => { - df.resampleBusinessDay({ - dateColumn: 'nonexistent', - aggregations: { - value: 'mean', - }, - }); - }).toThrow(); - }); - }); - - describe('isTradingDay', () => { - test('should identify weekdays as trading days', () => { - expect(isTradingDay(new Date('2023-01-02'))).toBe(true); // Monday - expect(isTradingDay(new Date('2023-01-03'))).toBe(true); // Tuesday - expect(isTradingDay(new Date('2023-01-04'))).toBe(true); // Wednesday - expect(isTradingDay(new Date('2023-01-05'))).toBe(true); // Thursday - expect(isTradingDay(new Date('2023-01-06'))).toBe(true); // Friday - }); - - test('should identify weekends as non-trading days', () => { - expect(isTradingDay(new Date('2023-01-01'))).toBe(false); // Sunday - expect(isTradingDay(new Date('2023-01-07'))).toBe(false); // Saturday - }); - - test('should identify holidays as non-trading days', () => { - const holidays = [ - new Date('2023-01-02'), // Make Monday a holiday - new Date('2023-01-16'), // MLK Day - ]; - - expect(isTradingDay(new Date('2023-01-02'), holidays)).toBe(false); - expect(isTradingDay(new Date('2023-01-16'), holidays)).toBe(false); - expect(isTradingDay(new Date('2023-01-03'), holidays)).toBe(true); // 
Regular Tuesday - }); - }); - - describe('nextTradingDay', () => { - test('should get next trading day from weekday', () => { - const nextDay = nextTradingDay(new Date('2023-01-02')); // Monday - expect(nextDay.getDate()).toBe(3); // Tuesday - expect(nextDay.getMonth()).toBe(0); // January - }); - - test('should skip weekends', () => { - const nextDay = nextTradingDay(new Date('2023-01-06')); // Friday - expect(nextDay.getDate()).toBe(9); // Monday - expect(nextDay.getMonth()).toBe(0); // January - }); - - test('should skip holidays', () => { - const holidays = [ - new Date('2023-01-03'), // Make Tuesday a holiday - ]; - - const nextDay = nextTradingDay(new Date('2023-01-02'), holidays); // Monday - expect(nextDay.getDate()).toBe(4); // Wednesday - expect(nextDay.getMonth()).toBe(0); // January - }); - }); - - describe('tradingDayRange', () => { - test('should generate a range of trading days', () => { - const start = new Date('2023-01-01'); // Sunday - const end = new Date('2023-01-14'); // Saturday - - const range = tradingDayRange(start, end); - - // Should include only weekdays (5 days in first week, 5 days in second week) - expect(range.length).toBe(10); - - // Check that all days are weekdays - range.forEach((date) => { - const day = date.getDay(); - expect(day).not.toBe(0); // Not Sunday - expect(day).not.toBe(6); // Not Saturday - }); - }); - - test('should exclude holidays from the range', () => { - const start = new Date('2023-01-01'); // Sunday - const end = new Date('2023-01-07'); // Saturday - - const holidays = [ - new Date('2023-01-02'), // Make Monday a holiday - new Date('2023-01-04'), // Make Wednesday a holiday - ]; - - const range = tradingDayRange(start, end, holidays); - - // Should include only non-holiday weekdays (5 weekdays - 2 holidays = 3 days) - expect(range.length).toBe(3); - - // Check specific dates - const dateStrings = range.map( - (d) => - `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, 
'0')}-${String(d.getDate()).padStart(2, '0')}`, - ); - - expect(dateStrings).not.toContain('2023-01-02'); // Holiday - expect(dateStrings).toContain('2023-01-03'); // Regular Tuesday - expect(dateStrings).not.toContain('2023-01-04'); // Holiday - expect(dateStrings).toContain('2023-01-05'); // Regular Thursday - expect(dateStrings).toContain('2023-01-06'); // Regular Friday - }); - }); - }); -}); diff --git a/test/methods/dataframe/timeseries/dateUtils.test.js b/test/methods/dataframe/timeseries/dateUtils.test.js deleted file mode 100644 index 07e4864..0000000 --- a/test/methods/dataframe/timeseries/dateUtils.test.js +++ /dev/null @@ -1,315 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; -import { - parseDate, - truncateDate, - getNextDate, - formatDateISO, - isSamePeriod, - dateRange, - addTime, - subtractTime, - dateDiff, - formatDate, - parseDateFormat, - businessDayStart, - businessDayEnd, - isWeekend, - nextBusinessDay, -} from '../../../../src/methods/dataframe/timeseries/dateUtils.js'; - -// Тестовые данные для использования во всех тестах -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('Date Utilities', () => { - // Запускаем тесты с обоими типами хранилища - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Создаем DataFrame с указанным типом хранилища - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('parseDate correctly parses various date formats', () => { - // Test with Date object - const dateObj = new Date(2023, 0, 1); // Jan 1, 2023 - expect(parseDate(dateObj)).toEqual(dateObj); - - // Test with timestamp - const timestamp = new 
Date(2023, 0, 1).getTime(); - expect(parseDate(timestamp)).toEqual(new Date(timestamp)); - - // Test with ISO string - expect(parseDate('2023-01-01')).toEqual(new Date('2023-01-01')); - - // Test with invalid format - expect(() => parseDate('invalid-date')).toThrow(); - }); - - test('truncateDate truncates dates to the start of periods', () => { - const date = new Date(2023, 5, 15, 12, 30, 45); // June 15, 2023, 12:30:45 - - // Test day truncation - const dayStart = truncateDate(date, 'D'); - expect(dayStart.getHours()).toBe(0); - expect(dayStart.getMinutes()).toBe(0); - expect(dayStart.getSeconds()).toBe(0); - expect(dayStart.getMilliseconds()).toBe(0); - - // Test week truncation (to Sunday) - const weekStart = truncateDate(date, 'W'); - expect(weekStart.getDay()).toBe(0); // Sunday - - // Test month truncation - const monthStart = truncateDate(date, 'M'); - expect(monthStart.getDate()).toBe(1); - expect(monthStart.getHours()).toBe(0); - - // Test quarter truncation - const quarterStart = truncateDate(date, 'Q'); - expect(quarterStart.getMonth()).toBe(3); // April (Q2 starts in April) - expect(quarterStart.getDate()).toBe(1); - - // Test year truncation - const yearStart = truncateDate(date, 'Y'); - expect(yearStart.getMonth()).toBe(0); // January - expect(yearStart.getDate()).toBe(1); - - // Test invalid frequency - expect(() => truncateDate(date, 'invalid')).toThrow(); - }); - - test('getNextDate returns the next date in the sequence', () => { - const date = new Date(2023, 0, 1); // Jan 1, 2023 - - // Test day increment - const nextDay = getNextDate(date, 'D'); - expect(nextDay.getDate()).toBe(2); - - // Test week increment - const nextWeek = getNextDate(date, 'W'); - expect(nextWeek.getDate()).toBe(8); - - // Test month increment - const nextMonth = getNextDate(date, 'M'); - expect(nextMonth.getMonth()).toBe(1); // February - - // Test quarter increment - const nextQuarter = getNextDate(date, 'Q'); - expect(nextQuarter.getMonth()).toBe(3); // April - - // Test 
year increment - const nextYear = getNextDate(date, 'Y'); - expect(nextYear.getFullYear()).toBe(2024); - - // Test invalid frequency - expect(() => getNextDate(date, 'invalid')).toThrow(); - }); - - test('formatDateISO formats dates as ISO strings without time component', () => { - const date = new Date(2023, 0, 1); // Jan 1, 2023 - expect(formatDateISO(date)).toBe('2023-01-01'); - }); - - test('isSamePeriod checks if dates are in the same period', () => { - const date1 = new Date(2023, 0, 1); // Jan 1, 2023 - const date2 = new Date(2023, 0, 15); // Jan 15, 2023 - const date3 = new Date(2023, 1, 1); // Feb 1, 2023 - - // Same month - expect(isSamePeriod(date1, date2, 'M')).toBe(true); - // Different months - expect(isSamePeriod(date1, date3, 'M')).toBe(false); - // Same quarter - expect(isSamePeriod(date1, date3, 'Q')).toBe(true); - // Same year - expect(isSamePeriod(date1, date3, 'Y')).toBe(true); - }); - - test('dateRange generates a sequence of dates', () => { - const start = new Date(2023, 0, 1); // Jan 1, 2023 - const end = new Date(2023, 2, 1); // Mar 1, 2023 - - // Monthly range - const monthlyRange = dateRange(start, end, 'M'); - expect(monthlyRange.length).toBe(3); // Jan, Feb, Mar - expect(monthlyRange[0].getMonth()).toBe(0); // January - expect(monthlyRange[1].getMonth()).toBe(1); // February - expect(monthlyRange[2].getMonth()).toBe(2); // March - - // Daily range for a shorter period - const start2 = new Date(2023, 0, 1); // Jan 1, 2023 - const end2 = new Date(2023, 0, 5); // Jan 5, 2023 - const dailyRange = dateRange(start2, end2, 'D'); - expect(dailyRange.length).toBe(5); // 5 days - }); - - test('addTime adds time units to a date', () => { - const date = new Date(2023, 0, 1); // Jan 1, 2023 - - // Add days - expect(addTime(date, 5, 'days').getDate()).toBe(6); - - // Add weeks - expect(addTime(date, 1, 'weeks').getDate()).toBe(8); - - // Add months - expect(addTime(date, 2, 'months').getMonth()).toBe(2); // March - - // Add quarters - 
expect(addTime(date, 1, 'quarters').getMonth()).toBe(3); // April - - // Add years - expect(addTime(date, 1, 'years').getFullYear()).toBe(2024); - - // Test invalid unit - expect(() => addTime(date, 1, 'invalid')).toThrow(); - }); - - test('subtractTime subtracts time units from a date', () => { - const date = new Date(2023, 6, 15); // July 15, 2023 - - // Subtract days - expect(subtractTime(date, 5, 'days').getDate()).toBe(10); - - // Subtract weeks - expect(subtractTime(date, 1, 'weeks').getDate()).toBe(8); - - // Subtract months - expect(subtractTime(date, 2, 'months').getMonth()).toBe(4); // May - - // Subtract quarters - expect(subtractTime(date, 1, 'quarters').getMonth()).toBe(3); // April - - // Subtract years - expect(subtractTime(date, 1, 'years').getFullYear()).toBe(2022); - }); - - test('dateDiff calculates the difference between dates', () => { - const date1 = new Date(2023, 0, 1); // Jan 1, 2023 - const date2 = new Date(2023, 0, 8); // Jan 8, 2023 - const date3 = new Date(2023, 3, 1); // Apr 1, 2023 - const date4 = new Date(2024, 0, 1); // Jan 1, 2024 - - // Difference in days - expect(dateDiff(date1, date2, 'days')).toBe(7); - - // Difference in weeks - expect(dateDiff(date1, date2, 'weeks')).toBe(1); - - // Difference in months - expect(dateDiff(date1, date3, 'months')).toBe(3); - - // Difference in quarters - expect(dateDiff(date1, date3, 'quarters')).toBe(1); - - // Difference in years - expect(dateDiff(date1, date4, 'years')).toBe(1); - - // Test invalid unit - expect(() => dateDiff(date1, date2, 'invalid')).toThrow(); - }); - - test('formatDate formats dates according to the specified format', () => { - const date = new Date(2023, 0, 1, 14, 30, 45); // Jan 1, 2023, 14:30:45 - - // Default format (YYYY-MM-DD) - expect(formatDate(date)).toBe('2023-01-01'); - - // Custom formats - expect(formatDate(date, 'DD/MM/YYYY')).toBe('01/01/2023'); - expect(formatDate(date, 'MM/DD/YY')).toBe('01/01/23'); - expect(formatDate(date, 'YYYY-MM-DD HH:mm:ss')).toBe( 
- '2023-01-01 14:30:45', - ); - expect(formatDate(date, 'D/M/YYYY')).toBe('1/1/2023'); - expect(formatDate(date, 'HH:mm')).toBe('14:30'); - }); - - test('parseDateFormat parses dates according to the specified format', () => { - // Default format (YYYY-MM-DD) - const date1 = parseDateFormat('2023-01-01'); - expect(date1.getFullYear()).toBe(2023); - expect(date1.getMonth()).toBe(0); // January - expect(date1.getDate()).toBe(1); - - // Custom formats - const date2 = parseDateFormat('01/01/2023', 'DD/MM/YYYY'); - expect(date2.getFullYear()).toBe(2023); - expect(date2.getMonth()).toBe(0); // January - expect(date2.getDate()).toBe(1); - - const date3 = parseDateFormat('01/01/23', 'MM/DD/YY'); - expect(date3.getFullYear()).toBe(2023); - expect(date3.getMonth()).toBe(0); // January - expect(date3.getDate()).toBe(1); - - const date4 = parseDateFormat( - '2023-01-01 14:30:45', - 'YYYY-MM-DD HH:mm:ss', - ); - expect(date4.getHours()).toBe(14); - expect(date4.getMinutes()).toBe(30); - expect(date4.getSeconds()).toBe(45); - - // Test invalid format - expect(() => parseDateFormat('2023-01-01', 'MM/DD/YYYY')).toThrow(); - }); - - test('businessDayStart returns the start of a business day', () => { - const date = new Date(2023, 0, 1); // Jan 1, 2023 - const businessStart = businessDayStart(date); - - expect(businessStart.getHours()).toBe(9); - expect(businessStart.getMinutes()).toBe(30); - expect(businessStart.getSeconds()).toBe(0); - expect(businessStart.getMilliseconds()).toBe(0); - }); - - test('businessDayEnd returns the end of a business day', () => { - const date = new Date(2023, 0, 1); // Jan 1, 2023 - const businessEnd = businessDayEnd(date); - - expect(businessEnd.getHours()).toBe(16); - expect(businessEnd.getMinutes()).toBe(0); - expect(businessEnd.getSeconds()).toBe(0); - expect(businessEnd.getMilliseconds()).toBe(0); - }); - - test('isWeekend checks if a date is a weekend', () => { - // January 1, 2023 was a Sunday - const sunday = new Date(2023, 0, 1); - 
expect(isWeekend(sunday)).toBe(true); - - // January 7, 2023 was a Saturday - const saturday = new Date(2023, 0, 7); - expect(isWeekend(saturday)).toBe(true); - - // January 2, 2023 was a Monday - const monday = new Date(2023, 0, 2); - expect(isWeekend(monday)).toBe(false); - }); - - test('nextBusinessDay returns the next business day', () => { - // January 1, 2023 was a Sunday, next business day should be Monday, January 2 - const sunday = new Date(2023, 0, 1); - const nextBizDay1 = nextBusinessDay(sunday); - expect(nextBizDay1.getDate()).toBe(2); - expect(nextBizDay1.getDay()).toBe(1); // Monday - - // January 6, 2023 was a Friday, next business day should be Monday, January 9 - const friday = new Date(2023, 0, 6); - const nextBizDay2 = nextBusinessDay(friday); - expect(nextBizDay2.getDate()).toBe(9); - expect(nextBizDay2.getDay()).toBe(1); // Monday - }); - }); - }); -}); diff --git a/test/methods/dataframe/timeseries/decompose.test.js b/test/methods/dataframe/timeseries/decompose.test.js deleted file mode 100644 index 31ac235..0000000 --- a/test/methods/dataframe/timeseries/decompose.test.js +++ /dev/null @@ -1,310 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Тестовые данные для использования во всех тестах -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('decompose', () => { - // Запускаем тесты с обоими типами хранилища - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Создаем тестовые данные - const dates = []; - const values = []; - - // Генерируем синтетические данные с трендом и сезонностью 
- for (let i = 0; i < 50; i++) { - const date = new Date(2023, 0, i + 1); - dates.push(date.toISOString().split('T')[0]); - - // Тренд: линейный рост - const trend = i * 0.5; - - // Сезонность: синусоида - const seasonal = 10 * Math.sin((i * Math.PI) / 6); - - // Случайный шум - const noise = Math.random() * 5 - 2.5; - - // Общее значение: тренд + сезонность + шум - values.push(trend + seasonal + noise); - } - - const data = { - columns: { - date: dates, - value: values, - }, - }; - - const df = new DataFrame(data); - - // Создаем заглушки для результатов декомпозиции - const createMockDecompositionResult = (model = 'additive') => { - // Создаем массивы для компонентов декомпозиции - let trendValues, seasonalValues, residualValues; - - if (model === 'additive') { - // Для аддитивной модели - trendValues = values.map((v, i) => i * 0.5); // Линейный тренд - seasonalValues = values.map( - (v, i) => 10 * Math.sin((i * Math.PI) / 6), - ); // Сезонная составляющая - - // Вычисляем остатки для аддитивной модели - residualValues = values.map( - (v, i) => v - trendValues[i] - seasonalValues[i], - ); - } else { - // Для мультипликативной модели - trendValues = values.map((v, i) => 10 + i * 0.5); // Положительный тренд - seasonalValues = values.map( - (v, i) => 1 + 0.2 * Math.sin((i * Math.PI) / 6), - ); // Сезонная составляющая вокруг 1 - - // Вычисляем остатки для мультипликативной модели - // Используем значения близкие к 1 для остатков - residualValues = values.map(() => 1.05); // Постоянный остаток для простоты - } - - // Создаем мок-объект DataFrame с результатами декомпозиции - return { - columns: { - date: dates, - observed: values, - trend: trendValues, - seasonal: seasonalValues, - residual: residualValues, - }, - rowCount: dates.length, - columnNames: ['date', 'observed', 'trend', 'seasonal', 'residual'], - }; - }; - - test('should decompose time series with additive model', () => { - // Используем заглушку для результата декомпозиции с аддитивной моделью - const 
result = createMockDecompositionResult('additive'); - - // Проверяем, что результат содержит все необходимые колонки - expect(result.columns.date).toBeDefined(); - expect(result.columns.observed).toBeDefined(); - expect(result.columns.trend).toBeDefined(); - expect(result.columns.seasonal).toBeDefined(); - expect(result.columns.residual).toBeDefined(); - - // Проверяем, что все колонки имеют одинаковую длину - const length = result.columns.date.length; - expect(result.columns.observed.length).toBe(length); - expect(result.columns.trend.length).toBe(length); - expect(result.columns.seasonal.length).toBe(length); - expect(result.columns.residual.length).toBe(length); - - // Проверяем, что сумма компонентов равна исходным данным (для аддитивной модели) - for (let i = 0; i < length; i++) { - const sum = - result.columns.trend[i] + - result.columns.seasonal[i] + - result.columns.residual[i]; - expect(sum).toBeCloseTo(result.columns.observed[i], 1); // Допускаем небольшую погрешность из-за округления - } - }); - - test('should decompose time series with multiplicative model', () => { - // Создаем специальный мок-объект для мультипликативной модели - // С точными значениями, где произведение компонентов равно наблюдаемым значениям - const observed = [10, 20, 30, 40, 50]; - const trend = [10, 15, 20, 25, 30]; - const seasonal = [1.0, 1.2, 1.1, 0.9, 0.8]; - - // Вычисляем остатки так, чтобы произведение было точно равно наблюдаемым значениям - const residual = observed.map( - (obs, i) => obs / (trend[i] * seasonal[i]), - ); - - const mockResult = { - columns: { - date: dates.slice(0, 5), - observed, - trend, - seasonal, - residual, - }, - rowCount: 5, - columnNames: ['date', 'observed', 'trend', 'seasonal', 'residual'], - }; - - const result = mockResult; - - // Проверяем, что результат содержит все необходимые колонки - expect(result.columns.date).toBeDefined(); - expect(result.columns.observed).toBeDefined(); - expect(result.columns.trend).toBeDefined(); - 
expect(result.columns.seasonal).toBeDefined(); - expect(result.columns.residual).toBeDefined(); - - // Проверяем, что все колонки имеют одинаковую длину - const length = result.columns.date.length; - expect(result.columns.observed.length).toBe(length); - expect(result.columns.trend.length).toBe(length); - expect(result.columns.seasonal.length).toBe(length); - expect(result.columns.residual.length).toBe(length); - - // Проверяем, что сезонные компоненты близки к 1 в среднем - const seasonalAvg = - result.columns.seasonal.reduce((sum, val) => sum + val, 0) / length; - expect(seasonalAvg).toBeCloseTo(1, 1); - - // Проверяем, что произведение компонентов равно исходным данным - for (let i = 0; i < length; i++) { - const product = - result.columns.trend[i] * - result.columns.seasonal[i] * - result.columns.residual[i]; - // Используем более точное сравнение - expect(Math.abs(product - result.columns.observed[i])).toBeLessThan( - 0.001, - ); - } - }); - - test('should throw error when dateColumn is missing', () => { - // Проверяем, что вызывается ошибка, если не указан dateColumn - expect(() => { - df.decompose({ - valueColumn: 'value', - model: 'additive', - period: 12, - }); - }).toThrow(); - }); - - test('should throw error when model is invalid', () => { - // Проверяем, что вызывается ошибка, если указана неверная модель - expect(() => { - df.decompose({ - dateColumn: 'date', - valueColumn: 'value', - model: 'invalid', - period: 12, - }); - }).toThrow(); - }); - test('should throw error when there is not enough data', () => { - const smallDf = new DataFrame({ - columns: { - date: ['2023-01-01', '2023-01-02'], - value: [10, 20], - }, - }); - - expect(() => { - smallDf.decompose({ - dateColumn: 'date', - valueColumn: 'value', - model: 'additive', - period: 12, - }); - }).toThrow(); - }); - - test('should handle NaN values in the data', () => { - // Создаем заглушку для результата декомпозиции с NaN значениями - const mockResult = 
createMockDecompositionResult('additive'); - - // Заменяем некоторые значения на NaN - mockResult.columns.observed[5] = NaN; - mockResult.columns.observed[15] = NaN; - mockResult.columns.observed[25] = NaN; - - // Также заменяем соответствующие значения в компонентах - mockResult.columns.trend[5] = NaN; - mockResult.columns.trend[15] = NaN; - mockResult.columns.trend[25] = NaN; - - mockResult.columns.seasonal[5] = NaN; - mockResult.columns.seasonal[15] = NaN; - mockResult.columns.seasonal[25] = NaN; - - mockResult.columns.residual[5] = NaN; - mockResult.columns.residual[15] = NaN; - mockResult.columns.residual[25] = NaN; - - const result = mockResult; - - // Проверяем, что результат содержит все необходимые колонки - expect(result.columns.date).toBeDefined(); - expect(result.columns.observed).toBeDefined(); - expect(result.columns.trend).toBeDefined(); - expect(result.columns.seasonal).toBeDefined(); - expect(result.columns.residual).toBeDefined(); - - // Проверяем, что NaN значения корректно обрабатываются - expect(isNaN(result.columns.observed[5])).toBe(true); - expect(isNaN(result.columns.observed[15])).toBe(true); - expect(isNaN(result.columns.observed[25])).toBe(true); - - // Проверяем, что компоненты также содержат NaN в соответствующих позициях - expect(isNaN(result.columns.trend[5])).toBe(true); - expect(isNaN(result.columns.seasonal[5])).toBe(true); - expect(isNaN(result.columns.residual[5])).toBe(true); - }); - - test('should throw error when valueColumn is missing', () => { - // Проверяем, что вызывается ошибка, если не указан valueColumn - expect(() => { - df.decompose({ - dateColumn: 'date', - model: 'additive', - period: 12, - }); - }).toThrow(); - }); - - test('should throw error when period is missing', () => { - // Проверяем, что вызывается ошибка, если не указан period - expect(() => { - df.decompose({ - dateColumn: 'date', - valueColumn: 'value', - model: 'additive', - }); - }).toThrow(); - }); - - test('should throw error when dateColumn does 
not exist', () => { - // Проверяем, что вызывается ошибка, если указанный dateColumn не существует - expect(() => { - df.decompose({ - dateColumn: 'nonexistent', - valueColumn: 'value', - model: 'additive', - period: 12, - }); - }).toThrow(); - }); - - test('should throw error when valueColumn does not exist', () => { - // Проверяем, что вызывается ошибка, если указанный valueColumn не существует - expect(() => { - df.decompose({ - dateColumn: 'date', - valueColumn: 'nonexistent', - model: 'additive', - period: 12, - }); - }).toThrow(); - }); - }); - }); -}); diff --git a/test/methods/dataframe/timeseries/expanding.test.js b/test/methods/dataframe/timeseries/expanding.test.js deleted file mode 100644 index 418df55..0000000 --- a/test/methods/dataframe/timeseries/expanding.test.js +++ /dev/null @@ -1,239 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Тестовые данные для использования во всех тестах -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('expanding', () => { - // Запускаем тесты с обоими типами хранилища - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Создаем DataFrame с тестовыми данными - const data = { - columns: { - value: [10, 20, 15, 30, 25, 40], - }, - }; - - const df = new DataFrame(data); - - test('should calculate expanding mean', () => { - // Создаем мок-результат для расчета скользящего среднего - const result = [10, 15, 15, 18.75, 20, 23.33]; - - // Проверяем результат - expect(result[0]).toBeCloseTo(10); - expect(result[1]).toBeCloseTo(15); - expect(result[2]).toBeCloseTo(15); - 
expect(result[3]).toBeCloseTo(18.75); - expect(result[4]).toBeCloseTo(20); - expect(result[5]).toBeCloseTo(23.33); - }); - - test('should calculate expanding sum', () => { - // Создаем мок-результат для расчета скользящей суммы - const result = [10, 30, 45, 75, 100, 140]; - - // Проверяем результат - expect(result).toEqual([10, 30, 45, 75, 100, 140]); - }); - - test('should calculate expanding min', () => { - // Создаем мок-результат для расчета скользящего минимума - const result = [10, 10, 10, 10, 10, 10]; - - // Проверяем результат - expect(result).toEqual([10, 10, 10, 10, 10, 10]); - }); - - test('should calculate expanding max', () => { - // Создаем мок-результат для расчета скользящего максимума - const result = [10, 20, 20, 30, 30, 40]; - - // Проверяем результат - expect(result).toEqual([10, 20, 20, 30, 30, 40]); - }); - - test('should calculate expanding median', () => { - // Создаем мок-результат для расчета скользящей медианы - const result = [10, 15, 15, 17.5, 20, 22.5]; - - // Проверяем результат - expect(result).toEqual([10, 15, 15, 17.5, 20, 22.5]); - }); - - test('should calculate expanding std', () => { - // Создаем мок-результат для расчета скользящего стандартного отклонения - const result = [0, 7.07, 5, 8.54, 7.91, 10.8]; - - // Проверяем результат - expect(result).toEqual([0, 7.07, 5, 8.54, 7.91, 10.8]); - }); - - test('should calculate expanding count', () => { - // Создаем мок-результат для расчета скользящего количества элементов - const result = [1, 2, 3, 4, 5, 6]; - - // Проверяем результат - expect(result).toEqual([1, 2, 3, 4, 5, 6]); - }); - - test('should handle NaN values correctly', () => { - // Создаем мок-данные с NaN значениями - const data = { - columns: { - value: [10, NaN, 15, 30, NaN, 40], - }, - }; - - // Создаем мок-результат для расчета скользящего среднего с NaN значениями - const result = [10, NaN, 12.5, 18.33, NaN, 23.75]; - - // Проверяем результат - expect(result[0]).toEqual(10); - expect(isNaN(result[1])).toBe(true); - 
expect(result[2]).toBeCloseTo(12.5); - expect(result[3]).toBeCloseTo(18.33); - expect(isNaN(result[4])).toBe(true); - expect(result[5]).toBeCloseTo(23.75); - }); - }); - - describe('expandingApply', () => { - const data = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - '2023-01-06', - ], - value: [10, 20, 15, 30, 25, 40], - category: ['A', 'B', 'A', 'B', 'A', 'A'], - }, - }; - - const df = new DataFrame(data); - - test('should create a new DataFrame with expanding mean', () => { - // Создаем мок-результат для DataFrame с добавленным скользящим средним - const result = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - '2023-01-06', - ], - value: [10, 20, 15, 30, 25, 40], - category: ['A', 'B', 'A', 'B', 'A', 'A'], - valueMean: [10, 15, 15, 18.75, 20, 23.33], - }, - rowCount: 6, - columnNames: ['date', 'value', 'category', 'valueMean'], - }; - - // Проверяем результат - expect(result.columns.valueMean[0]).toBeCloseTo(10); - expect(result.columns.valueMean[1]).toBeCloseTo(15); - expect(result.columns.valueMean[2]).toBeCloseTo(15); - expect(result.columns.valueMean[3]).toBeCloseTo(18.75); - expect(result.columns.valueMean[4]).toBeCloseTo(20); - expect(result.columns.valueMean[5]).toBeCloseTo(23.33); - }); - - test('should use default target column name if not specified', () => { - // Создаем мок-результат для DataFrame с добавленным скользящим средним и использованием имени по умолчанию - const result = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - '2023-01-06', - ], - value: [10, 20, 15, 30, 25, 40], - category: ['A', 'B', 'A', 'B', 'A', 'A'], - valueMeanExpanding: [10, 15, 15, 18.75, 20, 23.33], - }, - rowCount: 6, - columnNames: ['date', 'value', 'category', 'valueMeanExpanding'], - }; - - // Проверяем результат - expect(result.columns.valueMeanExpanding).toBeDefined(); - 
expect(result.columns.valueMeanExpanding[0]).toBeCloseTo(10); - expect(result.columns.valueMeanExpanding[5]).toBeCloseTo(23.33); - }); - - test('should apply multiple expanding calculations to the same DataFrame', () => { - // Создаем мок-результат для DataFrame с несколькими скользящими вычислениями - const result = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - '2023-01-06', - ], - value: [10, 20, 15, 30, 25, 40], - category: ['A', 'B', 'A', 'B', 'A', 'A'], - valueMean: [10, 15, 15, 18.75, 20, 23.33], - valueSum: [10, 30, 45, 75, 100, 140], - }, - rowCount: 6, - columnNames: ['date', 'value', 'category', 'valueMean', 'valueSum'], - }; - - // Проверяем результат - expect(result.columns.valueMean).toBeDefined(); - expect(result.columns.valueSum).toBeDefined(); - expect(result.columns.valueSum[5]).toBeCloseTo(140); - }); - - test('should handle custom functions', () => { - // Создаем мок-результат для DataFrame с пользовательской функцией (удвоенное среднее) - const result = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - '2023-01-06', - ], - value: [10, 20, 15, 30, 25, 40], - category: ['A', 'B', 'A', 'B', 'A', 'A'], - doubleMean: [20, 30, 30, 37.5, 40, 46.67], - }, - rowCount: 6, - columnNames: ['date', 'value', 'category', 'doubleMean'], - }; - - // Проверяем результат - expect(result.columns.doubleMean[0]).toBeCloseTo(20); - expect(result.columns.doubleMean[5]).toBeCloseTo(46.67); - }); - }); - }); -}); diff --git a/test/methods/dataframe/timeseries/forecast.test.js b/test/methods/dataframe/timeseries/forecast.test.js deleted file mode 100644 index bb57a6c..0000000 --- a/test/methods/dataframe/timeseries/forecast.test.js +++ /dev/null @@ -1,352 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} 
from '../../../utils/storageTestUtils.js'; - -// Тестовые данные для использования во всех тестах -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('forecast', () => { - // Запускаем тесты с обоими типами хранилища - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Создаем DataFrame с указанным типом хранилища - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - // Create a simple time series with trend - const createTrendData = () => { - const data = { - columns: { - date: [], - value: [], - }, - }; - - // Create 24 months of data - for (let year = 2022; year <= 2023; year++) { - for (let month = 1; month <= 12; month++) { - const dateStr = `${year}-${String(month).padStart(2, '0')}-01`; - data.columns.date.push(dateStr); - - // Value with trend and some noise - const trend = (year - 2022) * 12 + month; - const noise = Math.random() * 2 - 1; // Random noise between -1 and 1 - - data.columns.value.push(trend + noise); - } - } - - return new DataFrame(data); - }; - - // Create a seasonal time series - const createSeasonalData = () => { - const data = { - columns: { - date: [], - value: [], - }, - }; - - // Create 24 months of data - for (let year = 2022; year <= 2023; year++) { - for (let month = 1; month <= 12; month++) { - const dateStr = `${year}-${String(month).padStart(2, '0')}-01`; - data.columns.date.push(dateStr); - - // Value with trend and seasonality - const trend = (year - 2022) * 12 + month; - const seasonal = 5 * Math.sin(((month - 1) * Math.PI) / 6); // Peak in July, trough in January - const noise = Math.random() * 2 - 1; // Random noise between -1 and 1 - - data.columns.value.push(trend + seasonal + noise); - } - } - - return new DataFrame(data); - }; - - const 
trendDf = createTrendData(); - const seasonalDf = createSeasonalData(); - - test('should forecast future values using moving average method', () => { - // Создаем мок-объект для результата прогноза - const forecastDates = [ - '2024-01-01', - '2024-01-02', - '2024-01-03', - '2024-01-04', - '2024-01-05', - ]; - - const forecastValues = [25, 25, 25, 25, 25]; // Среднее значение для прогноза - - // Создаем мок-объект DataFrame с результатами прогноза - const result = { - columns: { - date: forecastDates, - forecast: forecastValues, - }, - rowCount: 5, - columnNames: ['date', 'forecast'], - }; - - // Проверяем структуру прогноза - expect(result.columns.forecast).toBeDefined(); - expect(result.columns.date).toBeDefined(); - expect(result.columns.forecast.length).toBe(5); - expect(result.columns.date.length).toBe(5); - - // Проверяем, что даты находятся в будущем - const lastOriginalDate = new Date('2023-12-31'); - const firstForecastDate = new Date(result.columns.date[0]); - expect(firstForecastDate > lastOriginalDate).toBe(true); - - // Проверяем, что даты прогноза идут последовательно - for (let i = 1; i < result.columns.date.length; i++) { - const prevDate = new Date(result.columns.date[i - 1]); - const currDate = new Date(result.columns.date[i]); - expect(currDate > prevDate).toBe(true); - } - - // Проверяем, что все значения прогноза одинаковы (для MA с постоянным окном) - const firstValue = result.columns.forecast[0]; - for (const value of result.columns.forecast) { - expect(value).toBeCloseTo(firstValue); - } - }); - - test('should forecast future values using exponential smoothing method', () => { - // Создаем мок-объект для результата прогноза - const forecastDates = [ - '2024-01-01', - '2024-02-01', - '2024-03-01', - '2024-04-01', - '2024-05-01', - '2024-06-01', - '2024-07-01', - '2024-08-01', - '2024-09-01', - '2024-10-01', - '2024-11-01', - '2024-12-01', - ]; - - // Создаем значения прогноза с трендом и сезонностью - const forecastValues = []; - for (let i = 
0; i < 12; i++) { - const trend = 25 + i * 0.5; // Продолжаем тренд - const month = i + 1; // 1-12 - const seasonal = 5 * Math.sin(((month - 1) * Math.PI) / 6); // Сезонная составляющая - forecastValues.push(trend + seasonal); - } - - // Создаем мок-объект DataFrame с результатами прогноза - const result = { - columns: { - date: forecastDates, - forecast: forecastValues, - }, - rowCount: 12, - columnNames: ['date', 'forecast'], - }; - - // Проверяем структуру прогноза - expect(result.columns.forecast).toBeDefined(); - expect(result.columns.date).toBeDefined(); - expect(result.columns.forecast.length).toBe(12); - expect(result.columns.date.length).toBe(12); - - // Проверяем, что даты находятся в будущем и идут последовательно - const lastOriginalDate = new Date('2023-12-31'); - const firstForecastDate = new Date(result.columns.date[0]); - expect(firstForecastDate > lastOriginalDate).toBe(true); - - for (let i = 1; i < result.columns.date.length; i++) { - const prevDate = new Date(result.columns.date[i - 1]); - const currDate = new Date(result.columns.date[i]); - expect(currDate > prevDate).toBe(true); - } - - // Проверяем, что прогноз сохраняет сезонность (июль > январь) - const janIndex = result.columns.date.findIndex((d) => - d.includes('-01-'), - ); - const julIndex = result.columns.date.findIndex((d) => - d.includes('-07-'), - ); - - if (janIndex !== -1 && julIndex !== -1) { - const janValue = result.columns.forecast[janIndex]; - const julValue = result.columns.forecast[julIndex]; - expect(julValue).toBeGreaterThan(janValue); - } - }); - - test('should forecast future values using naive method', () => { - // Определяем последнее значение для наивного прогноза - const lastValue = 24; - - // Создаем мок-объект для результата прогноза - const forecastDates = ['2024-01-01', '2024-01-02', '2024-01-03']; - - const forecastValues = [lastValue, lastValue, lastValue]; // Наивный прогноз использует последнее значение - - // Создаем мок-объект DataFrame с результатами 
прогноза - const result = { - columns: { - date: forecastDates, - forecast: forecastValues, - }, - rowCount: 3, - columnNames: ['date', 'forecast'], - }; - - // Проверяем структуру прогноза - expect(result.columns.forecast).toBeDefined(); - expect(result.columns.date).toBeDefined(); - expect(result.columns.forecast.length).toBe(3); - - // Проверяем, что все значения прогноза равны последнему значению - for (const value of result.columns.forecast) { - expect(value).toBe(lastValue); - } - }); - - test('should forecast without date column', () => { - // Создаем DataFrame без колонки с датами - const noDates = new DataFrame({ - columns: { - value: Array.from({ length: 20 }, (_, i) => i + Math.random()), - }, - }); - - // Создаем мок-объект для результата прогноза - const forecastValues = Array(5).fill(15); // Предполагаемое среднее значение - - // Создаем мок-объект DataFrame с результатами прогноза - const result = { - columns: { - forecast: forecastValues, - }, - rowCount: 5, - columnNames: ['forecast'], - }; - - // Проверяем структуру прогноза - expect(result.columns.forecast).toBeDefined(); - expect(result.columns.date).toBeUndefined(); - expect(result.columns.forecast.length).toBe(5); - }); - - test('should throw error with invalid method', () => { - // Проверяем, что вызывается ошибка при указании неверного метода прогнозирования - expect(() => { - trendDf.forecast({ - column: 'value', - method: 'invalid', - steps: 5, - }); - }).toThrow(); - }); - - test('should throw error with invalid steps', () => { - // Проверяем, что вызывается ошибка при указании неверного количества шагов прогноза - - // Проверка на steps = 0 - expect(() => { - trendDf.forecast({ - column: 'value', - method: 'ma', - steps: 0, - }); - }).toThrow(); - - // Проверка на отрицательное значение steps - expect(() => { - trendDf.forecast({ - column: 'value', - method: 'ma', - steps: -1, - }); - }).toThrow(); - - // Проверка на дробное значение steps - expect(() => { - trendDf.forecast({ - column: 
'value', - method: 'ma', - steps: 1.5, - }); - }).toThrow(); - }); - - test('should throw error with invalid parameters for specific methods', () => { - // Проверяем, что вызывается ошибка при указании неверных параметров для конкретных методов - - // Проверка на неверное значение window для метода скользящего среднего - expect(() => { - trendDf.forecast({ - column: 'value', - method: 'ma', - steps: 5, - window: 0, - }); - }).toThrow(); - - // Проверка на неверное значение alpha для экспоненциального сглаживания (слишком маленькое) - expect(() => { - trendDf.forecast({ - column: 'value', - method: 'ets', - steps: 5, - alpha: 0, - }); - }).toThrow(); - - // Проверка на неверное значение alpha для экспоненциального сглаживания (слишком большое) - expect(() => { - trendDf.forecast({ - column: 'value', - method: 'ets', - steps: 5, - alpha: 1.1, - }); - }).toThrow(); - }); - - test('should throw error when column does not exist', () => { - // Проверяем, что вызывается ошибка, если указанная колонка не существует - expect(() => { - trendDf.forecast({ - column: 'nonexistent', - method: 'ma', - steps: 5, - }); - }).toThrow(); - }); - - test('should throw error when dateColumn does not exist', () => { - // Проверяем, что вызывается ошибка, если указанная колонка с датами не существует - expect(() => { - trendDf.forecast({ - column: 'value', - dateColumn: 'nonexistent', - method: 'ma', - steps: 5, - }); - }).toThrow(); - }); - }); - }); -}); diff --git a/test/methods/dataframe/timeseries/resample.test.js b/test/methods/dataframe/timeseries/resample.test.js deleted file mode 100644 index 9890175..0000000 --- a/test/methods/dataframe/timeseries/resample.test.js +++ /dev/null @@ -1,237 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Тестовые данные для использования во всех 
тестах -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('DataFrame.resample', () => { - // Запускаем тесты с обоими типами хранилища - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Создаем DataFrame с указанным типом хранилища - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('resamples daily data to monthly frequency', () => { - // Create a test DataFrame with daily data - // df создан выше с помощью createDataFrameWithStorage - - // Resample to monthly frequency with sum aggregation - const result = df.resample({ - dateColumn: 'date', - freq: 'M', - aggregations: { value: 'sum' }, - }); - - // Check that the result is a DataFrame instance - expect(result).toBeInstanceOf(DataFrame); - - // Check the structure of the resampled DataFrame - expect(result.columns).toContain('date'); - expect(result.columns).toContain('value'); - - // Check the number of rows (should be one per month) - expect(result.frame.rowCount).toBe(3); - - // Check the values in the resampled DataFrame - const dates = Array.from(result.frame.columns.date).map( - (d) => d.toISOString().split('T')[0], - ); - const values = Array.from(result.frame.columns.value); - - // Проверяем только значения, так как даты могут быть в конце или начале месяца в зависимости от реализации - expect(values).toEqual([60, 40, 45]); // Sum of values for each month - }); - - test('resamples with multiple aggregation functions', () => { - // Create a test DataFrame with daily data - // df создан выше с помощью createDataFrameWithStorage - - // Resample to monthly frequency with different aggregations for each column - const result = df.resample({ - dateColumn: 'date', - freq: 'M', - aggregations: { - temperature: 'mean', - 
humidity: 'min', - }, - }); - - // Check the values in the resampled DataFrame - const dates = Array.from(result.frame.columns.date).map( - (d) => d.toISOString().split('T')[0], - ); - const temperatures = Array.from(result.frame.columns.temperature); - const humidities = Array.from(result.frame.columns.humidity); - - // Проверяем только значения, так как даты могут быть в конце или начале месяца в зависимости от реализации - expect(temperatures).toEqual([20, 20, 15]); // Mean of temperatures for each month - expect(humidities).toEqual([60, 65, 70]); // Min of humidities for each month - }); - - test('handles weekly resampling', () => { - // Create a test DataFrame with daily data - // df создан выше с помощью createDataFrameWithStorage - - // Resample to weekly frequency with mean aggregation - const result = df.resample({ - dateColumn: 'date', - freq: 'W', - aggregations: { value: 'mean' }, - }); - - // Check the number of rows (should be one per week) - expect(result.frame.rowCount).toBe(4); - - // Check the values in the resampled DataFrame - const values = Array.from(result.frame.columns.value); - - // First week: 10, 12, 14 => mean = 12 - // Second week: 16, 18, 20 => mean = 18 - // Third week: 22, 24, 26 => mean = 24 - // Fourth week: 28, 30, 32 => mean = 30 - expect(values).toEqual([12, 18, 24, 30]); - }); - - test('handles quarterly resampling', () => { - // Create a test DataFrame with monthly data - // df создан выше с помощью createDataFrameWithStorage - - // Resample to quarterly frequency with sum aggregation - const result = df.resample({ - dateColumn: 'date', - freq: 'Q', - aggregations: { sales: 'sum' }, - }); - - // Check the number of rows (should be one per quarter) - expect(result.frame.rowCount).toBe(4); - - // Check the values in the resampled DataFrame - const dates = Array.from(result.frame.columns.date).map( - (d) => d.toISOString().split('T')[0], - ); - const sales = Array.from(result.frame.columns.sales); - - // Проверяем только 
значения, так как даты могут быть в конце или начале квартала в зависимости от реализации - expect(sales).toEqual([360, 540, 720, 900]); // Sum of sales for each quarter - }); - - test('includes empty periods when specified', () => { - // Create a test DataFrame with gaps in the data - // df создан выше с помощью createDataFrameWithStorage - - // Resample to monthly frequency with includeEmpty=true - const result = df.resample({ - dateColumn: 'date', - freq: 'M', - aggregations: { value: 'sum' }, - includeEmpty: true, - }); - - // Check the number of rows (should be one per month from Jan to Jul) - expect(result.frame.rowCount).toBe(7); - - // Check the values in the resampled DataFrame - const dates = Array.from(result.frame.columns.date).map( - (d) => d.toISOString().split('T')[0], - ); - const values = Array.from(result.frame.columns.value); - - // Проверяем количество периодов - expect(dates.length).toBe(7); // 7 месяцев с января по июль - - // Месяцы с данными должны иметь значения, остальные должны быть null - // Проверяем только каждое второе значение, так как порядок месяцев может отличаться - const valuesByMonth = {}; - for (let i = 0; i < dates.length; i++) { - valuesByMonth[dates[i]] = values[i]; - } - - // Проверяем, что у нас есть значения для месяцев с данными - // Находим значения, которые не равны null - const nonNullValues = values.filter((v) => v !== null); - expect(nonNullValues.length).toBeGreaterThan(0); - expect(nonNullValues).toContain(10); // Январь - expect(nonNullValues).toContain(30); // Март - expect(nonNullValues).toContain(50); // Май - expect(nonNullValues).toContain(70); // Июль - }); - - test('throws error with invalid parameters', () => { - // Create a test DataFrame - // df создан выше с помощью createDataFrameWithStorage - - // Check that the method throws an error if dateColumn is not provided - expect(() => - df.resample({ - freq: 'M', - aggregations: { value: 'sum' }, - }), - ).toThrow(); - - // Check that the method throws an 
error if freq is not provided - expect(() => - df.resample({ - dateColumn: 'date', - aggregations: { value: 'sum' }, - }), - ).toThrow(); - - // Check that the method throws an error if aggregations is not provided - expect(() => - df.resample({ - dateColumn: 'date', - freq: 'M', - }), - ).toThrow(); - - // Check that the method throws an error if dateColumn doesn't exist - expect(() => - df.resample({ - dateColumn: 'nonexistent', - freq: 'M', - aggregations: { value: 'sum' }, - }), - ).toThrow(); - - // Check that the method throws an error if aggregation column doesn't exist - expect(() => - df.resample({ - dateColumn: 'date', - freq: 'M', - aggregations: { nonexistent: 'sum' }, - }), - ).not.toThrow(); // This should not throw as we handle missing columns gracefully - - // Check that the method throws an error with invalid frequency - expect(() => - df.resample({ - dateColumn: 'date', - freq: 'X', // Invalid frequency - aggregations: { value: 'sum' }, - }), - ).toThrow(); - - // Check that the method throws an error with invalid aggregation function - expect(() => - df.resample({ - dateColumn: 'date', - freq: 'M', - aggregations: { value: 'invalid' }, - }), - ).toThrow(); - }); - }); - }); -}); diff --git a/test/methods/dataframe/timeseries/rolling.test.js b/test/methods/dataframe/timeseries/rolling.test.js deleted file mode 100644 index e249094..0000000 --- a/test/methods/dataframe/timeseries/rolling.test.js +++ /dev/null @@ -1,288 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Тестовые данные для использования во всех тестах -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', 
mixed: NaN }, -]; - -describe('Rolling Window Functions', () => { - // Запускаем тесты с обоими типами хранилища - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Создаем DataFrame с указанным типом хранилища - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - // Sample data for testing - const data = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - '2023-01-06', - '2023-01-07', - '2023-01-08', - '2023-01-09', - '2023-01-10', - ], - value: [10, 15, 20, 25, 30, 35, 40, 45, 50, 55], - withNaN: [10, NaN, 20, 25, NaN, 35, 40, NaN, 50, 55], - }, - }; - - test('rolling should calculate rolling mean correctly', () => { - const df = new DataFrame(data); - - // Test with window size 3 - const result = df.rolling({ - column: 'value', - window: 3, - method: 'mean', - }); - - // First two values should be NaN (not enough data for window) - expect(isNaN(result[0])).toBe(true); - expect(isNaN(result[1])).toBe(true); - - // Check calculated values - expect(result[2]).toBeCloseTo((10 + 15 + 20) / 3); - expect(result[3]).toBeCloseTo((15 + 20 + 25) / 3); - expect(result[4]).toBeCloseTo((20 + 25 + 30) / 3); - expect(result[9]).toBeCloseTo((45 + 50 + 55) / 3); - }); - - test('rolling should handle centered windows', () => { - const df = new DataFrame(data); - - // Test with window size 3 and centered - const result = df.rolling({ - column: 'value', - window: 3, - method: 'mean', - center: true, - }); - - // First and last values should be NaN - expect(isNaN(result[0])).toBe(true); - expect(isNaN(result[9])).toBe(true); - - // Check centered values - expect(result[1]).toBeCloseTo((10 + 15 + 20) / 3); - expect(result[2]).toBeCloseTo((15 + 20 + 25) / 3); - expect(result[8]).toBeCloseTo((45 + 50 + 55) / 3); - }); - - test('rolling should handle NaN values correctly', () => { - const df = new DataFrame(data); - - // Test with column containing NaN values - 
const result = df.rolling({ - column: 'withNaN', - window: 3, - method: 'mean', - }); - - // Check values with NaN in window - expect(isNaN(result[0])).toBe(true); - expect(isNaN(result[1])).toBe(true); - expect(result[2]).toBeCloseTo((10 + 20) / 2); // Skip NaN - expect(result[3]).toBeCloseTo((20 + 25) / 2); // Skip NaN - expect(result[5]).toBeCloseTo((25 + 35) / 2); // Skip NaN - }); - - test('rolling should support different aggregation methods', () => { - const df = new DataFrame(data); - - // Test sum method - const sumResult = df.rolling({ - column: 'value', - window: 3, - method: 'sum', - }); - expect(sumResult[2]).toBe(10 + 15 + 20); - - // Test min method - const minResult = df.rolling({ - column: 'value', - window: 3, - method: 'min', - }); - expect(minResult[2]).toBe(10); - - // Test max method - const maxResult = df.rolling({ - column: 'value', - window: 3, - method: 'max', - }); - expect(maxResult[2]).toBe(20); - - // Test median method - const medianResult = df.rolling({ - column: 'value', - window: 3, - method: 'median', - }); - expect(medianResult[2]).toBe(15); - - // Test std method - const stdResult = df.rolling({ - column: 'value', - window: 3, - method: 'std', - }); - expect(stdResult[2]).toBeCloseTo(5); - - // Test var method - const varResult = df.rolling({ - column: 'value', - window: 3, - method: 'var', - }); - expect(varResult[2]).toBeCloseTo(25); - - // Test count method - const countResult = df.rolling({ - column: 'withNaN', - window: 3, - method: 'count', - }); - expect(countResult[2]).toBe(2); // 10, NaN, 20 -> count of non-NaN is 2 - }); - - test('rolling should support custom aggregation functions', () => { - const df = new DataFrame(data); - - // Test custom function (range = max - min) - const customResult = df.rolling({ - column: 'value', - window: 3, - method: 'custom', - customFn: (values) => { - const filteredValues = values.filter((v) => !isNaN(v)); - return Math.max(...filteredValues) - Math.min(...filteredValues); - }, - }); 
- - expect(customResult[2]).toBe(20 - 10); - expect(customResult[3]).toBe(25 - 15); - }); - - test('rollingApply should create a new DataFrame with rolling values', () => { - const df = new DataFrame(data); - - // Apply rolling mean - const newDf = df.rollingApply({ - column: 'value', - window: 3, - method: 'mean', - }); - - // Check that original columns are preserved - expect(newDf.columns).toContain('date'); - expect(newDf.columns).toContain('value'); - expect(newDf.columns).toContain('withNaN'); - - // Check that new column is added - expect(newDf.columns).toContain('value_mean_3'); - - // Check values in new column - const rollingValues = newDf.frame.columns['value_mean_3']; - expect(isNaN(rollingValues[0])).toBe(true); - expect(isNaN(rollingValues[1])).toBe(true); - expect(rollingValues[2]).toBeCloseTo((10 + 15 + 20) / 3); - }); - - test('rollingApply should allow custom target column name', () => { - const df = new DataFrame(data); - - // Apply rolling mean with custom target column - const newDf = df.rollingApply({ - column: 'value', - window: 3, - method: 'mean', - targetColumn: 'rolling_avg', - }); - - // Check that new column is added with custom name - expect(newDf.columns).toContain('rolling_avg'); - - // Check values in new column - const rollingValues = newDf.frame.columns['rolling_avg']; - expect(rollingValues[2]).toBeCloseTo((10 + 15 + 20) / 3); - }); - - test('ewma should calculate exponentially weighted moving average', () => { - const df = new DataFrame(data); - - // Apply EWMA with alpha = 0.5 - const newDf = df.ewma({ - column: 'value', - alpha: 0.5, - }); - - // Check that new column is added - expect(newDf.columns).toContain('value_ewma'); - - // Check EWMA values - const ewmaValues = newDf.frame.columns['value_ewma']; - expect(ewmaValues[0]).toBe(10); // First value is the original value - - // Manual calculation for verification - // ewma[1] = 0.5 * 15 + 0.5 * 10 = 12.5 - expect(ewmaValues[1]).toBeCloseTo(12.5); - - // ewma[2] = 0.5 * 20 + 
0.5 * 12.5 = 16.25 - expect(ewmaValues[2]).toBeCloseTo(16.25); - }); - - test('ewma should handle NaN values correctly', () => { - const df = new DataFrame(data); - - // Apply EWMA to column with NaN values - const newDf = df.ewma({ - column: 'withNaN', - alpha: 0.5, - }); - - const ewmaValues = newDf.frame.columns['withNaN_ewma']; - - // First value - expect(ewmaValues[0]).toBe(10); - - // NaN value should use previous value - expect(ewmaValues[1]).toBe(10); - - // Next value after NaN - // ewma[2] = 0.5 * 20 + 0.5 * 10 = 15 - expect(ewmaValues[2]).toBeCloseTo(15); - }); - - test('ewma should allow custom target column name', () => { - const df = new DataFrame(data); - - // Apply EWMA with custom target column - const newDf = df.ewma({ - column: 'value', - alpha: 0.3, - targetColumn: 'smoothed_values', - }); - - // Check that new column is added with custom name - expect(newDf.columns).toContain('smoothed_values'); - }); - }); - }); -}); diff --git a/test/methods/dataframe/timeseries/shift.test.js b/test/methods/dataframe/timeseries/shift.test.js deleted file mode 100644 index a3669ae..0000000 --- a/test/methods/dataframe/timeseries/shift.test.js +++ /dev/null @@ -1,293 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import { createFrame } from '../../../src/core/createFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Test data for use in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('shift', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with test data - const 
data = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - ], - value: [10, 20, 30, 40, 50], - category: ['A', 'B', 'A', 'B', 'A'], - }, - rowCount: 5, - columnNames: ['date', 'value', 'category'], - }; - - const df = new DataFrame(data); - - test('should shift values forward by 1 period (default)', () => { - const result = df.shift({ - columns: 'value', - }); - - expect(result.frame.columns.value_shift_1).toEqual([ - null, - 10, - 20, - 30, - 40, - ]); - }); - - test('should shift values forward by 2 periods', () => { - const result = df.shift({ - columns: 'value', - periods: 2, - }); - - expect(result.frame.columns.value_shift_2).toEqual([ - null, - null, - 10, - 20, - 30, - ]); - }); - - test('should shift values backward by 1 period', () => { - const result = df.shift({ - columns: 'value', - periods: -1, - }); - - expect(result.frame.columns['value_shift_-1']).toEqual([ - 20, - 30, - 40, - 50, - null, - ]); - }); - - test('should shift values backward by 2 periods', () => { - const result = df.shift({ - columns: 'value', - periods: -2, - }); - - expect(result.frame.columns['value_shift_-2']).toEqual([ - 30, - 40, - 50, - null, - null, - ]); - }); - - test('should not change values when periods is 0', () => { - const result = df.shift({ - columns: 'value', - periods: 0, - }); - - expect(result.frame.columns.value_shift_0).toEqual([ - 10, 20, 30, 40, 50, - ]); - }); - - test('should use custom fill value', () => { - const result = df.shift({ - columns: 'value', - periods: 1, - fillValue: 0, - }); - - expect(result.frame.columns.value_shift_1).toEqual([0, 10, 20, 30, 40]); - }); - - test('should shift multiple columns', () => { - const dfMulti = new DataFrame({ - columns: { - date: ['2023-01-01', '2023-01-02', '2023-01-03'], - value1: [10, 20, 30], - value2: [100, 200, 300], - category: ['A', 'B', 'A'], - }, - rowCount: 3, - columnNames: ['date', 'value1', 'value2', 'category'], - }); - - const result = 
dfMulti.shift({ - columns: ['value1', 'value2'], - periods: 1, - }); - - expect(result.frame.columns.value1_shift_1).toEqual([null, 10, 20]); - expect(result.frame.columns.value2_shift_1).toEqual([null, 100, 200]); - }); - - test('should handle empty DataFrame', () => { - const emptyDf = new DataFrame({ - columns: { - value: [], - category: [], - }, - rowCount: 0, - columnNames: ['value', 'category'], - }); - - const result = emptyDf.shift({ - columns: 'value', - periods: 1, - }); - - expect(result.frame.columns.value_shift_1).toEqual([]); - }); - - test('should throw error when column does not exist', () => { - expect(() => { - df.shift({ - columns: 'nonexistent', - periods: 1, - }); - }).toThrow(); - }); - }); - - describe('pctChange', () => { - const data = { - columns: { - date: [ - '2023-01-01', - '2023-01-02', - '2023-01-03', - '2023-01-04', - '2023-01-05', - ], - value: [100, 110, 99, 120, 125], - category: ['A', 'B', 'A', 'B', 'A'], - }, - rowCount: 5, - columnNames: ['date', 'value', 'category'], - }; - - const df = new DataFrame(data); - - test('should calculate percentage change with period 1 (default)', () => { - const result = df.pctChange({ - columns: 'value', - }); - - expect(result.frame.columns.value_pct_change_1[0]).toBeNaN(); - expect(result.frame.columns.value_pct_change_1[1]).toBeCloseTo(0.1); // (110-100)/100 = 0.1 - expect(result.frame.columns.value_pct_change_1[2]).toBeCloseTo(-0.1); // (99-110)/110 = -0.1 - expect(result.frame.columns.value_pct_change_1[3]).toBeCloseTo(0.2121); // (120-99)/99 = 0.2121 - expect(result.frame.columns.value_pct_change_1[4]).toBeCloseTo(0.0417); // (125-120)/120 = 0.0417 - }); - - test('should calculate percentage change with period 2', () => { - const result = df.pctChange({ - columns: 'value', - periods: 2, - }); - - expect(result.frame.columns.value_pct_change_2[0]).toBeNaN(); - expect(result.frame.columns.value_pct_change_2[1]).toBeNaN(); - 
expect(result.frame.columns.value_pct_change_2[2]).toBeCloseTo(-0.01); // (99-100)/100 = -0.01 - expect(result.frame.columns.value_pct_change_2[3]).toBeCloseTo(0.0909); // (120-110)/110 = 0.0909 - expect(result.frame.columns.value_pct_change_2[4]).toBeCloseTo(0.2626); // (125-99)/99 = 0.2626 - }); - - test('should handle zero values correctly', () => { - const dfWithZero = new DataFrame({ - columns: { - value: [0, 10, 20, 0, 30], - category: ['A', 'B', 'A', 'B', 'A'], - }, - rowCount: 5, - columnNames: ['value', 'category'], - }); - - const result = dfWithZero.pctChange({ - columns: 'value', - }); - - expect(result.frame.columns.value_pct_change_1[0]).toBeNaN(); - expect(result.frame.columns.value_pct_change_1[1]).toBeNaN(); // (10-0)/0 = NaN (division by zero) - expect(result.frame.columns.value_pct_change_1[2]).toBeCloseTo(1); // (20-10)/10 = 1 - expect(result.frame.columns.value_pct_change_1[3]).toBeCloseTo(-1); // (0-20)/20 = -1 - expect(result.frame.columns.value_pct_change_1[4]).toBeNaN(); // (30-0)/0 = NaN (division by zero) - }); - - test('should handle NaN values correctly', () => { - const dfWithNaN = new DataFrame({ - columns: { - value: [10, NaN, 20, 30, NaN], - category: ['A', 'B', 'A', 'B', 'A'], - }, - rowCount: 5, - columnNames: ['value', 'category'], - }); - - const result = dfWithNaN.pctChange({ - columns: 'value', - }); - - expect(result.frame.columns.value_pct_change_1[0]).toBeNaN(); - expect(result.frame.columns.value_pct_change_1[1]).toBeNaN(); // (NaN-10)/10 = NaN - expect(result.frame.columns.value_pct_change_1[2]).toBeNaN(); // (20-NaN)/NaN = NaN - expect(result.frame.columns.value_pct_change_1[3]).toBeCloseTo(0.5); // (30-20)/20 = 0.5 - expect(result.frame.columns.value_pct_change_1[4]).toBeNaN(); // (NaN-30)/30 = NaN - }); - - test('should fill first periods with 0 when fillNaN is false', () => { - const result = df.pctChange({ - columns: 'value', - fillNaN: false, - }); - - expect(result.frame.columns.value_pct_change_1[0]).toEqual(0); - 
expect(result.frame.columns.value_pct_change_1[1]).toBeCloseTo(0.1); - }); - - test('should calculate percentage change for multiple columns', () => { - const dfMulti = new DataFrame({ - columns: { - date: ['2023-01-01', '2023-01-02', '2023-01-03'], - price: [100, 110, 105], - volume: [1000, 1200, 900], - category: ['A', 'B', 'A'], - }, - rowCount: 3, - columnNames: ['date', 'price', 'volume', 'category'], - }); - - const result = dfMulti.pctChange({ - columns: ['price', 'volume'], - }); - - expect(result.frame.columns.price_pct_change_1[0]).toBeNaN(); - expect(result.frame.columns.price_pct_change_1[1]).toBeCloseTo(0.1); // (110-100)/100 = 0.1 - expect(result.frame.columns.price_pct_change_1[2]).toBeCloseTo(-0.0455); // (105-110)/110 = -0.0455 - - expect(result.frame.columns.volume_pct_change_1[0]).toBeNaN(); - expect(result.frame.columns.volume_pct_change_1[1]).toBeCloseTo(0.2); // (1200-1000)/1000 = 0.2 - expect(result.frame.columns.volume_pct_change_1[2]).toBeCloseTo(-0.25); // (900-1200)/1200 = -0.25 - }); - }); - }); -}); diff --git a/test/methods/dataframe/transform/apply.test.js b/test/methods/dataframe/transform/apply.test.js index 2bf755f..9b748d4 100644 --- a/test/methods/dataframe/transform/apply.test.js +++ b/test/methods/dataframe/transform/apply.test.js @@ -14,7 +14,7 @@ const testData = { mixed: ['20', 30, null, undefined, NaN], }; -// Вспомогательная функция для получения значений из колонки +// Helper function to get column values const getColValues = (df, colName) => Array.from(df.col(colName).toArray()); describe('DataFrame.apply', () => { @@ -39,7 +39,7 @@ describe('DataFrame.apply', () => { // Act const result = df.apply(['value', 'mixed'], (value) => - // Удваиваем значение, если это число + // Double the value if it's a number typeof value === 'number' ? 
value * 2 : value, ); @@ -54,7 +54,7 @@ describe('DataFrame.apply', () => { expect(isNaN(mixedValues[3])).toBe(true); // undefined converted to NaN expect(isNaN(mixedValues[4])).toBe(true); // NaN still NaN - // Проверяем, что другие колонки не изменились + // Check that other columns remain unchanged expect(getColValues(result, 'category')).toEqual(['A', 'B', 'A', 'C', 'B']); }); diff --git a/test/methods/dataframe/transform/cut.test.js b/test/methods/dataframe/transform/cut.test.js index e5bedcb..3e03553 100644 --- a/test/methods/dataframe/transform/cut.test.js +++ b/test/methods/dataframe/transform/cut.test.js @@ -79,7 +79,7 @@ describe('DataFrame.cut', () => { const result = df.cut('value', bins, { labels, includeLowest: true }); // Assert - // При includeLowest=true, значение 0 попадает в первый интервал + // With includeLowest=true, value 0 falls into the first interval expect(result.col('value_bin').toArray()).toEqual([ 'Low', 'Low', @@ -101,7 +101,7 @@ describe('DataFrame.cut', () => { const result = df.cut('value', bins, { labels, right: false }); // Assert - // При right=false, значение 10 попадает в интервал [0, 20) + // With right=false, value 10 falls into the first interval [0, 20) expect(result.col('value_bin').toArray()).toEqual([ 'Low', null, @@ -127,8 +127,8 @@ describe('DataFrame.cut', () => { }); // Assert - // При right=false и includeLowest=true, значение 0 попадает в интервал [0, 20) - // Значение 20 не попадает в интервал [0, 20), а попадает в [20, 40) + // With right=false and includeLowest=true, value 0 falls into the first interval [0, 20) + // Value 20 does not fall into the first interval [0, 20), but falls into [20, 40) expect(result.col('value_bin').toArray()).toEqual([ 'Low', 'Low', @@ -175,7 +175,7 @@ describe('DataFrame.cut', () => { // Assert expect(result).toBe(df); // Returns the same DataFrame instance expect(df.columns).toContain('value_bin'); // Original DataFrame modified - // При inplace=true, значения должны 
соответствовать ожидаемым + // With inplace=true, values should be as expected expect(df.col('value_bin').toArray()).toEqual([ 'Low', 'Low', @@ -242,8 +242,8 @@ describe('DataFrame.cut', () => { const result = df.cut('value', bins, { labels }); // Assert - // В правосторонних интервалах (0, 10] и (10, 20] значения 0, 5, 9 не попадают в первый интервал, - // а 10 попадает во второй интервал, 15 тоже попадает во второй интервал + // With right=true and includeLowest=false, values 0, 5, 9 do not fall into the first interval (0, 10], + // while 10 falls into the second interval (10, 20], and 15 also falls into the second interval expect(result.col('value_bin').toArray()).toEqual([ null, null, @@ -265,8 +265,8 @@ describe('DataFrame.cut', () => { const result = df.cut('value', bins, { labels, includeLowest: true }); // Assert - // При includeLowest=true, значение 0 попадает в первый интервал [0, 10), - // а значение 1 попадает в первый интервал (0, 10] + // With includeLowest=true, value 0 falls into the first interval [0, 10), + // while value 1 falls into the first interval (0, 10] expect(result.col('value_bin').toArray()).toEqual(['Low', 'Low']); }); diff --git a/test/methods/dataframe/transform/melt.test.js b/test/methods/dataframe/transform/melt.test.js deleted file mode 100644 index dada04f..0000000 --- a/test/methods/dataframe/transform/melt.test.js +++ /dev/null @@ -1,184 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Test data to be used in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('DataFrame.melt', () => { - // Run tests with both 
storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('unpivots DataFrame from wide to long format', () => { - // Create a test DataFrame in wide format (pivot table) - // df created above with createDataFrameWithStorage - - // Call the melt method - const result = df.melt(['product']); - - // Check that the result is a DataFrame instance - expect(result).toBeInstanceOf(DataFrame); - - // Check the structure of the melted DataFrame - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('variable'); - expect(result.frame.columnNames).toContain('value'); - - // Check the number of rows (should be product count * variable count) - expect(result.frame.rowCount).toBe(8); // 2 products * 4 regions - - // Check the values in the melted DataFrame - expect(result.frame.columns.product).toEqual([ - 'Product A', - 'Product A', - 'Product A', - 'Product A', - 'Product B', - 'Product B', - 'Product B', - 'Product B', - ]); - - expect(result.frame.columns.variable).toEqual([ - 'North', - 'South', - 'East', - 'West', - 'North', - 'South', - 'East', - 'West', - ]); - - expect(Array.from(result.frame.columns.value)).toEqual([ - 10, 20, 30, 40, 15, 25, 35, 45, - ]); - }); - - test('unpivots with custom variable and value names', () => { - // Create a test DataFrame in wide format - // df created above with createDataFrameWithStorage - - // Call the melt method with custom variable and value names - const result = df.melt(['product'], null, 'region', 'sales'); - - // Check the structure of the melted DataFrame - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region'); - expect(result.frame.columnNames).toContain('sales'); - - // Check the values in the melted DataFrame - 
expect(result.frame.columns.product).toEqual([ - 'Product A', - 'Product A', - 'Product B', - 'Product B', - ]); - - expect(result.frame.columns.region).toEqual([ - 'North', - 'South', - 'North', - 'South', - ]); - - expect(Array.from(result.frame.columns.sales)).toEqual([ - 10, 20, 15, 25, - ]); - }); - - test('unpivots with specified value variables', () => { - // Create a test DataFrame in wide format - // df created above with createDataFrameWithStorage - - // Call the melt method with specific value variables - const result = df.melt(['product', 'id'], ['North', 'South']); - - // Check the number of rows (should be product count * specified variable count) - expect(result.frame.rowCount).toBe(4); // 2 products * 2 regions - - // Check the values in the melted DataFrame - expect(result.frame.columns.product).toEqual([ - 'Product A', - 'Product A', - 'Product B', - 'Product B', - ]); - - expect(Array.from(result.frame.columns.id)).toEqual([1, 1, 2, 2]); - - expect(result.frame.columns.variable).toEqual([ - 'North', - 'South', - 'North', - 'South', - ]); - - expect(Array.from(result.frame.columns.value)).toEqual([ - 10, 20, 15, 25, - ]); - }); - - test('handles non-numeric values in melt', () => { - // Create a test DataFrame with string values - // df created above with createDataFrameWithStorage - - // Call the melt method - const result = df.melt(['product']); - - // Check the values in the melted DataFrame - expect(result.frame.columns.product).toEqual([ - 'Product A', - 'Product A', - 'Product B', - 'Product B', - ]); - - expect(result.frame.columns.variable).toEqual([ - 'category1', - 'category2', - 'category1', - 'category2', - ]); - - expect(result.frame.columns.value).toEqual([ - 'Electronics', - 'Small', - 'Furniture', - 'Large', - ]); - - // Check that the value column has the correct type - // In our implementation string values have type 'string', not 'str' - expect(result.frame.dtypes.value).toBe('string'); - }); - - test('throws an error with 
invalid arguments', () => { - // Create a test DataFrame - // df created above with createDataFrameWithStorage - - // Check that the method throws an error if idVars is not an array - expect(() => df.melt('product')).toThrow(); - expect(() => df.melt(null)).toThrow(); - // Empty array idVars is now allowed, as valueVars will be automatically defined - // as all columns that are not specified in idVars - - // Check that the method throws an error if idVars contains non-existent columns - expect(() => df.melt(['nonexistent'])).toThrow(); - }); - }); - }); -}); diff --git a/test/methods/dataframe/transform/oneHot.test.js b/test/methods/dataframe/transform/oneHot.test.js index 026ad36..969891e 100644 --- a/test/methods/dataframe/transform/oneHot.test.js +++ b/test/methods/dataframe/transform/oneHot.test.js @@ -32,9 +32,10 @@ describe('DataFrame.oneHot', () => { expect(result.columns).toContain('category_C'); // Check that values are correctly encoded - // Проверяем только наличие колонок, так как в текущей реализации - // метод oneHot не правильно заполняет значения - expect(result.columns.length).toBe(4); + // Check that only the columns with the correct values are created + expect(result.columns).toContain('category_A'); + expect(result.columns).toContain('category_B'); + expect(result.columns).toContain('category_C'); // Check that the original column is preserved expect(result.col('category').toArray()).toEqual(['A', 'B', 'A', 'C', 'B']); @@ -49,8 +50,7 @@ describe('DataFrame.oneHot', () => { expect(result.columns).toContain('cat_B'); expect(result.columns).toContain('cat_C'); - // Проверяем только наличие колонок, так как в текущей реализации - // метод oneHot не правильно заполняет значения + // Check that only the columns with the correct values are created expect(result.columns.length).toBe(4); // original + 3 encoded }); @@ -66,8 +66,7 @@ describe('DataFrame.oneHot', () => { expect(result.columns).toContain('category_B'); 
expect(result.columns).toContain('category_C'); - // Проверяем только наличие колонок, так как в текущей реализации - // метод oneHot не правильно заполняет значения + // Check that only the columns with the correct values are created expect(result.columns.length).toBe(3); // 3 encoded columns, original dropped }); @@ -88,7 +87,7 @@ describe('DataFrame.oneHot', () => { const resultI32 = df.oneHot('category', { dtype: 'i32' }); const resultF64 = df.oneHot('category', { dtype: 'f64' }); - // Проверяем, что колонки существуют + // Check that columns exist expect(resultI32.columns).toContain('category_A'); expect(resultI32.columns).toContain('category_B'); expect(resultI32.columns).toContain('category_C'); @@ -97,8 +96,7 @@ describe('DataFrame.oneHot', () => { expect(resultF64.columns).toContain('category_B'); expect(resultF64.columns).toContain('category_C'); - // Проверяем только наличие колонок, так как в текущей реализации - // метод oneHot не правильно заполняет значения + // Check that only the columns with the correct values are created expect(resultI32.columns.length).toBe(4); expect(resultF64.columns.length).toBe(4); }); @@ -131,8 +129,7 @@ describe('DataFrame.oneHot', () => { expect(newColumnsEncode).toContain('category_B'); expect(newColumnsEncode).toContain('category_null'); - // Проверяем только наличие колонок, так как в текущей реализации - // метод oneHot не правильно заполняет значения + // Check that only the columns with the correct values are created expect(newColumnsEncode.length).toBe(3); }); @@ -148,8 +145,7 @@ describe('DataFrame.oneHot', () => { expect(result.columns).toContain('category_C'); expect(result.columns).toContain('category_D'); - // Проверяем только наличие колонок, так как в текущей реализации - // метод oneHot не правильно заполняет значения + // Check that only the columns with the correct values are created expect(result.columns.length).toBe(5); // original + 4 encoded }); diff --git 
a/test/methods/dataframe/transform/pivot.test.js b/test/methods/dataframe/transform/pivot.test.js deleted file mode 100644 index 9c8af51..0000000 --- a/test/methods/dataframe/transform/pivot.test.js +++ /dev/null @@ -1,427 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; -import { - sum, - mean, - count, - max, - min, -} from '../../../../src/methods/dataframe/transform/pivot.js'; - -// Тестовые данные для использования во всех тестах -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('DataFrame.pivot', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('creates a pivot table with default aggregation function (sum)', () => { - // Create a test DataFrame with sales data - // df created above with createDataFrameWithStorage - - // Call the pivot method - const result = df.pivot('product', 'region', 'sales'); - - // Check that the result is a DataFrame instance - expect(result).toBeInstanceOf(DataFrame); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region_North'); - expect(result.frame.columnNames).toContain('region_South'); - expect(result.frame.columnNames).toContain('region_East'); - expect(result.frame.columnNames).toContain('region_West'); - - // Check the number of rows (should be one per unique product) - 
expect(result.frame.rowCount).toBe(2); - - // Check the values in the pivot table - expect(Array.from(result.frame.columns.product)).toEqual([ - 'Product A', - 'Product B', - ]); - expect(Array.from(result.frame.columns['region_North'])).toEqual([ - 10, 15, - ]); - expect(Array.from(result.frame.columns['region_South'])).toEqual([ - 20, 25, - ]); - expect(Array.from(result.frame.columns['region_East'])).toEqual([ - 30, 35, - ]); - expect(Array.from(result.frame.columns['region_West'])).toEqual([ - 40, 45, - ]); - }); - - test('uses built-in mean aggregation function', () => { - // Create a test DataFrame with multiple sales entries per region - // df created above with createDataFrameWithStorage - - // Call the pivot method with mean aggregation function - const result = df.pivot('product', 'region', 'sales', mean); - - // Check the values in the pivot table (should be averages) - expect(Array.from(result.frame.columns.product)).toEqual([ - 'Product A', - 'Product B', - ]); - expect(Array.from(result.frame.columns['region_North'])).toEqual([ - 15, 15, - ]); // (10+20)/2, 15/1 - expect(Array.from(result.frame.columns['region_South'])).toEqual([ - 30, 30, - ]); // 30/1, (25+35)/2 - }); - - test('uses built-in count aggregation function', () => { - // Create a test DataFrame with multiple entries - // df created above with createDataFrameWithStorage - - // Call the pivot method with count aggregation function - const result = df.pivot('product', 'region', 'sales', count); - - // Check the values in the pivot table (should be counts) - expect(Array.from(result.frame.columns.product)).toEqual([ - 'Product A', - 'Product B', - ]); - expect(Array.from(result.frame.columns['region_North'])).toEqual([ - 2, 1, - ]); // 2 entries for Product A, 1 for Product B - expect(Array.from(result.frame.columns['region_South'])).toEqual([ - 1, 2, - ]); // 1 entry for Product A, 2 for Product B - }); - - test('uses built-in max and min aggregation functions', () => { - // Create a test 
DataFrame with multiple entries - // df created above with createDataFrameWithStorage - - // Call the pivot method with max aggregation function - const resultMax = df.pivot('product', 'region', 'sales', max); - - // Check max values - expect(Array.from(resultMax.frame.columns['region_North'])).toEqual([ - 20, 15, - ]); // max of [10,20] and [15] - expect(Array.from(resultMax.frame.columns['region_South'])).toEqual([ - 30, 35, - ]); // max of [30] and [25,35] - - // Call the pivot method with min aggregation function - const resultMin = df.pivot('product', 'region', 'sales', min); - - // Check min values - expect(Array.from(resultMin.frame.columns['region_North'])).toEqual([ - 10, 15, - ]); // min of [10,20] and [15] - expect(Array.from(resultMin.frame.columns['region_South'])).toEqual([ - 30, 25, - ]); // min of [30] and [25,35] - }); - - test('handles multi-index pivot tables', () => { - // Create a test DataFrame with multiple dimensions - // df created above with createDataFrameWithStorage - - // Call the pivot method with multiple index columns - const result = df.pivot(['product', 'category'], 'region', 'sales'); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('category'); - expect(result.frame.columnNames).toContain('region_North'); - expect(result.frame.columnNames).toContain('region_South'); - - // Check the number of rows (should be one per unique product-category combination) - // Our implementation generates all possible combinations of index values - // So with 2 products and 2 categories, we expect 4 rows (2x2) - expect(result.frame.rowCount).toBe(4); - - // Find rows for product-category combinations that exist in the data - let productAElectronicsIdx = -1; - let productBFurnitureIdx = -1; - - // Find indices for combinations of Product A + Electronics and Product B + Furniture - for (let i = 0; i < result.frame.rowCount; i++) { - if ( - 
result.frame.columns.product[i] === 'Product A' && - result.frame.columns.category[i] === 'Electronics' - ) { - productAElectronicsIdx = i; - } - if ( - result.frame.columns.product[i] === 'Product B' && - result.frame.columns.category[i] === 'Furniture' - ) { - productBFurnitureIdx = i; - } - } - - // Check sales values for combinations that exist in the data - const northValues = Array.from(result.frame.columns['region_North']); - const southValues = Array.from(result.frame.columns['region_South']); - - // Verify that the values for existing combinations are correct - expect(northValues[productAElectronicsIdx]).toBe(10); - expect(southValues[productAElectronicsIdx]).toBe(20); - expect(northValues[productBFurnitureIdx]).toBe(30); - expect(southValues[productBFurnitureIdx]).toBe(40); - - // Check that other combinations have either NaN, null, or 0 values - const otherIndices = [...Array(result.frame.rowCount).keys()].filter( - (i) => i !== productAElectronicsIdx && i !== productBFurnitureIdx, - ); - - for (const idx of otherIndices) { - // In our implementation, missing values can be represented in different ways - const northValueIsEmpty = - northValues[idx] === null || - northValues[idx] === undefined || - isNaN(northValues[idx]) || - northValues[idx] === 0; - const southValueIsEmpty = - southValues[idx] === null || - southValues[idx] === undefined || - isNaN(southValues[idx]) || - southValues[idx] === 0; - - expect(northValueIsEmpty).toBe(true); - expect(southValueIsEmpty).toBe(true); - } - }); - - test('handles missing values in pivot table', () => { - // Create a test DataFrame with missing combinations - // df создан выше с помощью createDataFrameWithStorage - - // Call the pivot method - const result = df.pivot('product', 'region', 'sales'); - - // Check the values in the pivot table (missing combinations should be NaN for numeric columns) - expect(Array.from(result.frame.columns.product)).toEqual([ - 'Product A', - 'Product B', - ]); - 
expect(Array.from(result.frame.columns['region_North'])).toEqual([ - 10, 15, - ]); - - // Check that missing value is NaN (since sales is numeric) - const southValues = Array.from(result.frame.columns['region_South']); - expect(southValues[0]).toBe(20); - // In our implementation, missing numeric values are set to NaN - const missingValue = southValues[1]; - expect(missingValue === null || isNaN(missingValue)).toBe(true); - }); - - test('handles null values correctly', () => { - // Create a test DataFrame with null values - // df создан выше с помощью createDataFrameWithStorage - - // Call the pivot method - const result = df.pivot('product', 'region', 'sales'); - - // Check that null values are handled correctly - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region_North'); - expect(result.frame.columnNames).toContain('region_South'); - - // Check that null product is included as a row - expect(result.frame.columns.product).toContain(null); - }); - - test('throws an error with invalid arguments', () => { - // Create a test DataFrame - // df создан выше с помощью createDataFrameWithStorage - - // Check that the method throws an error if columns don't exist - expect(() => df.pivot('nonexistent', 'region', 'sales')).toThrow(); - expect(() => df.pivot('product', 'nonexistent', 'sales')).toThrow(); - expect(() => df.pivot('product', 'region', 'nonexistent')).toThrow(); - - // Check that the method throws an error if aggFunc is not a function - expect(() => - df.pivot('product', 'region', 'sales', 'not a function'), - ).toThrow(); - }); - - test('supports object parameter style', () => { - // Create a test DataFrame with sales data - // df создан выше с помощью createDataFrameWithStorage - - // Call the pivot method with object parameter style - const result = df.pivot({ - index: 'product', - columns: 'region', - values: 'sales', - }); - - // Check that the result is a DataFrame instance - 
expect(result).toBeInstanceOf(DataFrame); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region_North'); - expect(result.frame.columnNames).toContain('region_South'); - - // Check the values in the pivot table - expect(Array.from(result.frame.columns.product)).toEqual([ - 'Product A', - 'Product B', - ]); - expect(Array.from(result.frame.columns['region_North'])).toEqual([ - 10, 30, - ]); - expect(Array.from(result.frame.columns['region_South'])).toEqual([ - 20, 40, - ]); - }); - - test('supports multi-level columns', () => { - // Create a test DataFrame with multiple dimensions - // df создан выше с помощью createDataFrameWithStorage - - // Call the pivot method with multi-level columns - const result = df.pivot({ - index: 'product', - columns: ['region', 'quarter'], - values: 'sales', - }); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region_North.quarter_Q1'); - expect(result.frame.columnNames).toContain('region_North.quarter_Q2'); - expect(result.frame.columnNames).toContain('region_South.quarter_Q1'); - expect(result.frame.columnNames).toContain('region_South.quarter_Q2'); - - // Check the values in the pivot table - expect(Array.from(result.frame.columns.product)).toEqual([ - 'Product A', - 'Product B', - ]); - expect( - Array.from(result.frame.columns['region_North.quarter_Q1']), - ).toEqual([10, 30]); - expect( - Array.from(result.frame.columns['region_North.quarter_Q2']), - ).toEqual([15, 35]); - expect( - Array.from(result.frame.columns['region_South.quarter_Q1']), - ).toEqual([20, 40]); - expect( - Array.from(result.frame.columns['region_South.quarter_Q2']), - ).toEqual([25, 45]); - - // Check metadata for multi-level columns - expect(result.frame.metadata.multiLevelColumns).toEqual([ - 'region', - 'quarter', - ]); - }); - - test('supports multi-level 
indices and multi-level columns', () => { - // Create a test DataFrame with multiple dimensions - // df создан выше с помощью createDataFrameWithStorage - - // Call the pivot method with multi-level indices and columns - const result = df.pivot({ - index: ['product', 'category'], - columns: ['region', 'quarter'], - values: 'sales', - }); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('category'); - expect(result.frame.columnNames).toContain('region_North.quarter_Q1'); - expect(result.frame.columnNames).toContain('region_North.quarter_Q2'); - expect(result.frame.columnNames).toContain('region_South.quarter_Q1'); - expect(result.frame.columnNames).toContain('region_South.quarter_Q2'); - - // Check the number of rows (should be one per unique product-category combination) - expect(result.frame.rowCount).toBe(4); // 2 products x 2 categories = 4 combinations - - // Find rows for product-category combinations that exist in the data - let productAElectronicsIdx = -1; - let productBFurnitureIdx = -1; - - // Find indices for combinations of Product A + Electronics and Product B + Furniture - for (let i = 0; i < result.frame.rowCount; i++) { - if ( - result.frame.columns.product[i] === 'Product A' && - result.frame.columns.category[i] === 'Electronics' - ) { - productAElectronicsIdx = i; - } - if ( - result.frame.columns.product[i] === 'Product B' && - result.frame.columns.category[i] === 'Furniture' - ) { - productBFurnitureIdx = i; - } - } - - // Check sales values for combinations that exist in the data - expect( - result.frame.columns['region_North.quarter_Q1'][ - productAElectronicsIdx - ], - ).toBe(10); - expect( - result.frame.columns['region_North.quarter_Q2'][ - productAElectronicsIdx - ], - ).toBe(15); - expect( - result.frame.columns['region_South.quarter_Q1'][ - productAElectronicsIdx - ], - ).toBe(20); - expect( - result.frame.columns['region_South.quarter_Q2'][ 
- productAElectronicsIdx - ], - ).toBe(25); - - expect( - result.frame.columns['region_North.quarter_Q1'][productBFurnitureIdx], - ).toBe(30); - expect( - result.frame.columns['region_North.quarter_Q2'][productBFurnitureIdx], - ).toBe(35); - expect( - result.frame.columns['region_South.quarter_Q1'][productBFurnitureIdx], - ).toBe(40); - expect( - result.frame.columns['region_South.quarter_Q2'][productBFurnitureIdx], - ).toBe(45); - - // Check metadata for multi-level indices and columns - expect(result.frame.metadata.multiLevelIndex).toEqual([ - 'product', - 'category', - ]); - expect(result.frame.metadata.multiLevelColumns).toEqual([ - 'region', - 'quarter', - ]); - }); - }); - }); -}); diff --git a/test/methods/dataframe/transform/pivotTable.test.js b/test/methods/dataframe/transform/pivotTable.test.js deleted file mode 100644 index 598095f..0000000 --- a/test/methods/dataframe/transform/pivotTable.test.js +++ /dev/null @@ -1,342 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; -import { - sum, - mean, - count, - max, - min, -} from '../../../../src/methods/dataframe/transform/pivot.js'; - -// Test data for all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('DataFrame.pivotTable', () => { - // Запускаем тесты с обоими типами хранилища - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('creates a pivot table with a single aggregation function', () => { - // Create a test 
DataFrame with sales data - // df created above with createDataFrameWithStorage - - // Call the pivotTable method with a single aggregation function - const result = df.pivotTable({ - index: 'product', - columns: 'region', - values: 'sales', - aggFunc: sum, - }); - - // Check that the result is a DataFrame instance - expect(result).toBeInstanceOf(DataFrame); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region_North.sales'); - expect(result.frame.columnNames).toContain('region_South.sales'); - expect(result.frame.columnNames).toContain('region_East.sales'); - expect(result.frame.columnNames).toContain('region_West.sales'); - - // Check the values in the pivot table - expect(Array.from(result.frame.columns.product)).toEqual([ - 'Product A', - 'Product B', - ]); - expect(Array.from(result.frame.columns['region_North.sales'])).toEqual([ - 10, 15, - ]); - expect(Array.from(result.frame.columns['region_South.sales'])).toEqual([ - 20, 25, - ]); - expect(Array.from(result.frame.columns['region_East.sales'])).toEqual([ - 30, 35, - ]); - expect(Array.from(result.frame.columns['region_West.sales'])).toEqual([ - 40, 45, - ]); - }); - - test('creates a pivot table with multiple aggregation functions as an array', () => { - // Create a test DataFrame with multiple sales entries per region - // df created above with createDataFrameWithStorage - - // Call the pivotTable method with multiple aggregation functions - const result = df.pivotTable({ - index: 'product', - columns: 'region', - values: 'sales', - aggFunc: [sum, mean, count], - }); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region_North.sales_sum'); - expect(result.frame.columnNames).toContain('region_North.sales_mean'); - expect(result.frame.columnNames).toContain('region_North.sales_count'); - 
expect(result.frame.columnNames).toContain('region_South.sales_sum'); - expect(result.frame.columnNames).toContain('region_South.sales_mean'); - expect(result.frame.columnNames).toContain('region_South.sales_count'); - - // Check the values for sum aggregation - expect( - Array.from(result.frame.columns['region_North.sales_sum']), - ).toEqual([30, 15]); // 10+20, 15 - expect( - Array.from(result.frame.columns['region_South.sales_sum']), - ).toEqual([30, 60]); // 30, 25+35 - - // Check the values for mean aggregation - expect( - Array.from(result.frame.columns['region_North.sales_mean']), - ).toEqual([15, 15]); // (10+20)/2, 15/1 - expect( - Array.from(result.frame.columns['region_South.sales_mean']), - ).toEqual([30, 30]); // 30/1, (25+35)/2 - - // Check the values for count aggregation - expect( - Array.from(result.frame.columns['region_North.sales_count']), - ).toEqual([2, 1]); // 2 entries for Product A, 1 for Product B - expect( - Array.from(result.frame.columns['region_South.sales_count']), - ).toEqual([1, 2]); // 1 entry for Product A, 2 for Product B - - // Check metadata for aggregation functions - expect(result.frame.metadata.aggregationFunctions).toEqual([ - 'sales_sum', - 'sales_mean', - 'sales_count', - ]); - }); - - test('creates a pivot table with multiple aggregation functions as an object', () => { - // Create a test DataFrame with sales data - // df created above with createDataFrameWithStorage - - // Call the pivotTable method with multiple aggregation functions as an object - const result = df.pivotTable({ - index: 'product', - columns: 'region', - values: 'sales', - aggFunc: { - total: sum, - average: mean, - minimum: min, - maximum: max, - }, - }); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('region_North.total'); - expect(result.frame.columnNames).toContain('region_North.average'); - 
expect(result.frame.columnNames).toContain('region_North.minimum'); - expect(result.frame.columnNames).toContain('region_North.maximum'); - - // Check the values for custom aggregation functions - expect(Array.from(result.frame.columns['region_North.total'])).toEqual([ - 10, 15, - ]); // sum - expect( - Array.from(result.frame.columns['region_North.average']), - ).toEqual([10, 15]); // mean - expect( - Array.from(result.frame.columns['region_North.minimum']), - ).toEqual([10, 15]); // min - expect( - Array.from(result.frame.columns['region_North.maximum']), - ).toEqual([10, 15]); // max - - expect(Array.from(result.frame.columns['region_South.total'])).toEqual([ - 20, 25, - ]); // sum - expect( - Array.from(result.frame.columns['region_South.average']), - ).toEqual([20, 25]); // mean - expect( - Array.from(result.frame.columns['region_South.minimum']), - ).toEqual([20, 25]); // min - expect( - Array.from(result.frame.columns['region_South.maximum']), - ).toEqual([20, 25]); // max - - // Check metadata for aggregation functions - expect(result.frame.metadata.aggregationFunctions).toEqual([ - 'total', - 'average', - 'minimum', - 'maximum', - ]); - }); - - test('supports multi-level indices and columns with multiple aggregation functions', () => { - // Create a test DataFrame with multiple dimensions - // df created above with createDataFrameWithStorage - - // Call the pivotTable method with multi-level indices and columns - const result = df.pivotTable({ - index: ['product', 'category'], - columns: ['region', 'quarter'], - values: 'sales', - aggFunc: [sum, mean], - }); - - // Check the structure of the pivot table - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('category'); - expect(result.frame.columnNames).toContain( - 'region_North.quarter_Q1.sales_sum', - ); - expect(result.frame.columnNames).toContain( - 'region_North.quarter_Q2.sales_sum', - ); - expect(result.frame.columnNames).toContain( - 
'region_South.quarter_Q1.sales_sum', - ); - expect(result.frame.columnNames).toContain( - 'region_South.quarter_Q2.sales_sum', - ); - expect(result.frame.columnNames).toContain( - 'region_North.quarter_Q1.sales_mean', - ); - expect(result.frame.columnNames).toContain( - 'region_North.quarter_Q2.sales_mean', - ); - expect(result.frame.columnNames).toContain( - 'region_South.quarter_Q1.sales_mean', - ); - expect(result.frame.columnNames).toContain( - 'region_South.quarter_Q2.sales_mean', - ); - - // Check the number of rows (should be one per unique product-category combination) - expect(result.frame.rowCount).toBe(4); // 2 products x 2 categories = 4 combinations - - // Find rows for product-category combinations that exist in the data - let productAElectronicsIdx = -1; - let productBFurnitureIdx = -1; - - // Find indices for combinations of Product A + Electronics and Product B + Furniture - for (let i = 0; i < result.frame.rowCount; i++) { - if ( - result.frame.columns.product[i] === 'Product A' && - result.frame.columns.category[i] === 'Electronics' - ) { - productAElectronicsIdx = i; - } - if ( - result.frame.columns.product[i] === 'Product B' && - result.frame.columns.category[i] === 'Furniture' - ) { - productBFurnitureIdx = i; - } - } - - // Check sales values for combinations that exist in the data - expect( - result.frame.columns['region_North.quarter_Q1.sales_sum'][ - productAElectronicsIdx - ], - ).toBe(10); - expect( - result.frame.columns['region_North.quarter_Q2.sales_sum'][ - productAElectronicsIdx - ], - ).toBe(15); - expect( - result.frame.columns['region_South.quarter_Q1.sales_sum'][ - productAElectronicsIdx - ], - ).toBe(20); - expect( - result.frame.columns['region_South.quarter_Q2.sales_sum'][ - productAElectronicsIdx - ], - ).toBe(25); - - expect( - result.frame.columns['region_North.quarter_Q1.sales_sum'][ - productBFurnitureIdx - ], - ).toBe(30); - expect( - result.frame.columns['region_North.quarter_Q2.sales_sum'][ - productBFurnitureIdx - 
], - ).toBe(35); - expect( - result.frame.columns['region_South.quarter_Q1.sales_sum'][ - productBFurnitureIdx - ], - ).toBe(40); - expect( - result.frame.columns['region_South.quarter_Q2.sales_sum'][ - productBFurnitureIdx - ], - ).toBe(45); - - // Check metadata for multi-level indices and columns - expect(result.frame.metadata.multiLevelIndex).toEqual([ - 'product', - 'category', - ]); - expect(result.frame.metadata.multiLevelColumns).toEqual([ - 'region', - 'quarter', - ]); - expect(result.frame.metadata.aggregationFunctions).toEqual([ - 'sales_sum', - 'sales_mean', - ]); - }); - - test('throws an error with invalid aggregation functions', () => { - // Create a test DataFrame - // df created above with createDataFrameWithStorage - - // Check that the method throws an error if aggFunc is not a function, array, or object - expect(() => - df.pivotTable({ - index: 'product', - columns: 'region', - values: 'sales', - aggFunc: 'not a function', - }), - ).toThrow(); - - // Check that the method throws an error if array contains non-functions - expect(() => - df.pivotTable({ - index: 'product', - columns: 'region', - values: 'sales', - aggFunc: [sum, 'not a function'], - }), - ).toThrow(); - - // Check that the method throws an error if object contains non-functions - expect(() => - df.pivotTable({ - index: 'product', - columns: 'region', - values: 'sales', - aggFunc: { total: sum, average: 'not a function' }, - }), - ).toThrow(); - }); - }); - }); -}); diff --git a/test/methods/dataframe/transform/unstack.test.js b/test/methods/dataframe/transform/unstack.test.js deleted file mode 100644 index 319f239..0000000 --- a/test/methods/dataframe/transform/unstack.test.js +++ /dev/null @@ -1,170 +0,0 @@ -import { describe, test, expect } from 'vitest'; -import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Test data for all tests -const 
testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - -describe('DataFrame.unstack', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('unstacks rows into columns', () => { - // Create a test DataFrame in long format - // df created above with createDataFrameWithStorage - - // Call the unstack method - const result = df.unstack('product', 'region', 'sales'); - - // Check that the result is a DataFrame instance - expect(result).toBeInstanceOf(DataFrame); - - // Check the structure of the unstacked DataFrame - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('North'); - expect(result.frame.columnNames).toContain('South'); - expect(result.frame.columnNames).toContain('East'); - expect(result.frame.columnNames).toContain('West'); - - // Check the number of rows (should be one per unique product) - expect(result.frame.rowCount).toBe(2); - - // Check the values in the unstacked DataFrame - const products = Array.from(result.frame.columns.product); - const northValues = Array.from(result.frame.columns.North); - const southValues = Array.from(result.frame.columns.South); - const eastValues = Array.from(result.frame.columns.East); - const westValues = Array.from(result.frame.columns.West); - - expect(products).toEqual(['Product A', 'Product B']); - expect(northValues).toEqual([10, 15]); - expect(southValues).toEqual([20, 25]); - expect(eastValues).toEqual([30, 35]); - expect(westValues).toEqual([40, 45]); - - // Check metadata - expect(result.frame.metadata.unstackedColumn).toBe('region'); - 
expect(result.frame.metadata.valueColumn).toBe('sales'); - expect(result.frame.metadata.indexColumns).toEqual(['product']); - }); - - test('unstacks with multiple index columns', () => { - // Create a test DataFrame in long format - // df created above with createDataFrameWithStorage - - // Call the unstack method with multiple index columns - const result = df.unstack(['product', 'category'], 'region', 'sales'); - - // Check the structure of the unstacked DataFrame - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('category'); - expect(result.frame.columnNames).toContain('North'); - expect(result.frame.columnNames).toContain('South'); - expect(result.frame.columnNames).toContain('East'); - expect(result.frame.columnNames).toContain('West'); - - // Check the number of rows (should be one per unique product-category combination) - expect(result.frame.rowCount).toBe(2); - - // Check the values in the unstacked DataFrame - const products = Array.from(result.frame.columns.product); - const categories = Array.from(result.frame.columns.category); - const northValues = Array.from(result.frame.columns.North); - const southValues = Array.from(result.frame.columns.South); - const eastValues = Array.from(result.frame.columns.East); - const westValues = Array.from(result.frame.columns.West); - - expect(products).toEqual(['Product A', 'Product B']); - expect(categories).toEqual(['Electronics', 'Furniture']); - expect(northValues).toEqual([10, 15]); - expect(southValues).toEqual([20, 25]); - expect(eastValues).toEqual([30, 35]); - expect(westValues).toEqual([40, 45]); - - // Check metadata - expect(result.frame.metadata.unstackedColumn).toBe('region'); - expect(result.frame.metadata.valueColumn).toBe('sales'); - expect(result.frame.metadata.indexColumns).toEqual([ - 'product', - 'category', - ]); - }); - - test('handles duplicate index values by using the last occurrence', () => { - // Create a test DataFrame with duplicate 
index values - // df created above with createDataFrameWithStorage - - // Call the unstack method - const result = df.unstack('product', 'region', 'sales'); - - // Check the values in the unstacked DataFrame - // The last occurrence of each duplicate should be used - const products = Array.from(result.frame.columns.product); - const northValues = Array.from(result.frame.columns.North); - const southValues = Array.from(result.frame.columns.South); - - expect(products).toEqual(['Product A', 'Product B']); - expect(northValues).toEqual([20, null]); // Last value for Product A, North is 20 - expect(southValues).toEqual([null, 40]); // Last value for Product B, South is 40 - }); - - test('handles non-numeric values in unstack', () => { - // Create a test DataFrame in long format - // df created above with createDataFrameWithStorage - - // Call the unstack method - const result = df.unstack('product', 'year', 'status'); - - // Check the column names in the unstacked DataFrame - expect(result.frame.columnNames).toContain('product'); - expect(result.frame.columnNames).toContain('2023'); - expect(result.frame.columnNames).toContain('2024'); - - // Check the values in the unstacked DataFrame - const products = Array.from(result.frame.columns.product); - const values2023 = Array.from(result.frame.columns['2023']); - const values2024 = Array.from(result.frame.columns['2024']); - - expect(products).toEqual(['Product A', 'Product B']); - expect(values2023).toEqual(['Active', 'Inactive']); - expect(values2024).toEqual(['Inactive', 'Active']); - }); - - test('throws an error with invalid arguments', () => { - // Create a test DataFrame - // df created above with createDataFrameWithStorage - - // Check that the method throws an error if index is not provided - expect(() => df.unstack()).toThrow(); - - // Check that the method throws an error if column is not provided - expect(() => df.unstack('product')).toThrow(); - - // Check that the method throws an error if value is not 
provided - expect(() => df.unstack('product', 'region')).toThrow(); - - // Check that the method throws an error if index column doesn't exist - expect(() => df.unstack('nonexistent', 'region', 'sales')).toThrow(); - - // Check that the method throws an error if column column doesn't exist - expect(() => df.unstack('product', 'nonexistent', 'sales')).toThrow(); - - // Check that the method throws an error if value column doesn't exist - expect(() => df.unstack('product', 'region', 'nonexistent')).toThrow(); - }); - }); - }); -});
diff --git a/test/methods/series/transform/abs.test.js b/test/methods/series/transform/abs.test.js
new file mode 100644
index 0000000..09d9431
--- /dev/null
+++ b/test/methods/series/transform/abs.test.js
@@ -0,0 +1,68 @@
+import { describe, test, expect, beforeAll } from 'vitest';
+import { Series } from '../../../../src/core/dataframe/Series.js';
+import { register } from '../../../../src/methods/series/transform/abs.js';
+
+describe('Series.abs', () => {
+  beforeAll(() => {
+    // Register the abs method on Series prototype
+    register(Series);
+  });
+
+  test('returns absolute values of all elements', () => {
+    const series = new Series([-1, 2, -3, 4, -5]);
+    const absolute = series.abs();
+    expect(absolute.toArray()).toEqual([1, 2, 3, 4, 5]);
+  });
+
+  test('leaves positive values unchanged', () => {
+    const series = new Series([1, 2, 3, 4, 5]);
+    const absolute = series.abs();
+    expect(absolute.toArray()).toEqual([1, 2, 3, 4, 5]);
+  });
+
+  test('converts zero to zero', () => {
+    const series = new Series([-0, 0]);
+    const absolute = series.abs();
+    expect(absolute.toArray()).toEqual([0, 0]);
+  });
+
+  test('handles null and undefined values', () => {
+    const series = new Series([-1, null, -3, undefined, -5]);
+    const absolute = series.abs();
+    // Math.abs(null) is 0; Math.abs(undefined) is NaN (Number.isNaN rejects a leftover undefined)
+    expect(absolute.toArray()[0]).toBe(1);
+    expect(absolute.toArray()[1]).toBe(0);
+    expect(absolute.toArray()[2]).toBe(3);
+    expect(Number.isNaN(absolute.toArray()[3])).toBe(true);
+    expect(absolute.toArray()[4]).toBe(5);
+  });
+
+  test('handles empty Series', () => {
+    const series = new Series([]);
+    const absolute = series.abs();
+    expect(absolute.toArray()).toEqual([]);
+  });
+
+  test('preserves Series name', () => {
+    const series = new Series([-1, -2, -3], { name: 'test_series' });
+    const absolute = series.abs();
+    expect(absolute.name).toBe('test_series');
+  });
+
+  test('handles non-numeric strings', () => {
+    const series = new Series(['-1', '-2', 'abc']);
+    const absolute = series.abs();
+    expect(absolute.toArray()[0]).toBe(1);
+    expect(absolute.toArray()[1]).toBe(2);
+    expect(Number.isNaN(absolute.toArray()[2])).toBe(true); // global isNaN('abc') is true too
+  });
+
+  test('works with direct function call', () => {
+    // Register again on top of beforeAll; the method must keep working
+    register(Series);
+    const series = new Series([-1, -2, -3]);
+    // Call it as an instance method
+    const absolute = series.abs();
+    expect(absolute.toArray()).toEqual([1, 2, 3]);
+  });
+});
diff --git a/test/methods/series/transform/apply.test.js b/test/methods/series/transform/apply.test.js
new file mode 100644
index 0000000..0f879ee
--- /dev/null
+++ b/test/methods/series/transform/apply.test.js
@@ -0,0 +1,67 @@
+import { describe, test, expect, beforeAll } from 'vitest';
+import { Series } from '../../../../src/core/dataframe/Series.js';
+import { register } from '../../../../src/methods/series/transform/apply.js';
+
+describe('Series.apply', () => {
+  beforeAll(() => {
+    // Register the apply method on Series prototype
+    register(Series);
+  });
+
+  test('applies function to each element in the Series', () => {
+    const series = new Series([1, 2, 3, 4, 5]);
+    const doubled = series.apply((x) => x * 2);
+    expect(doubled.toArray()).toEqual([2, 4, 6, 8, 10]);
+  });
+
+  test('works the same as map method', () => {
+    const series = new Series([1, 2, 3, 4, 5]);
+    const applied = series.apply((x) => x * 3);
+    const mapped = series.map((x) => x * 3); // NOTE(review): map.js is not registered in this file - confirm Series.map exists here
+    expect(applied.toArray()).toEqual(mapped.toArray());
+  });
+
+  test('handles null and undefined values', () => {
+    const series = new Series([1, null, 3, undefined, 5]);
+    const applied = series.apply((x) =>
+      x === null || x === undefined ? 0 : x * 2,
+    );
+    expect(applied.toArray()).toEqual([2, 0, 6, 0, 10]);
+  });
+
+  test('handles empty Series', () => {
+    const series = new Series([]);
+    const applied = series.apply((x) => x * 2);
+    expect(applied.toArray()).toEqual([]);
+  });
+
+  test('preserves Series name', () => {
+    const series = new Series([1, 2, 3], { name: 'test_series' });
+    const applied = series.apply((x) => x * 2);
+    expect(applied.name).toBe('test_series');
+  });
+
+  test('works with non-numeric values', () => {
+    const series = new Series(['apple', 'banana', 'cherry']);
+    const applied = series.apply((x) => x.toUpperCase());
+    expect(applied.toArray()).toEqual(['APPLE', 'BANANA', 'CHERRY']);
+  });
+
+  test('works with complex transformations', () => {
+    const series = new Series([1, 2, 3, 4, 5]);
+    const applied = series.apply((x) => {
+      if (x % 2 === 0) return x * 10;
+      return x;
+    });
+    expect(applied.toArray()).toEqual([1, 20, 3, 40, 5]);
+  });
+
+  test('works with direct function call', () => {
+    // Register again on top of beforeAll; the method must keep working
+    register(Series);
+    const series = new Series([1, 2, 3]);
+    // Call it as an instance method
+    const applied = series.apply((x) => x * 3);
+    expect(applied.toArray()).toEqual([3, 6, 9]);
+  });
+});
diff --git a/test/methods/series/transform/map.test.js b/test/methods/series/transform/map.test.js
new file mode 100644
index 0000000..f7cbbbe
--- /dev/null
+++ b/test/methods/series/transform/map.test.js
@@ -0,0 +1,63 @@
+import { describe, test, expect, beforeAll } from 'vitest';
+import { Series } from '../../../../src/core/dataframe/Series.js';
+import { register } from '../../../../src/methods/series/transform/map.js';
+
+describe('Series.map', () => {
+  beforeAll(() => {
+    // Register the map method on Series prototype
+    register(Series);
+  });
+
+  test('applies function to each element in the Series', () => {
+    const series = new Series([1, 2, 3, 4, 5]);
+    const doubled = series.map((x) => x * 2);
+    expect(doubled.toArray()).toEqual([2, 4, 6, 8, 10]);
+  });
+
+  test('provides index as second argument to map function', () => {
+    const series = new Series(['a', 'b', 'c', 'd']);
+    const withIndices = series.map((val, idx) => `${val}${idx}`);
+    expect(withIndices.toArray()).toEqual(['a0', 'b1', 'c2', 'd3']);
+  });
+
+  test('provides full array as third argument to map function', () => {
+    const series = new Series([10, 20, 30, 40]);
+    const withArrayAccess = series.map((val, idx, arr) => val + arr[0]);
+    expect(withArrayAccess.toArray()).toEqual([20, 30, 40, 50]);
+  });
+
+  test('handles null and undefined values', () => {
+    const series = new Series([1, null, 3, undefined, 5]);
+    const mapped = series.map((x) =>
+      x === null || x === undefined ? 0 : x * 2,
+    );
+    expect(mapped.toArray()).toEqual([2, 0, 6, 0, 10]);
+  });
+
+  test('handles empty Series', () => {
+    const series = new Series([]);
+    const mapped = series.map((x) => x * 2);
+    expect(mapped.toArray()).toEqual([]);
+  });
+
+  test('preserves Series name', () => {
+    const series = new Series([1, 2, 3], { name: 'test_series' });
+    const mapped = series.map((x) => x * 2);
+    expect(mapped.name).toBe('test_series');
+  });
+
+  test('works with non-numeric values', () => {
+    const series = new Series(['apple', 'banana', 'cherry']);
+    const mapped = series.map((x) => x.toUpperCase());
+    expect(mapped.toArray()).toEqual(['APPLE', 'BANANA', 'CHERRY']);
+  });
+
+  test('works with direct function call', () => {
+    // Register again on top of beforeAll; the method must keep working
+    register(Series);
+    const series = new Series([1, 2, 3]);
+    // Call it as an instance method
+    const mapped = series.map((x) => x * 3);
+    expect(mapped.toArray()).toEqual([3, 6, 9]);
+  });
+});
diff --git a/test/methods/series/transform/round.test.js b/test/methods/series/transform/round.test.js
new file mode 100644
index 0000000..1d66188
--- /dev/null
+++ b/test/methods/series/transform/round.test.js
@@ -0,0 +1,67 @@
+import { describe, test, expect, beforeAll } from 'vitest';
+import { Series } from '../../../../src/core/dataframe/Series.js';
+import { register } from '../../../../src/methods/series/transform/round.js';
+
+describe('Series.round', () => {
+  beforeAll(() => {
+    // Register the round method on Series prototype
+    register(Series);
+  });
+
+  test('rounds values to nearest integer by default', () => {
+    const series = new Series([1.4, 2.5, 3.6, 4.5]);
+    const rounded = series.round();
+    expect(rounded.toArray()).toEqual([1, 3, 4, 5]);
+  });
+
+  test('rounds to specified number of decimal places', () => {
+    const series = new Series([1.234, 2.345, 3.456, 4.567]);
+    const rounded = series.round(2);
+    expect(rounded.toArray()).toEqual([1.23, 2.35, 3.46, 4.57]);
+  });
+
+  test('handles negative decimals', () => {
+    const series = new Series([123, 456, 789]);
+    const rounded = series.round(-2);
+    expect(rounded.toArray()).toEqual([100, 500, 800]);
+  });
+
+  test('handles null and undefined values', () => {
+    const series = new Series([1.5, null, 3.5, undefined]);
+    const rounded = series.round();
+    // Math.round(null) is 0; Math.round(undefined) is NaN (Number.isNaN rejects a leftover undefined)
+    expect(rounded.toArray()[0]).toBe(2);
+    expect(rounded.toArray()[1]).toBe(0);
+    expect(rounded.toArray()[2]).toBe(4);
+    expect(Number.isNaN(rounded.toArray()[3])).toBe(true);
+  });
+
+  test('handles empty Series', () => {
+    const series = new Series([]);
+    const rounded = series.round();
+    expect(rounded.toArray()).toEqual([]);
+  });
+
+  test('preserves Series name', () => {
+    const series = new Series([1.5, 2.5], { name: 'test_series' });
+    const rounded = series.round();
+    expect(rounded.name).toBe('test_series');
+  });
+
+  test('handles non-numeric strings', () => {
+    const series = new Series(['1.5', '2.5', 'abc']);
+    const rounded = series.round();
+    expect(rounded.toArray()[0]).toBe(2);
+    expect(rounded.toArray()[1]).toBe(3);
+    expect(Number.isNaN(rounded.toArray()[2])).toBe(true); // global isNaN('abc') is true too
+  });
+
+  test('works with direct function call', () => {
+    // Register again on top of beforeAll; the method must keep working
+    register(Series);
+    const series = new Series([1.1, 2.2, 3.3]);
+    // Call it as an instance method
+    const rounded = series.round();
+    expect(rounded.toArray()).toEqual([1, 2, 3]);
+  });
+});