From e06541b53e6f7a2652ff1d0b83f38678bce4406d Mon Sep 17 00:00:00 2001 From: Alex K Date: Tue, 10 Jun 2025 15:03:40 +0200 Subject: [PATCH] fix: refactor aggregation methods and tests, replace deprecated DataFrame.create with fromRows --- src/methods/dataframe/aggregation/count.js | 34 +- src/methods/dataframe/aggregation/register.js | 5 - .../dataframe/aggregation/count.test.js | 79 ++--- .../dataframe/aggregation/first.test.js | 195 ++++++----- .../dataframe/aggregation/last.test.js | 180 +++++----- .../methods/dataframe/aggregation/max.test.js | 127 ++++--- .../dataframe/aggregation/mean.test.js | 120 ++++--- .../dataframe/aggregation/median.test.js | 204 ++++++------ .../methods/dataframe/aggregation/min.test.js | 127 ++++--- .../dataframe/aggregation/mode.test.js | 314 ++++++++---------- .../methods/dataframe/aggregation/std.test.js | 314 +++++++++--------- .../methods/dataframe/aggregation/sum.test.js | 95 +++--- .../dataframe/aggregation/variance.test.js | 272 ++++++++------- 13 files changed, 993 insertions(+), 1073 deletions(-) diff --git a/src/methods/dataframe/aggregation/count.js b/src/methods/dataframe/aggregation/count.js index 9b6bc9f..06faa7d 100644 --- a/src/methods/dataframe/aggregation/count.js +++ b/src/methods/dataframe/aggregation/count.js @@ -7,42 +7,42 @@ */ export const count = ({ validateColumn }) => - (df, column) => { + (df, column) => { // Validate that the column exists - validateColumn(df, column); + validateColumn(df, column); - // Get Series for the column and count valid values - const series = df.col(column); - const values = series.toArray(); + // Get Series for the column and count valid values + const series = df.col(column); + const values = series.toArray(); - let validCount = 0; - for (let i = 0; i < values.length; i++) { - const value = values[i]; - if (value !== null && value !== undefined && !Number.isNaN(value)) { - validCount++; - } + let validCount = 0; + for (let i = 0; i < values.length; i++) { + const value = 
values[i]; + if (value !== null && value !== undefined && !Number.isNaN(value)) { + validCount++; } + } - return validCount; - }; + return validCount; + }; /** * Registers the count method on DataFrame prototype * @param {Class} DataFrame - DataFrame class to extend */ export const register = (DataFrame) => { - // Создаем валидатор для проверки существования колонки + // Create a validator to check column existence const validateColumn = (df, column) => { if (!df.columns.includes(column)) { throw new Error(`Column '${column}' not found`); } }; - // Создаем функцию count с валидатором + // Create a function count with validator const countFn = count({ validateColumn }); - // Регистрируем метод count в прототипе DataFrame - DataFrame.prototype.count = function(column) { + // Register the count method in the DataFrame prototype + DataFrame.prototype.count = function (column) { return countFn(this, column); }; }; diff --git a/src/methods/dataframe/aggregation/register.js b/src/methods/dataframe/aggregation/register.js index 6764bcc..6cf6873 100644 --- a/src/methods/dataframe/aggregation/register.js +++ b/src/methods/dataframe/aggregation/register.js @@ -13,8 +13,6 @@ import { register as registerLast } from './last.js'; import { register as registerMode } from './mode.js'; import { register as registerVariance } from './variance.js'; import { register as registerStd } from './std.js'; -// Файл sort.js не найден, поэтому импорт закомментирован -// import { register as registerSort } from './sort.js'; /** * Registers all aggregation methods on DataFrame prototype @@ -32,9 +30,6 @@ export const registerDataFrameAggregation = (DataFrame) => { registerMode(DataFrame); registerVariance(DataFrame); registerStd(DataFrame); - // registerSort(DataFrame); // Закомментировано, так как файл sort.js отсутствует - - // Add additional aggregation methods here as they are implemented }; export default registerDataFrameAggregation; diff --git 
a/test/methods/dataframe/aggregation/count.test.js b/test/methods/dataframe/aggregation/count.test.js index d88f358..001decc 100644 --- a/test/methods/dataframe/aggregation/count.test.js +++ b/test/methods/dataframe/aggregation/count.test.js @@ -12,10 +12,6 @@ import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import { Series } from '../../../../src/core/dataframe/Series.js'; import { count } from '../../../../src/methods/dataframe/aggregation/count.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; /** * Tests for the DataFrame count function */ @@ -120,50 +116,47 @@ describe('DataFrame count function', () => { const countFn = count({ validateColumn }); // Check that the function throws an error for non-existent columns - expect(() => countFn(df, 'z')).toThrow('Column \'z\' not found'); + expect(() => countFn(df, 'z')).toThrow("Column 'z' not found"); }); }); // Tests with real DataFrames describe('DataFrame count with real DataFrames', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create a DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('should count all non-null, non-undefined, non-NaN values in a column', () => { - // Create a validator that does nothing - const validateColumn = () => {}; - const countFn = count({ validateColumn }); - - // Call the count function directly - // All 5 values in the value column are valid - expect(countFn(df, 'value')).toBe(5); - // All 5 values in the category column are valid - expect(countFn(df, 'category')).toBe(5); - // Only 2 valid values ('20' and 30) in the mixed column, others are null, undefined and NaN - expect(countFn(df, 'mixed')).toBe(2); - }); - - test('should handle mixed data types and ignore null, undefined, and NaN', () => { - // Create a 
validator that does nothing - const validateColumn = () => {}; - const countFn = count({ validateColumn }); - - // In the mixed column there is a string '20', a number 30, null, undefined and NaN - // The count function should only count valid values ('20' and 30) - expect(countFn(df, 'mixed')).toBe(2); - }); - - test('throws on corrupted frame', () => { - // Create a minimally valid frame but without required structure - const broken = {}; - const validateColumn = () => {}; - const countFn = count({ validateColumn }); - - expect(() => countFn(broken, 'a')).toThrow(); - }); + describe('with standard storage', () => { + // Create a DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(testData); + + test('should count all non-null, non-undefined, non-NaN values in a column', () => { + // Create a mock validator + const validateColumn = vi.fn(); + const countFn = count({ validateColumn }); + + // Call the count function directly + // All 5 values in the value column are valid + expect(countFn(df, 'value')).toBe(5); + // All 5 values in the category column are valid + expect(countFn(df, 'category')).toBe(5); + // Only 2 valid values ('20' and 30) in the mixed column, others are null, undefined and NaN + expect(countFn(df, 'mixed')).toBe(2); + }); + + test('should handle mixed data types and ignore null, undefined, and NaN', () => { + // Create a mock validator + const validateColumn = vi.fn(); + const countFn = count({ validateColumn }); + + // In the mixed column there is a string '20', a number 30, null, undefined and NaN + // The count function should only count valid values ('20' and 30) + expect(countFn(df, 'mixed')).toBe(2); + }); + + test('throws on corrupted frame', () => { + // Create a minimally valid frame but without required structure + const broken = {}; + const validateColumn = vi.fn(); + const countFn = count({ validateColumn }); + + expect(() => countFn(broken, 'a')).toThrow(); }); }); }); diff --git 
a/test/methods/dataframe/aggregation/first.test.js b/test/methods/dataframe/aggregation/first.test.js index 7b52a9c..0bb093b 100644 --- a/test/methods/dataframe/aggregation/first.test.js +++ b/test/methods/dataframe/aggregation/first.test.js @@ -12,112 +12,105 @@ import { register, } from '../../../../src/methods/dataframe/aggregation/first.js'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; +import { describe, it, expect, vi } from 'vitest'; // Register the first method in DataFrame for tests register(DataFrame); -// Test data for use in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - describe('first method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create a DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - // Test the first function directly - it('should return the first value in a column', () => { - // Create a first function with a mock validator - const validateColumn = vi.fn(); - const firstFn = first({ validateColumn }); - - // Call the first function - const result = firstFn(df, 'value'); - - // Check the result - expect(result).toBe(10); - expect(validateColumn).toHaveBeenCalledWith(df, 'value'); - }); - - it('should handle special values (null, undefined, NaN)', () => { - // Create a first function with a mock validator - const validateColumn = vi.fn(); - const firstFn = first({ validateColumn }); - - // Check that the first values are returned correctly - 
expect(firstFn(df, 'mixed')).toBe('20'); - expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); - }); - - it('should return undefined for empty DataFrame', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Create a first function with a mock validator - const validateColumn = vi.fn(); - const firstFn = first({ validateColumn }); - - // Call the first function - const result = firstFn(emptyDf, 'value'); - - // Check the result - expect(result).toBeUndefined(); - // For an empty DataFrame, the validator is not called, as we immediately return undefined - }); - - it('should throw error for non-existent column', () => { - // Create a validator that throws an error - const validateColumn = (df, column) => { - if (!df.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; - - // Create a first function with our validator - const firstFn = first({ validateColumn }); - - // Check that the function throws an error for non-existent columns - expect(() => firstFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); - - // Test the DataFrame.first method - it('should be available as a DataFrame method', () => { - // Check that the first method is available in DataFrame - expect(typeof df.first).toBe('function'); - - // Call the first method and check the result - expect(df.first('value')).toBe(10); - expect(df.first('category')).toBe('A'); - }); - it('should handle empty DataFrame gracefully', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Check that the first method returns undefined for an empty DataFrame - expect(emptyDf.first('value')).toBeUndefined(); - }); - - it('should throw error for non-existent column', () => { - // Check that the first method throws an error for non-existent columns - expect(() => df.first('nonexistent')).toThrow( - 'Column \'nonexistent\' not 
found', - ); - }); + describe('with standard storage', () => { + // Test data for use in all tests + const testData = [ + { value: 10, category: 'A', mixed: '20' }, + { value: 20, category: 'B', mixed: 30 }, + { value: 30, category: 'A', mixed: null }, + { value: 40, category: 'C', mixed: undefined }, + { value: 50, category: 'B', mixed: NaN }, + ]; + + // Create DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(testData); + + // Test the first function directly + it('should return the first value in a column', () => { + // Create a first function with a mock validator + const validateColumn = vi.fn(); + const firstFn = first({ validateColumn }); + + // Call the first function + const result = firstFn(df, 'value'); + + // Check the result + expect(result).toBe(10); + expect(validateColumn).toHaveBeenCalledWith(df, 'value'); + }); + + it('should handle special values (null, undefined, NaN)', () => { + // Create a first function with a mock validator + const validateColumn = vi.fn(); + const firstFn = first({ validateColumn }); + + // Check that the first values are returned correctly + expect(firstFn(df, 'mixed')).toBe('20'); + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); + }); + + it('should return undefined for empty DataFrame', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); + + // Create a first function with a mock validator + const validateColumn = vi.fn(); + const firstFn = first({ validateColumn }); + + // Call the first function + const result = firstFn(emptyDf, 'value'); + + // Check the result + expect(result).toBeUndefined(); + // For an empty DataFrame, the validator is not called, as we immediately return undefined + }); + + it('should throw error for non-existent column', () => { + // Create a validator that throws an error + const validateColumn = (df, column) => { + if (!df.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + 
}; + + // Create a first function with our validator + const firstFn = first({ validateColumn }); + + // Check that the function throws an error for non-existent columns + expect(() => firstFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); + }); + + // Test the DataFrame.first method + it('should be available as a DataFrame method', () => { + // Check that the first method is available in DataFrame + expect(typeof df.first).toBe('function'); + + // Call the first method and check the result + expect(df.first('value')).toBe(10); + expect(df.first('category')).toBe('A'); + }); + + it('should handle empty DataFrame gracefully', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); + + // Check that the first method returns undefined for an empty DataFrame + expect(emptyDf.first('value')).toBeUndefined(); + }); + + it('should throw error for non-existent column', () => { + // Check that the first method throws an error for non-existent columns + expect(() => df.first('nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); }); }); }); diff --git a/test/methods/dataframe/aggregation/last.test.js b/test/methods/dataframe/aggregation/last.test.js index 4527efd..2706713 100644 --- a/test/methods/dataframe/aggregation/last.test.js +++ b/test/methods/dataframe/aggregation/last.test.js @@ -5,11 +5,6 @@ import { register, } from '../../../../src/methods/dataframe/aggregation/last.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - // Register the last method in DataFrame for tests register(DataFrame); @@ -23,95 +18,92 @@ const testData = [ ]; describe('last method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - 
// Testing the last function directly - it('should return the last value in a column', () => { - // Create last function with a mock validator - const validateColumn = vi.fn(); - const lastFn = last({ validateColumn }); - - // Call the last function - const result = lastFn(df, 'value'); - - // Check the result - expect(result).toBe(50); - expect(validateColumn).toHaveBeenCalledWith(df, 'value'); - }); - - it('should return the last value even if it is null, undefined, or NaN', () => { - // Create last function with a mock validator - const validateColumn = vi.fn(); - const lastFn = last({ validateColumn }); - - // Call the last function - const result = lastFn(df, 'mixed'); - - // Check the result - expect(Number.isNaN(result)).toBe(true); // The last value is NaN - expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); - }); - - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error - const validateColumn = (df, column) => { - if (!df.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; - - // Создаем функцию last с валидатором - const lastFn = last({ validateColumn }); - - // Check that the function throws an error for a non-existent column - expect(() => lastFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); - - it('should return undefined for empty DataFrame', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Create last function with a mock validator - const validateColumn = vi.fn(); - const lastFn = last({ validateColumn }); - - // Call the last function - const result = lastFn(emptyDf, 'value'); - - // Check the result - expect(result).toBeUndefined(); - // For an empty DataFrame, the validator is not called because we immediately return undefined - }); - // Testing the DataFrame.last method - it('should be available as a DataFrame method', () => { - // Check that the last 
method is available in DataFrame - expect(typeof df.last).toBe('function'); - - // Call the last method and check the result - expect(df.last('value')).toBe(50); - expect(df.last('category')).toBe('B'); - }); - - it('should handle empty DataFrame gracefully', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Check that the last method returns undefined for an empty DataFrame - expect(emptyDf.last('value')).toBeUndefined(); - }); - - it('should throw error for non-existent column', () => { - // Check that the last method throws an error for a non-existent column - expect(() => df.last('nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); + describe('with standard storage', () => { + // Create DataFrame directly + const df = DataFrame.fromRows(testData); + + // Testing the last function directly + it('should return the last value in a column', () => { + // Create last function with a mock validator + const validateColumn = vi.fn(); + const lastFn = last({ validateColumn }); + + // Call the last function + const result = lastFn(df, 'value'); + + // Check the result + expect(result).toBe(50); + expect(validateColumn).toHaveBeenCalledWith(df, 'value'); + }); + + it('should return the last value even if it is null, undefined, or NaN', () => { + // Create last function with a mock validator + const validateColumn = vi.fn(); + const lastFn = last({ validateColumn }); + + // Call the last function + const result = lastFn(df, 'mixed'); + + // Check the result + expect(Number.isNaN(result)).toBe(true); // The last value is NaN + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); + }); + + it('should throw an error for non-existent column', () => { + // Create a validator that throws an error + const validateColumn = (df, column) => { + if (!df.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; + + // Create a last function with our validator + 
const lastFn = last({ validateColumn }); + + // Check that the function throws an error for a non-existent column + expect(() => lastFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); + }); + + it('should return undefined for empty DataFrame', () => { + // Create an empty DataFrame + const emptyDf = DataFrame.fromRows([]); + + // Create last function with a mock validator + const validateColumn = vi.fn(); + const lastFn = last({ validateColumn }); + + // Call the last function + const result = lastFn(emptyDf, 'value'); + + // Check the result + expect(result).toBeUndefined(); + // For an empty DataFrame, the validator is not called because we immediately return undefined + }); + // Testing the DataFrame.last method + it('should be available as a DataFrame method', () => { + // Check that the last method is available in DataFrame + expect(typeof df.last).toBe('function'); + + // Call the last method and check the result + expect(df.last('value')).toBe(50); + expect(df.last('category')).toBe('B'); + }); + + it('should handle empty DataFrame gracefully', () => { + // Create an empty DataFrame + const emptyDf = DataFrame.fromRows([]); + + // Check that the last method returns undefined for an empty DataFrame + expect(emptyDf.last('value')).toBeUndefined(); + }); + + it('should throw error for non-existent column', () => { + // Check that the last method throws an error for a non-existent column + expect(() => df.last('nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); }); }); }); diff --git a/test/methods/dataframe/aggregation/max.test.js b/test/methods/dataframe/aggregation/max.test.js index 1d13728..8b500f8 100644 --- a/test/methods/dataframe/aggregation/max.test.js +++ b/test/methods/dataframe/aggregation/max.test.js @@ -1,83 +1,82 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import { max } from 
'../../../../src/methods/dataframe/aggregation/max.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Test data for use in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - describe('max method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create a DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); + describe('with standard storage', () => { + // Test data for use in all tests + const testData = [ + { value: 10, category: 'A', mixed: '20' }, + { value: 20, category: 'B', mixed: 30 }, + { value: 30, category: 'A', mixed: null }, + { value: 40, category: 'C', mixed: undefined }, + { value: 50, category: 'B', mixed: NaN }, + ]; - it('should find the maximum value in a numeric column', () => { - // Call max function directly - const maxFn = max({ validateColumn: () => {} }); - const result = maxFn(df, 'value'); + // Create DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(testData); - // Check that the maximum is correct - expect(result).toBe(50); - }); + it('should find the maximum value in a numeric column', () => { + // Call max function directly with a mock validator + const validateColumn = vi.fn(); + const maxFn = max({ validateColumn }); + const result = maxFn(df, 'value'); - it('should handle mixed data types by converting to numbers', () => { - // Call max function directly - const maxFn = max({ validateColumn: () => {} }); - const result = maxFn(df, 'mixed'); + // Check that the maximum is correct + expect(result).toBe(50); + expect(validateColumn).toHaveBeenCalledWith(df, 
'value'); + }); - // Check that the maximum is correct (only valid numbers are considered) - expect(result).toBe(30); // '20' -> 20, 30 -> 30, null/undefined/NaN are skipped - }); + it('should handle mixed data types by converting to numbers', () => { + // Call max function directly with a mock validator + const validateColumn = vi.fn(); + const maxFn = max({ validateColumn }); + const result = maxFn(df, 'mixed'); - it('should return null for a column with no valid numeric values', () => { - // Call max function directly - const maxFn = max({ validateColumn: () => {} }); - const result = maxFn(df, 'category'); + // Check that the maximum is correct (only valid numbers are considered) + expect(result).toBe(30); // '20' -> 20, 30 -> 30, null/undefined/NaN are skipped + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); + }); - // Check that the result is null (no numeric values in 'category' column) - expect(result).toBe(null); - }); + it('should return null for a column with no valid numeric values', () => { + // Call max function directly with a mock validator + const validateColumn = vi.fn(); + const maxFn = max({ validateColumn }); + const result = maxFn(df, 'category'); - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error for non-existent column - const validateColumn = (frame, column) => { - if (!frame.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; + // Check that the result is null (no numeric values in 'category' column) + expect(result).toBe(null); + expect(validateColumn).toHaveBeenCalledWith(df, 'category'); + }); + + it('should throw an error for non-existent column', () => { + // Create a validator that throws an error for non-existent column + const validateColumn = (frame, column) => { + if (!frame.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; - // Call max function with validator - const maxFn = max({ 
validateColumn }); + // Call max function with validator + const maxFn = max({ validateColumn }); - // Check that it throws an error for non-existent column - expect(() => maxFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); + // Check that it throws an error for non-existent column + expect(() => maxFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); + }); - it('should handle empty frames', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); + it('should handle empty frames', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); - // Call max function directly with a validator that doesn't throw for empty frames - const validateColumn = () => {}; // Empty validator that doesn't check anything - const maxFn = max({ validateColumn }); + // Call max function directly with a validator that doesn't throw for empty frames + const validateColumn = vi.fn(); // Mock validator that doesn't check anything + const maxFn = max({ validateColumn }); - // Check that for an empty DataFrame the result is null - expect(maxFn(emptyDf, 'value')).toBe(null); - }); + // Check that for an empty DataFrame the result is null + expect(maxFn(emptyDf, 'value')).toBe(null); + // For an empty DataFrame, the validator is not called, as we immediately return null }); }); }); diff --git a/test/methods/dataframe/aggregation/mean.test.js b/test/methods/dataframe/aggregation/mean.test.js index 5e0b24e..ef14793 100644 --- a/test/methods/dataframe/aggregation/mean.test.js +++ b/test/methods/dataframe/aggregation/mean.test.js @@ -10,10 +10,6 @@ import { describe, test, expect, vi, beforeEach } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import { mean } from '../../../../src/methods/dataframe/aggregation/mean.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from 
'../../../utils/storageTestUtils.js'; /** * Tests for the mean function @@ -96,7 +92,7 @@ describe('mean', () => { describe('DataFrame.mean', () => { test('should throw error for non-existent column via DataFrame method', () => { // Create a DataFrame with test data - const df = DataFrame.create([{ values: 1 }, { values: 2 }]); + const df = DataFrame.fromRows([{ values: 1 }, { values: 2 }]); // Call the mean method with a non-existent column and expect it to throw an error expect(() => df.mean('nonexistent')).toThrow(); @@ -113,55 +109,71 @@ const testData = [ ]; describe('mean method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create a DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - test('should calculate the mean of numeric values in a column', () => { - // Call mean function directly - const meanFn = mean({ validateColumn: () => {} }); - const result = meanFn(df, 'value'); - - // Check that the mean is correct - expect(result).toBe(30); // (10 + 20 + 30 + 40 + 50) / 5 = 30 - }); - - test('should handle mixed data types by converting to numbers', () => { - // Call mean function directly - const meanFn = mean({ validateColumn: () => {} }); - const result = meanFn(df, 'mixed'); - - // Check that the mean is correct (only valid numbers are used) - expect(result).toBe(25); // ('20' -> 20, 30 -> 30) / 2 = 25 - }); - - test('should return NaN for a column with no valid numeric values', () => { - // Call mean function directly - const meanFn = mean({ validateColumn: () => {} }); - const result = meanFn(df, 'category'); - - // Check that the mean is NaN (no numeric values in 'category' column) - expect(isNaN(result)).toBe(true); - }); - - test('should throw an error for non-existent column', () => { - // Create a validator that throws an error for non-existent column - const validateColumn = 
(frame, column) => { - if (!frame.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; - - // Call mean function with validator - const meanFn = mean({ validateColumn }); - - // Check that it throws an error for non-existent column - expect(() => meanFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); + describe('with standard storage', () => { + // Create DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(testData); + + test('should calculate the mean of numeric values in a column', () => { + // Call mean function directly with a mock validator + const validateColumn = vi.fn(); + const meanFn = mean({ validateColumn }); + const result = meanFn(df, 'value'); + + // Check that the mean is correct + expect(result).toBe(30); // (10 + 20 + 30 + 40 + 50) / 5 = 30 + expect(validateColumn).toHaveBeenCalledWith(df, 'value'); + }); + + test('should handle mixed data types by converting to numbers', () => { + // Call mean function directly with a mock validator + const validateColumn = vi.fn(); + const meanFn = mean({ validateColumn }); + const result = meanFn(df, 'mixed'); + + // Check that the mean is correct (only valid numbers are considered) + expect(result).toBe(25); // ('20' -> 20, 30 -> 30) / 2 = 25 + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); + }); + + test('should return NaN for a column with no valid numeric values', () => { + // Call mean function directly with a mock validator + const validateColumn = vi.fn(); + const meanFn = mean({ validateColumn }); + const result = meanFn(df, 'category'); + + // Check that the result is NaN (no numeric values in 'category' column) + expect(isNaN(result)).toBe(true); + expect(validateColumn).toHaveBeenCalledWith(df, 'category'); + }); + + test('should throw an error for non-existent column', () => { + // Create a validator that throws an error for non-existent column + const validateColumn = (frame, column) => { + if 
(!frame.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; + + // Call mean function with validator + const meanFn = mean({ validateColumn }); + + // Check that it throws an error for non-existent column + expect(() => meanFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); + }); + + test('should handle empty frames', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); + + // Call mean function directly with a validator that doesn't throw for empty frames + const validateColumn = vi.fn(); // Mock validator that doesn't check anything + const meanFn = mean({ validateColumn }); + + // Check that for an empty DataFrame the result is NaN + expect(isNaN(meanFn(emptyDf, 'value'))).toBe(true); + // For an empty DataFrame, the validator is not called, as we immediately return NaN }); }); }); diff --git a/test/methods/dataframe/aggregation/median.test.js b/test/methods/dataframe/aggregation/median.test.js index 6739a0d..e012556 100644 --- a/test/methods/dataframe/aggregation/median.test.js +++ b/test/methods/dataframe/aggregation/median.test.js @@ -1,120 +1,104 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import { median } from '../../../../src/methods/dataframe/aggregation/median.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; +describe('median method', () => { + describe('with standard storage', () => { + // Test data for odd number of elements (5 elements) + const testDataOdd = [ + { value: 10, category: 'A', mixed: '20' }, + { value: 20, category: 'B', mixed: 30 }, + { value: 30, category: 'A', mixed: null }, + { value: 40, category: 'C', mixed: undefined }, + { value: 50, category: 'B', mixed: NaN }, + ]; + + // Test data for even number of elements (6 elements) + const 
testDataEven = [ + { value: 10, category: 'A', mixed: '20' }, + { value: 20, category: 'B', mixed: 30 }, + { value: 30, category: 'A', mixed: null }, + { value: 40, category: 'C', mixed: undefined }, + { value: 50, category: 'B', mixed: NaN }, + { value: 60, category: 'D', mixed: 40 }, + ]; + + // Create DataFrames using fromRows for proper column names + const dfOdd = DataFrame.fromRows(testDataOdd); + const dfEven = DataFrame.fromRows(testDataEven); + + it('should calculate the median for odd number of elements', () => { + // Call median function directly with a mock validator + const validateColumn = vi.fn(); + const medianFn = median({ validateColumn }); + const result = medianFn(dfOdd, 'value'); + + // Check that the median is correct + expect(result).toBe(30); // Sorted: [10, 20, 30, 40, 50] -> median is 30 + expect(validateColumn).toHaveBeenCalledWith(dfOdd, 'value'); + }); -// Test data for use in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; + it('should calculate the median for even number of elements', () => { + // Call median function directly with a mock validator + const validateColumn = vi.fn(); + const medianFn = median({ validateColumn }); + const result = medianFn(dfEven, 'value'); -describe('median method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create test data for odd number of elements - const testDataOdd = [ - { value: 30, category: 'A', mixed: '20' }, - { value: 10, category: 'B', mixed: 30 }, - { value: 50, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 20, category: 'B', mixed: NaN }, - ]; - - // Create test data for even number of elements - const testDataEven = [ - { value: 
30, category: 'A', mixed: '20' }, - { value: 10, category: 'B', mixed: 30 }, - { value: 50, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 20, category: 'B', mixed: NaN }, - { value: 60, category: 'D', mixed: 40 }, - ]; - - // Create a DataFrame with the specified storage type - const dfOdd = createDataFrameWithStorage( - DataFrame, - testDataOdd, - storageType, - ); - const dfEven = createDataFrameWithStorage( - DataFrame, - testDataEven, - storageType, + // Check that the median is correct + expect(result).toBe(35); // Sorted: [10, 20, 30, 40, 50, 60] -> median is (30+40)/2 = 35 + expect(validateColumn).toHaveBeenCalledWith(dfEven, 'value'); + }); + + it('should handle mixed data types by converting to numbers', () => { + // Call median function directly with a mock validator + const validateColumn = vi.fn(); + const medianFn = median({ validateColumn }); + const result = medianFn(dfEven, 'mixed'); + + // Check that the median is correct (only valid numbers are considered) + expect(result).toBe(30); // Valid values: [20, 30, 40] -> median is 30 + expect(validateColumn).toHaveBeenCalledWith(dfEven, 'mixed'); + }); + + it('should return null for a column with no valid numeric values', () => { + // Call median function directly with a mock validator + const validateColumn = vi.fn(); + const medianFn = median({ validateColumn }); + const result = medianFn(dfOdd, 'category'); + + // Check that the result is null (no numeric values in 'category' column) + expect(result).toBe(null); + expect(validateColumn).toHaveBeenCalledWith(dfOdd, 'category'); + }); + + it('should throw an error for non-existent column', () => { + // Create a validator that throws an error for non-existent column + const validateColumn = (frame, column) => { + if (!frame.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; + + // Call median function with validator + const medianFn = median({ validateColumn }); + + // 
Check that it throws an error for non-existent column + expect(() => medianFn(dfOdd, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", ); + }); + + it('should handle empty frames', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); + + // Call median function directly with a validator that doesn't throw for empty frames + const validateColumn = vi.fn(); // Mock validator that doesn't check anything + const medianFn = median({ validateColumn }); - it('should calculate the median for odd number of elements', () => { - // Call median function directly - const medianFn = median({ validateColumn: () => {} }); - const result = medianFn(dfOdd, 'value'); - - // Check that the median is correct - expect(result).toBe(30); // Sorted: [10, 20, 30, 40, 50] -> median is 30 - }); - - it('should calculate the median for even number of elements', () => { - // Call median function directly - const medianFn = median({ validateColumn: () => {} }); - const result = medianFn(dfEven, 'value'); - - // Check that the median is correct - expect(result).toBe(35); // Sorted: [10, 20, 30, 40, 50, 60] -> median is (30+40)/2 = 35 - }); - - it('should handle mixed data types by converting to numbers', () => { - // Call median function directly - const medianFn = median({ validateColumn: () => {} }); - const result = medianFn(dfEven, 'mixed'); - - // Check that the median is correct (only valid numbers are considered) - expect(result).toBe(30); // Valid values: [20, 30, 40] -> median is 30 - }); - - it('should return null for a column with no valid numeric values', () => { - // Call median function directly - const medianFn = median({ validateColumn: () => {} }); - const result = medianFn(dfOdd, 'category'); - - // Check that the result is null (no numeric values in 'category' column) - expect(result).toBe(null); - }); - - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error for non-existent 
column - const validateColumn = (frame, column) => { - if (!frame.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; - - // Call median function with validator - const medianFn = median({ validateColumn }); - - // Check that it throws an error for non-existent column - expect(() => medianFn(dfOdd, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); - - it('should handle empty frames', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Call median function directly with a validator that doesn't throw for empty frames - const validateColumn = () => {}; // Пустой валидатор, который ничего не проверяет - const medianFn = median({ validateColumn }); - - // Проверяем, что для пустого DataFrame результат равен null - expect(medianFn(emptyDf, 'value')).toBe(null); - }); + // Check that for an empty DataFrame the result is null + expect(medianFn(emptyDf, 'value')).toBe(null); + // For an empty DataFrame, the validator is not called, as we immediately return null }); }); }); diff --git a/test/methods/dataframe/aggregation/min.test.js b/test/methods/dataframe/aggregation/min.test.js index 5ea1d62..4872b48 100644 --- a/test/methods/dataframe/aggregation/min.test.js +++ b/test/methods/dataframe/aggregation/min.test.js @@ -1,83 +1,82 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import { min } from '../../../../src/methods/dataframe/aggregation/min.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Test data for use in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 
'B', mixed: NaN }, -]; - describe('min method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create a DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); + describe('with standard storage', () => { + // Test data for use in all tests + const testData = [ + { value: 10, category: 'A', mixed: '20' }, + { value: 20, category: 'B', mixed: 30 }, + { value: 30, category: 'A', mixed: null }, + { value: 40, category: 'C', mixed: undefined }, + { value: 50, category: 'B', mixed: NaN }, + ]; - it('should find the minimum value in a numeric column', () => { - // Call min function directly - const minFn = min({ validateColumn: () => {} }); - const result = minFn(df, 'value'); + // Create DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(testData); - // Check that the minimum is correct - expect(result).toBe(10); - }); + it('should find the minimum value in a numeric column', () => { + // Call min function directly with a mock validator + const validateColumn = vi.fn(); + const minFn = min({ validateColumn }); + const result = minFn(df, 'value'); - it('should handle mixed data types by converting to numbers', () => { - // Call min function directly - const minFn = min({ validateColumn: () => {} }); - const result = minFn(df, 'mixed'); + // Check that the minimum is correct + expect(result).toBe(10); + expect(validateColumn).toHaveBeenCalledWith(df, 'value'); + }); - // Check that the minimum is correct (only valid numbers are considered) - expect(result).toBe(20); // '20' -> 20, 30 -> 30, null/undefined/NaN are skipped - }); + it('should handle mixed data types by converting to numbers', () => { + // Call min function directly with a mock validator + const validateColumn = vi.fn(); + const minFn = min({ validateColumn }); + const result = minFn(df, 'mixed'); - it('should return null 
for a column with no valid numeric values', () => { - // Call min function directly - const minFn = min({ validateColumn: () => {} }); - const result = minFn(df, 'category'); + // Check that the minimum is correct (only valid numbers are considered) + expect(result).toBe(20); // '20' -> 20, 30 -> 30, null/undefined/NaN are skipped + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); + }); - // Check that the result is null (no numeric values in 'category' column) - expect(result).toBe(null); - }); + it('should return null for a column with no valid numeric values', () => { + // Call min function directly with a mock validator + const validateColumn = vi.fn(); + const minFn = min({ validateColumn }); + const result = minFn(df, 'category'); - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error for non-existent column - const validateColumn = (frame, column) => { - if (!frame.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; + // Check that the result is null (no numeric values in 'category' column) + expect(result).toBe(null); + expect(validateColumn).toHaveBeenCalledWith(df, 'category'); + }); + + it('should throw an error for non-existent column', () => { + // Create a validator that throws an error for non-existent column + const validateColumn = (frame, column) => { + if (!frame.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; - // Call min function with validator - const minFn = min({ validateColumn }); + // Call min function with validator + const minFn = min({ validateColumn }); - // Check that it throws an error for non-existent column - expect(() => minFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); + // Check that it throws an error for non-existent column + expect(() => minFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); + }); - it('should handle empty frames', () 
=> { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); + it('should handle empty frames', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); - // Call min function directly with a validator that doesn't throw for empty frames - const validateColumn = () => {}; // Empty validator that doesn't check anything - const minFn = min({ validateColumn }); + // Call min function directly with a validator that doesn't throw for empty frames + const validateColumn = vi.fn(); // Mock validator that doesn't check anything + const minFn = min({ validateColumn }); - // Check that for an empty DataFrame the result is null - expect(minFn(emptyDf, 'value')).toBe(null); - }); + // Check that for an empty DataFrame the result is null + expect(minFn(emptyDf, 'value')).toBe(null); + // For an empty DataFrame, the validator is not called, as we immediately return null }); }); }); diff --git a/test/methods/dataframe/aggregation/mode.test.js b/test/methods/dataframe/aggregation/mode.test.js index a0f45f6..c9fd83e 100644 --- a/test/methods/dataframe/aggregation/mode.test.js +++ b/test/methods/dataframe/aggregation/mode.test.js @@ -5,182 +5,156 @@ import { register, } from '../../../../src/methods/dataframe/aggregation/mode.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - // Register the mode method in DataFrame for tests register(DataFrame); -// Test data to be used in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; - describe('mode method', () => { - // Test data for modal value - const modeTestData = [ - { value: 30, category: 'A', mixed: '20' }, - { value: 10, category: 'B', mixed: 30 }, - { value: 30, 
category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 30, category: 'B', mixed: NaN }, - { value: 20, category: 'B', mixed: '20' }, - ]; - - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create a DataFrame with the specified storage type - const df = createDataFrameWithStorage( - DataFrame, - modeTestData, - storageType, + describe('with standard storage', () => { + // Test data for modal value + const modeTestData = [ + { value: 30, category: 'A', mixed: '20' }, + { value: 10, category: 'B', mixed: 30 }, + { value: 30, category: 'A', mixed: null }, + { value: 40, category: 'C', mixed: undefined }, + { value: 30, category: 'B', mixed: NaN }, + { value: 20, category: 'B', mixed: '20' }, + ]; + + // Create DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(modeTestData); + + // Test the mode function directly + it('should find the most frequent value in a column', () => { + // Create the mode function with a mock validator + const validateColumn = vi.fn(); + const modeFn = mode({ validateColumn }); + + // Call the mode function + const result = modeFn(df, 'value'); + + // Check the result + expect(result).toBe(30); // 30 appears 3 times, more often than any other value + expect(validateColumn).toHaveBeenCalledWith(df, 'value'); + }); + + it('should handle mixed data types by treating them as distinct', () => { + // Create the mode function with a mock validator + const validateColumn = vi.fn(); + const modeFn = mode({ validateColumn }); + + // Call the mode function + const result = modeFn(df, 'mixed'); + + // Check the result (only valid values are considered) + expect(result).toBe('20'); // '20' appears twice (string '20', not number 20) + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); + }); + + it('should return null for a column with no valid values', () => { + // Create data with only invalid 
values + const invalidData = [ + { invalid: null }, + { invalid: undefined }, + { invalid: NaN }, + ]; + + // Create DataFrame using fromRows + const invalidDf = DataFrame.fromRows(invalidData); + + // Create the mode function with a mock validator + const validateColumn = vi.fn(); + const modeFn = mode({ validateColumn }); + + // Call the mode function + const result = modeFn(invalidDf, 'invalid'); + + // Check the result + expect(result).toBe(null); // no valid values + expect(validateColumn).toHaveBeenCalledWith(invalidDf, 'invalid'); + }); + + it('should return one of the values if multiple values have the same highest frequency', () => { + // Create data with multiple modal values + const multiModeData = [ + { value: 10 }, + { value: 20 }, + { value: 10 }, + { value: 30 }, + { value: 20 }, + { value: 30 }, + ]; + + // Create DataFrame using fromRows + const multiModeDf = DataFrame.fromRows(multiModeData); + + // Create the mode function with a mock validator + const validateColumn = vi.fn(); + const modeFn = mode({ validateColumn }); + + // Call the mode function + const result = modeFn(multiModeDf, 'value'); + + // Check that one of the modal values is returned (all appear twice) + expect([10, 20, 30]).toContain(result); + expect(validateColumn).toHaveBeenCalledWith(multiModeDf, 'value'); + }); + + it('should throw an error for non-existent column', () => { + // Create a validator that throws an error + const validateColumn = (df, column) => { + if (!df.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; + + // Create the mode function with a validator + const modeFn = mode({ validateColumn }); + + // Check that the function throws an error for a non-existent column + expect(() => modeFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", ); + }); + + it('should return null for empty DataFrame', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); + + // Create the 
mode function with a mock validator + const validateColumn = vi.fn(); + const modeFn = mode({ validateColumn }); + + // Call the mode function + const result = modeFn(emptyDf, 'value'); + + // Check the result + expect(result).toBe(null); + // For an empty DataFrame, the validator is not called, as we immediately return null + }); + + // Test the DataFrame.mode method + it('should be available as a DataFrame method', () => { + // Check that the mode method is available in DataFrame + expect(typeof df.mode).toBe('function'); + + // Call the mode method and check the result + expect(df.mode('value')).toBe(30); + expect(df.mode('category')).toBe('B'); // 'B' appears more often than 'A' or 'C' + }); - // Test the mode function directly - it('should find the most frequent value in a column', () => { - // Create the mode function with a mock validator - const validateColumn = vi.fn(); - const modeFn = mode({ validateColumn }); - - // Call the mode function - const result = modeFn(df, 'value'); - - // Check the result - expect(result).toBe(30); // 30 appears 3 times, more often than any other value - expect(validateColumn).toHaveBeenCalledWith(df, 'value'); - }); - - it('should handle mixed data types by treating them as distinct', () => { - // Create the mode function with a mock validator - const validateColumn = vi.fn(); - const modeFn = mode({ validateColumn }); - - // Call the mode function - const result = modeFn(df, 'mixed'); - - // Check the result (only valid values are considered) - expect(result).toBe('20'); // '20' appears twice (string '20', not number 20) - expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); - }); - - it('should return null for a column with no valid values', () => { - // Create data with only invalid values - const invalidData = [ - { invalid: null }, - { invalid: undefined }, - { invalid: NaN }, - ]; - - const invalidDf = createDataFrameWithStorage( - DataFrame, - invalidData, - storageType, - ); - - // Create the mode function with 
a mock validator - const validateColumn = vi.fn(); - const modeFn = mode({ validateColumn }); - - // Call the mode function - const result = modeFn(invalidDf, 'invalid'); - - // Check the result - expect(result).toBe(null); // no valid values - expect(validateColumn).toHaveBeenCalledWith(invalidDf, 'invalid'); - }); - - it('should return one of the values if multiple values have the same highest frequency', () => { - // Create data with multiple modal values - const multiModeData = [ - { value: 10 }, - { value: 20 }, - { value: 10 }, - { value: 30 }, - { value: 20 }, - { value: 30 }, - ]; - - const multiModeDf = createDataFrameWithStorage( - DataFrame, - multiModeData, - storageType, - ); - - // Create the mode function with a mock validator - const validateColumn = vi.fn(); - const modeFn = mode({ validateColumn }); - - // Call the mode function - const result = modeFn(multiModeDf, 'value'); - - // Check that one of the modal values is returned (all appear twice) - expect([10, 20, 30]).toContain(result); - expect(validateColumn).toHaveBeenCalledWith(multiModeDf, 'value'); - }); - - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error - const validateColumn = (df, column) => { - if (!df.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; - - // Create the mode function with a validator - const modeFn = mode({ validateColumn }); - - // Check that the function throws an error for a non-existent column - expect(() => modeFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); - - it('should return null for empty DataFrame', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Create the mode function with a mock validator - const validateColumn = vi.fn(); - const modeFn = mode({ validateColumn }); - - // Call the mode function - const result = modeFn(emptyDf, 'value'); - - // Check the 
result - expect(result).toBe(null); - // For an empty DataFrame, the validator is not called, as we immediately return null - }); - // Test the DataFrame.mode method - it('should be available as a DataFrame method', () => { - // Check that the mode method is available in DataFrame - expect(typeof df.mode).toBe('function'); - - // Call the mode method and check the result - expect(df.mode('value')).toBe(30); - expect(df.mode('category')).toBe('B'); // 'B' appears more often than 'A' or 'C' - }); - - it('should handle empty DataFrame gracefully', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Check that the mode method returns null for an empty DataFrame - expect(emptyDf.mode('value')).toBe(null); - }); - - it('should throw error for non-existent column', () => { - // Check that the mode method throws an error for a non-existent column - expect(() => df.mode('nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); + it('should handle empty DataFrame gracefully', () => { + // Create an empty DataFrame using fromRows + const emptyDf = DataFrame.fromRows([]); + + // Check that the mode method returns null for an empty DataFrame + expect(emptyDf.mode('value')).toBe(null); + }); + + it('should throw error for non-existent column', () => { + // Check that the mode method throws an error for a non-existent column + expect(() => df.mode('nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); }); }); }); diff --git a/test/methods/dataframe/aggregation/std.test.js b/test/methods/dataframe/aggregation/std.test.js index 09a16ca..151f19e 100644 --- a/test/methods/dataframe/aggregation/std.test.js +++ b/test/methods/dataframe/aggregation/std.test.js @@ -1,175 +1,161 @@ -import { describe, it, expect, vi } from 'vitest'; +import { describe, it, expect, vi, beforeAll } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import { std, register, } 
from '../../../../src/methods/dataframe/aggregation/std.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - -// Register the std method in DataFrame for tests -register(DataFrame); - -// Test data to be used in all tests -const testData = [ - { value: 10, category: 'A', mixed: '20' }, - { value: 20, category: 'B', mixed: 30 }, - { value: 30, category: 'A', mixed: null }, - { value: 40, category: 'C', mixed: undefined }, - { value: 50, category: 'B', mixed: NaN }, -]; +// Register the std method on DataFrame prototype +beforeAll(() => register(DataFrame)); describe('std method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - // Testing the std function directly - it('should calculate the standard deviation correctly', () => { - // Create the std function with a mock validator - const validateColumn = vi.fn(); - const stdFn = std({ validateColumn }); - - // Call the std function - const result = stdFn(df, 'value'); - - // Expected standard deviation for [10, 20, 30, 40, 50] - // Mean = 30 - // Sum of squared deviations = - // (10-30)² + (20-30)² + (30-30)² + (40-30)² + (50-30)² = 400 + 100 + 0 + 100 + 400 = 1000 - // Variance (unbiased estimate) = 1000/4 = 250 - // Standard deviation = √250 ≈ 15.811 - const expected = Math.sqrt(250); - - // Check that the result is close to the expected value - // (accounting for floating-point precision) - expect(result).toBeCloseTo(expected, 10); - expect(validateColumn).toHaveBeenCalledWith(df, 'value'); - }); - - it('should handle mixed data types by converting to numbers', () => { - // Create a std function with a mock validator - const validateColumn = vi.fn(); - const stdFn = std({ validateColumn }); - - // Call the std function - 
const result = stdFn(df, 'mixed'); - - // Expected standard deviation for ['20', 30] (only valid numeric values) - // Mean = 25 - // Sum of squared deviations = (20-25)² + (30-25)² = 25 + 25 = 50 - // Variance (unbiased estimate) = 50/1 = 50 - // Standard deviation = √50 ≈ 7.071 - const expected = Math.sqrt(50); - - // Check that the result is close to the expected value - expect(result).toBeCloseTo(expected, 10); - expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); - }); - - it('should return null for a column with no valid numeric values', () => { - // Create the std function with a mock validator - const validateColumn = vi.fn(); - const stdFn = std({ validateColumn }); - - // Call the std function - const result = stdFn(df, 'category'); - - // Check that the result is null (no numeric values in the 'category' column) - expect(result).toBe(null); - expect(validateColumn).toHaveBeenCalledWith(df, 'category'); - }); - - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error - const validateColumn = (df, column) => { - if (!df.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; - - // Create the std function with the validator - const stdFn = std({ validateColumn }); - - // Check that the function throws an error for a non-existent column - expect(() => stdFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); - - it('should return null for empty DataFrame', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Create the std function with a mock validator - const validateColumn = vi.fn(); - const stdFn = std({ validateColumn }); - - // Call the std function - const result = stdFn(emptyDf, 'value'); - - // Check that the result is null for an empty DataFrame - expect(result).toBe(null); - // For an empty DataFrame, the validator is not called because we immediately return null - }); 
- - it('should return 0 for a DataFrame with a single value', () => { - // Create a DataFrame with a single value - const singleValueDf = createDataFrameWithStorage( - DataFrame, - [{ value: 42 }], - storageType, - ); - - // Create the std function with a mock validator - const validateColumn = vi.fn(); - const stdFn = std({ validateColumn }); - - // Call the std function - const result = stdFn(singleValueDf, 'value'); - - // Check that the result is 0 for a DataFrame with a single value - expect(result).toBe(0); - expect(validateColumn).toHaveBeenCalledWith(singleValueDf, 'value'); - }); - - // Testing the DataFrame.std method - it('should be available as a DataFrame method', () => { - // Check that the std method is available in DataFrame - expect(typeof df.std).toBe('function'); - - // Call the std method and check the result - const result = df.std('value', { population: true }); - - // Expected standard deviation for [10, 20, 30, 40, 50] with population: true - // Mean = 30 - // Sum of squared deviations = - // (10-30)² + (20-30)² + (30-30)² + (40-30)² + (50-30)² = 400 + 100 + 0 + 100 + 400 = 1000 - // Variance (biased estimate) = 1000/5 = 200 - // Standard deviation = √200 ≈ 14.142 - const expected = Math.sqrt(200); - expect(result).toBeCloseTo(expected, 5); - }); - - it('should handle empty DataFrame gracefully', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Check that the std method returns null for an empty DataFrame - expect(emptyDf.std('value')).toBe(null); - }); - - it('should throw error for non-existent column', () => { - // Check that the std method throws an error for a non-existent column - expect(() => df.std('nonexistent')).toThrow( - 'Column \'nonexistent\' not found in DataFrame', - ); - }); + describe('with standard storage', () => { + it('should calculate the standard deviation correctly', () => { + // Create a DataFrame with numeric values + const numericValues = [10, 
20, 30, 40, 50]; + const numericDf = DataFrame.fromRows( + numericValues.map((v) => ({ value: v })), + ); + + // Create a mock validator function + const validateColumn = vi.fn(); + const stdFn = std({ validateColumn }); + + // Calculate the standard deviation + const result = stdFn(numericDf, 'value'); + + // Expected standard deviation for [10, 20, 30, 40, 50] with n-1 denominator + // = sqrt(sum((x - mean)^2) / (n - 1)) + // = sqrt(((10-30)^2 + (20-30)^2 + (30-30)^2 + (40-30)^2 + (50-30)^2) / 4) + // = sqrt((400 + 100 + 0 + 100 + 400) / 4) + // = sqrt(1000 / 4) + // = sqrt(250) + // ≈ 15.811 + const expected = Math.sqrt(1000 / 4); + + // Check that the result is close to the expected value + // (accounting for floating-point precision) + expect(result).toBeCloseTo(expected, 3); + expect(validateColumn).toHaveBeenCalledWith(numericDf, 'value'); + }); + + it('should handle mixed data types by converting to numbers', () => { + // Create a DataFrame with mixed data types + const mixedValues = [10, '20', 30, '40', 50]; + const mixedDf = DataFrame.fromRows( + mixedValues.map((v) => ({ value: v })), + ); + + // Create a mock validator function + const validateColumn = vi.fn(); + const stdFn = std({ validateColumn }); + + // Calculate the standard deviation + const result = stdFn(mixedDf, 'value'); + + // Expected standard deviation for [10, 20, 30, 40, 50] with n-1 denominator + const expected = Math.sqrt(1000 / 4); + + // Check that the result is close to the expected value + expect(result).toBeCloseTo(expected, 3); + expect(validateColumn).toHaveBeenCalledWith(mixedDf, 'value'); + }); + + it('should return null for a column with no valid numeric values', () => { + // Create a DataFrame with non-numeric values + const nonNumericValues = ['a', 'b', 'c', null, undefined]; + const nonNumericDf = DataFrame.fromRows( + nonNumericValues.map((v) => ({ value: v })), + ); + + // Create a mock validator function + const validateColumn = vi.fn(); + const stdFn = std({ 
validateColumn }); + + // Calculate the standard deviation + const result = stdFn(nonNumericDf, 'value'); + + // Check that the result is null for a column with no valid numeric values + expect(result).toBe(null); + expect(validateColumn).toHaveBeenCalledWith(nonNumericDf, 'value'); + }); + + it('should return null for an empty DataFrame', () => { + // Create an empty DataFrame + const emptyDf = DataFrame.fromRows([]); + + // Create a mock validator function + const validateColumn = vi.fn(); + const stdFn = std({ validateColumn }); + + // Calculate the standard deviation + const result = stdFn(emptyDf, 'value'); + + // Check that the result is null for an empty DataFrame + expect(result).toBe(null); + // Validator should not be called for empty DataFrame + expect(validateColumn).not.toHaveBeenCalled(); + }); + + it('should return 0 for a DataFrame with a single value', () => { + // Create a DataFrame with a single value + const singleValue = [42]; + const singleValueDf = DataFrame.fromRows( + singleValue.map((v) => ({ value: v })), + ); + + // Create a mock validator function + const validateColumn = vi.fn(); + const stdFn = std({ validateColumn }); + + // Calculate the standard deviation + const result = stdFn(singleValueDf, 'value'); + + // Check that the result is 0 for a DataFrame with a single value + expect(result).toBe(0); + expect(validateColumn).toHaveBeenCalledWith(singleValueDf, 'value'); + }); + + it('should be available as a DataFrame method', () => { + // Create a DataFrame with numeric values + const values = [10, 20, 30]; + const df = DataFrame.fromRows(values.map((v) => ({ value: v }))); + + // Calculate the standard deviation using the DataFrame method + const result = df.std('value'); + + // Expected standard deviation for [10, 20, 30] with n-1 denominator + // = sqrt(sum((x - mean)^2) / (n - 1)) + // = sqrt(((10-20)^2 + (20-20)^2 + (30-20)^2) / 2) + // = sqrt((100 + 0 + 100) / 2) + // = sqrt(200 / 2) + // = sqrt(100) + // = 10 + const expected = 
Math.sqrt(200 / 2); + + // Standard deviation = √(200 / 2) = √100 = 10 + expect(result).toBeCloseTo(expected, 3); + }); + + it('should handle empty DataFrame gracefully', () => { + // Create an empty DataFrame + const emptyDf = DataFrame.fromRows([]); + + // Calculate the standard deviation using the DataFrame method + const result = emptyDf.std('value'); + + // Check that the result is null for an empty DataFrame + expect(result).toBe(null); + }); + + it('should throw an error for a non-existent column', () => { + // Create a DataFrame + const df = DataFrame.fromRows([{ value: 10 }, { value: 20 }]); + + // Check that an error is thrown for a non-existent column + expect(() => df.std('non_existent')).toThrow( + "Column 'non_existent' not found in DataFrame", + ); }); }); }); diff --git a/test/methods/dataframe/aggregation/sum.test.js b/test/methods/dataframe/aggregation/sum.test.js index 4f0bbb9..1e0cb11 100644 --- a/test/methods/dataframe/aggregation/sum.test.js +++ b/test/methods/dataframe/aggregation/sum.test.js @@ -1,10 +1,6 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import { DataFrame } from '../../../../src/core/dataframe/DataFrame.js'; import { sum } from '../../../../src/methods/dataframe/aggregation/sum.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; // Test data to be used in all tests const testData = [ @@ -16,55 +12,64 @@ const testData = [ ]; describe('sum method', () => { - // Run tests with both storage types - testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); + describe('with standard storage', () => { + // Create DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(testData); - it('should calculate the sum of numeric values in a 
column', () => { - // Call sum function directly - const sumFn = sum({ validateColumn: () => {} }); - const result = sumFn(df, 'value'); + it('should calculate the sum of numeric values in a column', () => { + // Call sum function directly with a mock validator + const validateColumn = vi.fn(); + const sumFn = sum({ validateColumn }); + const result = sumFn(df, 'value'); - // Check that the sum is correct - expect(result).toBe(150); // 10 + 20 + 30 + 40 + 50 = 150 - }); + // Check that the validator was called + expect(validateColumn).toHaveBeenCalledWith(df, 'value'); - it('should handle mixed data types by converting to numbers', () => { - // Call sum function directly - const sumFn = sum({ validateColumn: () => {} }); - const result = sumFn(df, 'mixed'); + // Check that the sum is correct + expect(result).toBe(150); // 10 + 20 + 30 + 40 + 50 = 150 + }); + + it('should handle mixed data types by converting to numbers', () => { + // Call sum function directly with a mock validator + const validateColumn = vi.fn(); + const sumFn = sum({ validateColumn }); + const result = sumFn(df, 'mixed'); + + // Check that the validator was called + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); - // Check that the sum is correct (only valid numbers are summed) - expect(result).toBe(50); // '20' -> 20, 30 -> 30, null/undefined/NaN are skipped - }); + // Check that the sum is correct (only valid numbers are summed) + expect(result).toBe(50); // '20' -> 20, 30 -> 30, null/undefined/NaN are skipped + }); + + it('should return 0 for a column with no valid numeric values', () => { + // Call sum function directly with a mock validator + const validateColumn = vi.fn(); + const sumFn = sum({ validateColumn }); + const result = sumFn(df, 'category'); - it('should return 0 for a column with no valid numeric values', () => { - // Call sum function directly - const sumFn = sum({ validateColumn: () => {} }); - const result = sumFn(df, 'category'); + // Check that the validator was 
called + expect(validateColumn).toHaveBeenCalledWith(df, 'category'); - // Check that the sum is 0 (no numeric values in 'category' column) - expect(result).toBe(0); - }); + // Check that the sum is 0 (no numeric values in 'category' column) + expect(result).toBe(0); + }); - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error for non-existent column - const validateColumn = (frame, column) => { - if (!frame.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; + it('should throw an error for non-existent column', () => { + // Create a validator that throws an error for non-existent column + const validateColumn = (frame, column) => { + if (!frame.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; - // Call sum function with validator - const sumFn = sum({ validateColumn }); + // Call sum function with validator + const sumFn = sum({ validateColumn }); - // Check that it throws an error for non-existent column - expect(() => sumFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); + // Check that it throws an error for non-existent column + expect(() => sumFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); }); }); }); diff --git a/test/methods/dataframe/aggregation/variance.test.js b/test/methods/dataframe/aggregation/variance.test.js index fcfb23c..90577b8 100644 --- a/test/methods/dataframe/aggregation/variance.test.js +++ b/test/methods/dataframe/aggregation/variance.test.js @@ -5,11 +5,6 @@ import { register, } from '../../../../src/methods/dataframe/aggregation/variance.js'; -import { - testWithBothStorageTypes, - createDataFrameWithStorage, -} from '../../../utils/storageTestUtils.js'; - // Register the variance method in DataFrame for tests register(DataFrame); @@ -23,143 +18,136 @@ const testData = [ ]; describe('variance method', () => { - // Run tests with both storage types - 
testWithBothStorageTypes((storageType) => { - describe(`with ${storageType} storage`, () => { - // Create DataFrame with the specified storage type - const df = createDataFrameWithStorage(DataFrame, testData, storageType); - - // Testing the variance function directly - it('should calculate the variance correctly', () => { - // Create the variance function with a mock validator - const validateColumn = vi.fn(); - const varianceFn = variance({ validateColumn }); - - // Call the variance function - const result = varianceFn(df, 'value'); - - // Expected variance for [10, 20, 30, 40, 50] - // Mean = 30 - // Sum of squared deviations = - // (10-30)² + (20-30)² + (30-30)² + (40-30)² + (50-30)² = 400 + 100 + 0 + 100 + 400 = 1000 - // Variance (unbiased estimate) = 1000/4 = 250 - const expected = 250; - - // Check that the result is close to the expected value - // (accounting for floating-point precision) - expect(result).toBeCloseTo(expected, 10); - expect(validateColumn).toHaveBeenCalledWith(df, 'value'); - }); - - it('should handle mixed data types by converting to numbers', () => { - // Create the variance function with a mock validator - const validateColumn = vi.fn(); - const varianceFn = variance({ validateColumn }); - - // Call the variance function - const result = varianceFn(df, 'mixed'); - - // Expected variance for ['20', 30] (only valid numeric values) - // Mean = 25 - // Sum of squared deviations = (20-25)² + (30-25)² = 25 + 25 = 50 - // Variance (unbiased estimate) = 50/1 = 50 - const expected = 50; - - // Check that the result is close to the expected value - expect(result).toBeCloseTo(expected, 10); - expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); - }); - - it('should return null for a column with no valid numeric values', () => { - // Create the variance function with a mock validator - const validateColumn = vi.fn(); - const varianceFn = variance({ validateColumn }); - - // Call the variance function - const result = varianceFn(df, 
'category'); - - // Check that the result is null (no numeric values in the 'category' column) - expect(result).toBe(null); - expect(validateColumn).toHaveBeenCalledWith(df, 'category'); - }); - - it('should throw an error for non-existent column', () => { - // Create a validator that throws an error - const validateColumn = (df, column) => { - if (!df.columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - }; - - // Create the variance function with the validator - const varianceFn = variance({ validateColumn }); - - // Check that the function throws an error for a non-existent column - expect(() => varianceFn(df, 'nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); - - it('should return null for empty DataFrame', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Create the variance function with a mock validator - const validateColumn = vi.fn(); - const varianceFn = variance({ validateColumn }); - - // Call the variance function - const result = varianceFn(emptyDf, 'value'); - - // Check that the result is null for an empty DataFrame - expect(result).toBe(null); - // For an empty DataFrame, the validator is not called because we immediately return null - }); - - it('should return 0 for a DataFrame with a single value', () => { - // Create a DataFrame with a single value - const singleValueDf = createDataFrameWithStorage( - DataFrame, - [{ value: 42 }], - storageType, - ); - - // Create the variance function with a mock validator - const validateColumn = vi.fn(); - const varianceFn = variance({ validateColumn }); - - // Call the variance function - const result = varianceFn(singleValueDf, 'value'); - - // Check that the result is 0 for a DataFrame with a single value - expect(result).toBe(0); - expect(validateColumn).toHaveBeenCalledWith(singleValueDf, 'value'); - }); - // Testing the DataFrame.variance method - it('should be available as a 
DataFrame method', () => { - // Check that the variance method is available in DataFrame - expect(typeof df.variance).toBe('function'); - - // Call the variance method and check the result - const result = df.variance('value'); - const expected = 250; // As calculated above - expect(result).toBeCloseTo(expected, 10); - }); - - it('should handle empty DataFrame gracefully', () => { - // Create an empty DataFrame - const emptyDf = createDataFrameWithStorage(DataFrame, [], storageType); - - // Check that the variance method returns null for an empty DataFrame - expect(emptyDf.variance('value')).toBe(null); - }); - - it('should throw error for non-existent column', () => { - // Check that the variance method throws an error for a non-existent column - expect(() => df.variance('nonexistent')).toThrow( - 'Column \'nonexistent\' not found', - ); - }); + describe('with standard storage', () => { + // Create DataFrame using fromRows for proper column names + const df = DataFrame.fromRows(testData); + + // Testing the variance function directly + it('should calculate the variance correctly', () => { + // Create the variance function with a mock validator + const validateColumn = vi.fn(); + const varianceFn = variance({ validateColumn }); + + // Call the variance function + const result = varianceFn(df, 'value'); + + // Expected variance for [10, 20, 30, 40, 50] + // Mean = 30 + // Sum of squared deviations = + // (10-30)² + (20-30)² + (30-30)² + (40-30)² + (50-30)² = 400 + 100 + 0 + 100 + 400 = 1000 + // Variance (unbiased estimate) = 1000/4 = 250 + const expected = 250; + + // Check that the result is close to the expected value + // (accounting for floating-point precision) + expect(result).toBeCloseTo(expected, 10); + expect(validateColumn).toHaveBeenCalledWith(df, 'value'); + }); + + it('should handle mixed data types by converting to numbers', () => { + // Create the variance function with a mock validator + const validateColumn = vi.fn(); + const varianceFn = 
variance({ validateColumn }); + + // Call the variance function + const result = varianceFn(df, 'mixed'); + + // Expected variance for ['20', 30] (only valid numeric values) + // Mean = 25 + // Sum of squared deviations = (20-25)² + (30-25)² = 25 + 25 = 50 + // Variance (unbiased estimate) = 50/1 = 50 + const expected = 50; + + // Check that the result is close to the expected value + expect(result).toBeCloseTo(expected, 10); + expect(validateColumn).toHaveBeenCalledWith(df, 'mixed'); + }); + + it('should return null for a column with no valid numeric values', () => { + // Create the variance function with a mock validator + const validateColumn = vi.fn(); + const varianceFn = variance({ validateColumn }); + + // Call the variance function + const result = varianceFn(df, 'category'); + + // Check that the result is null (no numeric values in the 'category' column) + expect(result).toBe(null); + expect(validateColumn).toHaveBeenCalledWith(df, 'category'); + }); + + it('should throw an error for non-existent column', () => { + // Create a validator that throws an error + const validateColumn = (df, column) => { + if (!df.columns.includes(column)) { + throw new Error(`Column '${column}' not found`); + } + }; + + // Create the variance function with the validator + const varianceFn = variance({ validateColumn }); + + // Check that the function throws an error for a non-existent column + expect(() => varianceFn(df, 'nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); + }); + + it('should return null for empty DataFrame', () => { + // Create an empty DataFrame + const emptyDf = DataFrame.fromRows([]); + + // Create the variance function with a mock validator + const validateColumn = vi.fn(); + const varianceFn = variance({ validateColumn }); + + // Call the variance function + const result = varianceFn(emptyDf, 'value'); + + // Check that the result is null for an empty DataFrame + expect(result).toBe(null); + // For an empty DataFrame, the validator is not 
called because we immediately return null + }); + + it('should return 0 for a DataFrame with a single value', () => { + // Create a DataFrame with a single value + const singleValueDf = DataFrame.fromRows([{ value: 42 }]); + + // Create the variance function with a mock validator + const validateColumn = vi.fn(); + const varianceFn = variance({ validateColumn }); + + // Call the variance function + const result = varianceFn(singleValueDf, 'value'); + + // Check that the result is 0 for a DataFrame with a single value + expect(result).toBe(0); + expect(validateColumn).toHaveBeenCalledWith(singleValueDf, 'value'); + }); + // Testing the DataFrame.variance method + it('should be available as a DataFrame method', () => { + // Check that the variance method is available in DataFrame + expect(typeof df.variance).toBe('function'); + + // Call the variance method and check the result + const result = df.variance('value'); + const expected = 250; // As calculated above + expect(result).toBeCloseTo(expected, 10); + }); + + it('should handle empty DataFrame gracefully', () => { + // Create an empty DataFrame + const emptyDf = DataFrame.fromRows([]); + + // Check that the variance method returns null for an empty DataFrame + expect(emptyDf.variance('value')).toBe(null); + }); + + it('should throw error for non-existent column', () => { + // Check that the variance method throws an error for a non-existent column + expect(() => df.variance('nonexistent')).toThrow( + "Column 'nonexistent' not found", + ); }); }); });