From 718966f9c2a0085bc1c2d3902715aeabc04cbc9c Mon Sep 17 00:00:00 2001 From: Amiri Barksdale at Home Date: Fri, 23 Jan 2026 09:48:14 -0800 Subject: [PATCH 1/2] Build a sampleM for the monadic interface by wrapping DataFrame.Operations.Subset.sample in DataFrame.Monad.modifyM, like filterWhereM --- src/DataFrame/Monad.hs | 26 ++++++------ src/DataFrame/Operations/Subset.hs | 63 ++++++++++++++++-------------- 2 files changed, 48 insertions(+), 41 deletions(-) diff --git a/src/DataFrame/Monad.hs b/src/DataFrame/Monad.hs index cbcfd53..a3afe25 100644 --- a/src/DataFrame/Monad.hs +++ b/src/DataFrame/Monad.hs @@ -1,19 +1,20 @@ -{-# LANGUAGE ExplicitNamespaces #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ExplicitNamespaces #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE InstanceSigs #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} module DataFrame.Monad where -import DataFrame (DataFrame) -import qualified DataFrame as D -import DataFrame.Internal.Column (Columnable) -import DataFrame.Internal.Expression (Expr (..)) +import DataFrame (DataFrame) +import qualified DataFrame as D +import DataFrame.Internal.Column (Columnable) +import DataFrame.Internal.Expression (Expr (..)) -import qualified Data.Text as T +import qualified Data.Text as T +import System.Random -- A re-implementation of the state monad. 
-- `mtl` might be too heavy a dependency just to get @@ -61,6 +62,9 @@ renameM expr newName = deriveM newName expr filterWhereM :: Expr Bool -> FrameM () filterWhereM p = modifyM (D.filterWhere p) +sampleM :: (RandomGen g) => g -> Double -> FrameM () +sampleM pureGen p = modifyM (D.sample pureGen p) + filterJustM :: (Columnable a) => Expr (Maybe a) -> FrameM (Expr a) filterJustM (Col name) = FrameM $ \df -> let df' = D.filterJust name df diff --git a/src/DataFrame/Operations/Subset.hs b/src/DataFrame/Operations/Subset.hs index 1f21b9d..1af48bb 100644 --- a/src/DataFrame/Operations/Subset.hs +++ b/src/DataFrame/Operations/Subset.hs @@ -1,35 +1,38 @@ -{-# LANGUAGE ExplicitNamespaces #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ExplicitNamespaces #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeApplications #-} module DataFrame.Operations.Subset where -import qualified Data.List as L -import qualified Data.Map as M -import qualified Data.Text as T -import qualified Data.Vector as V -import qualified Data.Vector.Generic as VG -import qualified Data.Vector.Unboxed as VU +import qualified Data.List as L +import qualified Data.Map as M +import qualified Data.Text as T +import qualified Data.Vector as V +import qualified Data.Vector.Generic as VG +import qualified Data.Vector.Unboxed as VU import qualified Prelude -import Control.Exception (throw) -import Data.Function ((&)) -import Data.Maybe (fromJust, fromMaybe, isJust, isNothing) -import Data.Type.Equality (TestEquality (..)) -import DataFrame.Errors (DataFrameException (..), TypeErrorContext (..)) -import DataFrame.Internal.Column -import DataFrame.Internal.DataFrame (DataFrame (..), empty, getColumn) -import DataFrame.Internal.Expression -import 
DataFrame.Internal.Interpreter -import DataFrame.Operations.Core -import DataFrame.Operations.Transformations (apply) -import System.Random -import Type.Reflection -import Prelude hiding (filter, take) +import Control.Exception (throw) +import Data.Function ((&)) +import Data.Maybe (fromJust, fromMaybe, + isJust, isNothing) +import Data.Type.Equality (TestEquality (..)) +import DataFrame.Errors (DataFrameException (..), + TypeErrorContext (..)) +import DataFrame.Internal.Column +import DataFrame.Internal.DataFrame (DataFrame (..), empty, + getColumn) +import DataFrame.Internal.Expression +import DataFrame.Internal.Interpreter +import DataFrame.Operations.Core +import DataFrame.Operations.Transformations (apply) +import Prelude hiding (filter, take) +import System.Random +import Type.Reflection -- | O(k * n) Take the first n rows of a DataFrame. take :: Int -> DataFrame -> DataFrame @@ -106,11 +109,11 @@ filter (Col filterColumnName) condition df = case getColumn filterColumnName df filter expr condition df = let (TColumn col) = case interpret @a df (normalize expr) of - Left e -> throw e + Left e -> throw e Right c -> c indexes = case findIndices condition col of Right ixs -> ixs - Left e -> throw e + Left e -> throw e c' = snd $ dataframeDimensions df in df @@ -157,11 +160,11 @@ filterWhere :: Expr Bool -> DataFrame -> DataFrame filterWhere expr df = let (TColumn col) = case interpret @Bool df (normalize expr) of - Left e -> throw e + Left e -> throw e Right c -> c indexes = case findIndices id col of Right ixs -> ixs - Left e -> throw e + Left e -> throw e c' = snd $ dataframeDimensions df in df From a74486e4a03b4bd95294d3c5e4ed7e9742bc14ba Mon Sep 17 00:00:00 2001 From: Amiri Barksdale at Home Date: Fri, 23 Jan 2026 09:55:43 -0800 Subject: [PATCH 2/2] Run scripts/format.sh and scripts/lint.sh --fix --- src/DataFrame/Monad.hs | 24 +++++----- src/DataFrame/Operations/Subset.hs | 75 +++++++++++++++++------------- 2 files changed, 54 insertions(+), 45 deletions(-) 
diff --git a/src/DataFrame/Monad.hs b/src/DataFrame/Monad.hs index a3afe25..28a5f5e 100644 --- a/src/DataFrame/Monad.hs +++ b/src/DataFrame/Monad.hs @@ -1,20 +1,20 @@ -{-# LANGUAGE ExplicitNamespaces #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ExplicitNamespaces #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE InstanceSigs #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} module DataFrame.Monad where -import DataFrame (DataFrame) -import qualified DataFrame as D -import DataFrame.Internal.Column (Columnable) -import DataFrame.Internal.Expression (Expr (..)) +import DataFrame (DataFrame) +import qualified DataFrame as D +import DataFrame.Internal.Column (Columnable) +import DataFrame.Internal.Expression (Expr (..)) -import qualified Data.Text as T -import System.Random +import qualified Data.Text as T +import System.Random -- A re-implementation of the state monad. 
-- `mtl` might be too heavy a dependency just to get diff --git a/src/DataFrame/Operations/Subset.hs b/src/DataFrame/Operations/Subset.hs index 1af48bb..c4806db 100644 --- a/src/DataFrame/Operations/Subset.hs +++ b/src/DataFrame/Operations/Subset.hs @@ -1,38 +1,47 @@ -{-# LANGUAGE ExplicitNamespaces #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ExplicitNamespaces #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeApplications #-} module DataFrame.Operations.Subset where -import qualified Data.List as L -import qualified Data.Map as M -import qualified Data.Text as T -import qualified Data.Vector as V -import qualified Data.Vector.Generic as VG -import qualified Data.Vector.Unboxed as VU +import qualified Data.List as L +import qualified Data.Map as M +import qualified Data.Text as T +import qualified Data.Vector as V +import qualified Data.Vector.Generic as VG +import qualified Data.Vector.Unboxed as VU import qualified Prelude -import Control.Exception (throw) -import Data.Function ((&)) -import Data.Maybe (fromJust, fromMaybe, - isJust, isNothing) -import Data.Type.Equality (TestEquality (..)) -import DataFrame.Errors (DataFrameException (..), - TypeErrorContext (..)) -import DataFrame.Internal.Column -import DataFrame.Internal.DataFrame (DataFrame (..), empty, - getColumn) -import DataFrame.Internal.Expression -import DataFrame.Internal.Interpreter -import DataFrame.Operations.Core -import DataFrame.Operations.Transformations (apply) -import Prelude hiding (filter, take) -import System.Random -import Type.Reflection +import Control.Exception (throw) +import Data.Function ((&)) +import Data.Maybe ( + fromJust, + fromMaybe, + isJust, + isNothing, + ) +import Data.Type.Equality (TestEquality (..)) +import 
DataFrame.Errors ( + DataFrameException (..), + TypeErrorContext (..), + ) +import DataFrame.Internal.Column +import DataFrame.Internal.DataFrame ( + DataFrame (..), + empty, + getColumn, + ) +import DataFrame.Internal.Expression +import DataFrame.Internal.Interpreter +import DataFrame.Operations.Core +import DataFrame.Operations.Transformations (apply) +import System.Random +import Type.Reflection +import Prelude hiding (filter, take) -- | O(k * n) Take the first n rows of a DataFrame. take :: Int -> DataFrame -> DataFrame @@ -109,11 +118,11 @@ filter (Col filterColumnName) condition df = case getColumn filterColumnName df filter expr condition df = let (TColumn col) = case interpret @a df (normalize expr) of - Left e -> throw e + Left e -> throw e Right c -> c indexes = case findIndices condition col of Right ixs -> ixs - Left e -> throw e + Left e -> throw e c' = snd $ dataframeDimensions df in df @@ -160,11 +169,11 @@ filterWhere :: Expr Bool -> DataFrame -> DataFrame filterWhere expr df = let (TColumn col) = case interpret @Bool df (normalize expr) of - Left e -> throw e + Left e -> throw e Right c -> c indexes = case findIndices id col of Right ixs -> ixs - Left e -> throw e + Left e -> throw e c' = snd $ dataframeDimensions df in df