From 09c7f5acb24b634a8837f5ae46d6b44cf89efe26 Mon Sep 17 00:00:00 2001 From: wegar2 Date: Thu, 29 May 2025 22:55:27 +0200 Subject: [PATCH 1/2] fixed broken unit test for bankchurn pipeline --- tests/pipeline/test_bankchurn_pipeline.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/pipeline/test_bankchurn_pipeline.py b/tests/pipeline/test_bankchurn_pipeline.py index 4173d06..42f80cc 100644 --- a/tests/pipeline/test_bankchurn_pipeline.py +++ b/tests/pipeline/test_bankchurn_pipeline.py @@ -4,17 +4,20 @@ from moddata.src.config import BankchurnPipelineConfig -def test_bankchurn_pipeline_run(): +def test_bankchurn_pipeline_tree_like(): X_train, X_test, y_train, y_test = BankchurnPipeline( config=BankchurnPipelineConfig( random_state=12345, - train_size=0.8 + train_size=0.8, + encoding_and_scaling_model_type="tree_like" ) ).run() - assert X_train.shape == (8_000, 10) - assert X_test.shape == (2_000, 10) + assert X_train.shape == (8_000, 11) + assert X_test.shape == (2_000, 11) assert y_train.shape == (8_000, 1) assert y_test.shape == (2_000, 1) assert np.all(np.array(y_test.index[:3]) == np.array([7867, 1402, 8606])) + + From b0785e6eb99927dd23d0167f296e8aab5693ce54 Mon Sep 17 00:00:00 2001 From: wegar2 Date: Thu, 29 May 2025 22:58:05 +0200 Subject: [PATCH 2/2] removed incorrect use of context manager for loading pl banking data --- moddata/_utils.py | 6 ++---- tests/pipeline/test_bankchurn_pipeline.py | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/moddata/_utils.py b/moddata/_utils.py index 48e818e..2c0a16d 100644 --- a/moddata/_utils.py +++ b/moddata/_utils.py @@ -59,11 +59,9 @@ def _load_btc(): def _load_pl_banking_stocks() -> pd.DataFrame: - with ( + return pd.read_parquet(str( resources.files('moddata.data').joinpath('pl_banking_stocks.parquet') - as f - ): - return pd.read_parquet(f) + )) def load_data(dataset: Dataset) -> pd.DataFrame | None: diff --git a/tests/pipeline/test_bankchurn_pipeline.py 
b/tests/pipeline/test_bankchurn_pipeline.py index 42f80cc..4c0278d 100644 --- a/tests/pipeline/test_bankchurn_pipeline.py +++ b/tests/pipeline/test_bankchurn_pipeline.py @@ -21,3 +21,4 @@ def test_bankchurn_pipeline_tree_like(): assert np.all(np.array(y_test.index[:3]) == np.array([7867, 1402, 8606])) +test_bankchurn_pipeline_tree_like \ No newline at end of file