feat: Add integration test wrapping simplexity/run.py #85
Status: Open. ealt wants to merge 1 commit into `main` from `claude/issue-84-20250930-2256`.
Changes from all commits
New Hydra config (added in this PR), composed by the test below as `integration_test.yaml`:

```yaml
defaults:
  - _self_
  - generative_process@training_data_generator: wonka_dursley
  - generative_process@validation_data_generator: wonka_dursley
  - predictive_model: transformer
  - persistence: local_penzai_persister
  - logging: file_logger
  - training: small
  - evaluation@validation: small

seed: 42
experiment_name: integration_test
run_name: integration_test_run

# Override with smaller values for faster testing
training:
  n_steps: 10
  checkpoint_frequency: 5
  metric_frequency: 5
  metric_samples: 8

validation:
  batch_size: 8
  batches: 2

# Ensure we use small model for testing
predictive_model:
  n_layers: 2
  d_model: 32
  d_ff: 64
  n_heads: 2
  max_length: 64

# Override paths for testing
logging:
  instance:
    file_path: /tmp/simplexity_test_logs/test.log

persistence:
  instance:
    directory: /tmp/simplexity_test_checkpoints
```
Review comment on lines +37 to +41:

Using hardcoded `/tmp/` paths is not portable across operating systems (Windows doesn't have `/tmp/`). Consider using a relative path or letting the test override this value entirely.

Suggested change:

```diff
-    file_path: /tmp/simplexity_test_logs/test.log
+    file_path: logs/test.log
 
 persistence:
   instance:
-    directory: /tmp/simplexity_test_checkpoints
+    directory: checkpoints
```
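One portable alternative (a sketch only, not part of this PR) is to keep the config paths relative, as suggested above, and have the test redirect them to pytest's built-in `tmp_path` fixture. The calls below reuse the `compose`/`train_model` API already used by the PR's test module; the test name is hypothetical:

```python
# Sketch only: assumes the compose/train_model API and configs/ layout shown in this PR.
from pathlib import Path

from hydra import compose, initialize_config_dir

from simplexity.run import train_model


def test_end_to_end_with_tmp_path(tmp_path: Path):
    """Same end-to-end check, with no hardcoded /tmp anywhere."""
    config_dir = Path(__file__).parent / "configs"
    with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
        cfg = compose(
            config_name="integration_test.yaml",
            overrides=[
                # tmp_path is an OS-appropriate, per-test directory provided by pytest.
                f"logging.instance.file_path={tmp_path / 'logs' / 'test.log'}",
                f"persistence.instance.directory={tmp_path / 'checkpoints'}",
            ],
        )
    final_loss = train_model(cfg)
    assert final_loss > 0
```

Because `tmp_path` is unique per test and managed by pytest, this also avoids collisions between test runs without any platform-specific defaults in the config.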
New integration test module (140 added lines):

```python
"""Integration test that wraps simplexity/run.py to test the full training workflow."""

import os
import tempfile
from pathlib import Path

import pytest
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

from simplexity.run import train_model


def test_integration_end_to_end():
    """Test the complete end-to-end workflow using run.py."""
    # Use temporary directories for outputs
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        log_dir = temp_path / "logs"
        checkpoint_dir = temp_path / "checkpoints"

        # Get the config directory path
        config_dir = Path(__file__).parent / "configs"

        # Initialize Hydra with our test config
        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/test.log",
                    f"persistence.instance.directory={checkpoint_dir}",
                    "training.n_steps=20",  # Small number for quick testing
                    "training.checkpoint_frequency=10",
                    "training.metric_frequency=10",
                ]
            )

        # Store initial config for validation
        initial_config = OmegaConf.to_container(cfg)

        # Train the model using run.py
        final_loss = train_model(cfg)

        # Verify that training completed and loss is a valid number
        assert isinstance(final_loss, float)
        assert final_loss > 0
        assert final_loss < 100  # Sanity check - loss shouldn't be astronomical

        # Verify logs were created
        assert log_dir.exists(), "Log directory should have been created"
        log_files = list(log_dir.glob("*"))
        assert len(log_files) > 0, "Log files should have been created"

        # Verify checkpoints were saved
        assert checkpoint_dir.exists(), "Checkpoint directory should have been created"
        checkpoint_files = list(checkpoint_dir.glob("**/*"))
        assert len(checkpoint_files) > 0, "Checkpoint files should have been saved"

        # Check that we have checkpoints at expected steps
        step_10_checkpoint = checkpoint_dir / "step_10"
        assert step_10_checkpoint.exists(), "Checkpoint at step 10 should exist"

        # Verify the config used matches what we expected
        assert initial_config["training"]["n_steps"] == 20
        assert initial_config["seed"] == 42
        assert initial_config["experiment_name"] == "integration_test"


def test_integration_loss_decreases():
    """Test that loss decreases during training."""
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        log_dir = temp_path / "logs"
        checkpoint_dir = temp_path / "checkpoints"

        config_dir = Path(__file__).parent / "configs"

        # First run - get initial loss with very few steps
        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/run1/test.log",
                    f"persistence.instance.directory={checkpoint_dir}/run1",
                    "training.n_steps=1",
                ]
            )
        initial_loss = train_model(cfg)

        # Second run - train for more steps
        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/run2/test.log",
                    f"persistence.instance.directory={checkpoint_dir}/run2",
                    "training.n_steps=50",
                    "seed=42",  # Same seed for reproducibility
                ]
            )
        final_loss = train_model(cfg)

        # Verify loss decreased with more training
        assert final_loss < initial_loss, f"Loss should decrease with training: initial={initial_loss:.4f}, final={final_loss:.4f}"


def test_integration_with_validation():
    """Test that validation works correctly during training."""
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        log_dir = temp_path / "logs"
        checkpoint_dir = temp_path / "checkpoints"

        config_dir = Path(__file__).parent / "configs"

        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/test.log",
                    f"persistence.instance.directory={checkpoint_dir}",
                    "training.n_steps=20",
                    "training.validation_frequency=10",
                    "validation.batches=2",
                ]
            )

        loss = train_model(cfg)

        # Check that validation ran (by checking logs contain validation info)
        assert log_dir.exists()
        assert loss > 0

        # Check that log file exists and contains validation info
        log_file = log_dir / "test.log"
        assert log_file.exists(), "Log file should have been created"
        with open(log_file, "r") as f:
            content = f.read()
            # Log file should have some content
            assert len(content) > 0, "Log file should not be empty"
```