41 changes: 41 additions & 0 deletions tests/configs/integration_test.yaml
@@ -0,0 +1,41 @@
defaults:
  - _self_
  - generative_process@training_data_generator: wonka_dursley
  - generative_process@validation_data_generator: wonka_dursley
  - predictive_model: transformer
  - persistence: local_penzai_persister
  - logging: file_logger
  - training: small
  - evaluation@validation: small

seed: 42
experiment_name: integration_test
run_name: integration_test_run

# Override with smaller values for faster testing
training:
  n_steps: 10
  checkpoint_frequency: 5
  metric_frequency: 5
  metric_samples: 8

validation:
  batch_size: 8
  batches: 2

# Ensure we use small model for testing
predictive_model:
  n_layers: 2
  d_model: 32
  d_ff: 64
  n_heads: 2
  max_length: 64

# Override paths for testing
logging:
  instance:
    file_path: /tmp/simplexity_test_logs/test.log

persistence:
  instance:
    directory: /tmp/simplexity_test_checkpoints
Comment on lines +37 to +41

Copilot AI, Sep 30, 2025

Using hardcoded /tmp/ paths is not portable across operating systems (Windows doesn't have /tmp/). Consider using a relative path or letting the test override this value entirely.

Suggested change:
-    file_path: /tmp/simplexity_test_logs/test.log
-persistence:
-  instance:
-    directory: /tmp/simplexity_test_checkpoints
+    file_path: logs/test.log
+persistence:
+  instance:
+    directory: checkpoints
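A portable variant of this suggestion is to resolve the temp root at composition time rather than hardcoding /tmp. A minimal sketch, assuming OmegaConf >= 2.1 (whose built-in oc.env resolver accepts a fallback value); TMPDIR is only an illustrative environment variable, not something this repo is known to set:

from omegaconf import OmegaConf

# The interpolation falls back to /tmp when TMPDIR is unset,
# so non-POSIX runners can redirect the path via the environment.
cfg = OmegaConf.create({"file_path": "${oc.env:TMPDIR,/tmp}/simplexity_test_logs/test.log"})
print(OmegaConf.to_container(cfg, resolve=True))

In practice the tests below already override both paths with a tempfile directory, so the YAML values act only as fallbacks.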
140 changes: 140 additions & 0 deletions tests/test_integration.py
@@ -0,0 +1,140 @@
"""Integration test that wraps simplexity/run.py to test the full training workflow."""

import os

Check failure on line 3 in tests/test_integration.py (GitHub Actions / static-analysis): Ruff (F401) `os` imported but unused.
import tempfile
from pathlib import Path

import pytest

Check failure on line 7 in tests/test_integration.py (GitHub Actions / static-analysis): Ruff (F401) `pytest` imported but unused.
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

from simplexity.run import train_model


def test_integration_end_to_end():
    """Test the complete end-to-end workflow using run.py."""
    # Use temporary directories for outputs
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        log_dir = temp_path / "logs"
        checkpoint_dir = temp_path / "checkpoints"

        # Get the config directory path
        config_dir = Path(__file__).parent / "configs"

        # Initialize Hydra with our test config
        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/test.log",
                    f"persistence.instance.directory={checkpoint_dir}",
                    "training.n_steps=20",  # Small number for quick testing
                    "training.checkpoint_frequency=10",
                    "training.metric_frequency=10",
                ]
            )

            # Store initial config for validation
            initial_config = OmegaConf.to_container(cfg)

            # Train the model using run.py
            final_loss = train_model(cfg)

            # Verify that training completed and loss is a valid number
            assert isinstance(final_loss, float)
            assert final_loss > 0
            assert final_loss < 100  # Sanity check - loss shouldn't be astronomical

            # Verify logs were created
            assert log_dir.exists(), "Log directory should have been created"
            log_files = list(log_dir.glob("*"))
            assert len(log_files) > 0, "Log files should have been created"

            # Verify checkpoints were saved
            assert checkpoint_dir.exists(), "Checkpoint directory should have been created"
            checkpoint_files = list(checkpoint_dir.glob("**/*"))
            assert len(checkpoint_files) > 0, "Checkpoint files should have been saved"

            # Check that we have checkpoint at expected steps
            step_10_checkpoint = checkpoint_dir / "step_10"
            assert step_10_checkpoint.exists(), "Checkpoint at step 10 should exist"

            # Verify the config used matches what we expected
            assert initial_config["training"]["n_steps"] == 20
            assert initial_config["seed"] == 42
            assert initial_config["experiment_name"] == "integration_test"
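An alternative shape for this test (a sketch, not the PR's code) is pytest's built-in tmp_path fixture, which drops the tempfile boilerplate while composing the same config; the test name here is hypothetical:

def test_integration_end_to_end_tmp_path(tmp_path):
    """Sketch: same workflow, but pytest manages the temporary directory."""
    config_dir = Path(__file__).parent / "configs"
    with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
        cfg = compose(
            config_name="integration_test.yaml",
            overrides=[
                f"logging.instance.file_path={tmp_path / 'logs' / 'test.log'}",
                f"persistence.instance.directory={tmp_path / 'checkpoints'}",
                "training.n_steps=20",
            ],
        )
        final_loss = train_model(cfg)
    # tmp_path is created and cleaned up per test, so no context manager is needed.
    assert 0 < final_loss < 100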


def test_integration_loss_decreases():
    """Test that loss decreases during training."""
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        log_dir = temp_path / "logs"
        checkpoint_dir = temp_path / "checkpoints"

        config_dir = Path(__file__).parent / "configs"

        # First run - get initial loss with very few steps
        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/run1/test.log",
                    f"persistence.instance.directory={checkpoint_dir}/run1",
                    "training.n_steps=1",
                ]
            )
            initial_loss = train_model(cfg)

        # Second run - train for more steps
        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/run2/test.log",
                    f"persistence.instance.directory={checkpoint_dir}/run2",
                    "training.n_steps=50",
                    "seed=42",  # Same seed for reproducibility
                ]
            )
            final_loss = train_model(cfg)

        # Verify loss decreased with more training
        assert final_loss < initial_loss, f"Loss should decrease with training: initial={initial_loss:.4f}, final={final_loss:.4f}"

Check failure on line 104 in tests/test_integration.py (GitHub Actions / static-analysis): Ruff (E501) Line too long (131 > 120).
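If the flagged line is fixed in place rather than waived, implicit string concatenation keeps the behavior identical (one possible rewrite, not the committed code):

        assert final_loss < initial_loss, (
            "Loss should decrease with training: "
            f"initial={initial_loss:.4f}, final={final_loss:.4f}"
        )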


def test_integration_with_validation():
    """Test that validation works correctly during training."""
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        log_dir = temp_path / "logs"
        checkpoint_dir = temp_path / "checkpoints"

        config_dir = Path(__file__).parent / "configs"

        with initialize_config_dir(config_dir=str(config_dir.absolute()), version_base="1.2"):
            cfg = compose(
                config_name="integration_test.yaml",
                overrides=[
                    f"logging.instance.file_path={log_dir}/test.log",
                    f"persistence.instance.directory={checkpoint_dir}",
                    "training.n_steps=20",
                    "training.validation_frequency=10",
                    "validation.batches=2",
                ]
            )

            loss = train_model(cfg)

            # Check that validation ran (by checking logs contain validation info)
            assert log_dir.exists()
            assert loss > 0

            # Check that log file exists and contains validation info
            log_file = log_dir / "test.log"
            assert log_file.exists(), "Log file should have been created"
            with open(log_file, "r") as f:

Check failure on line 137 in tests/test_integration.py (GitHub Actions / static-analysis): Ruff (UP015) Unnecessary mode argument.
                content = f.read()
                # Log file should have some content
                assert len(content) > 0, "Log file should not be empty"
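Assuming the package and pytest are installed in the active environment, the new tests run directly with `pytest tests/test_integration.py -v`; composing configs through the Hydra API keeps them independent of invoking simplexity/run.py from the command line.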