diff --git a/__init__.py b/__init__.py index 16e6f03..7aec3f3 100644 --- a/__init__.py +++ b/__init__.py @@ -14,7 +14,10 @@ # Create namespaces import bridgenlp.adapters as adapters -import bridgenlp.pipes as pipes +try: + import bridgenlp.pipes as pipes +except Exception: + pipes = None __all__ = [ "adapters", diff --git a/bridgenlp/base.py b/bridgenlp/base.py index df73159..7520238 100644 --- a/bridgenlp/base.py +++ b/bridgenlp/base.py @@ -19,6 +19,7 @@ from .config import BridgeConfig from .result import BridgeResult +from .utils import get_model_memory_usage class BridgeBase(ABC): @@ -41,7 +42,8 @@ def __init__(self, config: Optional[BridgeConfig] = None): "num_calls": 0, "total_time": 0.0, "total_tokens": 0, - "errors": 0 + "errors": 0, + "memory_mb": 0.0 } self._metrics_lock = threading.RLock() @@ -160,6 +162,19 @@ def _measure_performance(self): elapsed = time.time() - start_time with self._metrics_lock: self._metrics["total_time"] += elapsed + if self._metrics.get("memory_mb", 0.0) == 0.0: + model = None + # Try to access common model attributes + if hasattr(self, "model"): + try: + model = self.model + except Exception: + model = getattr(self, "_model", None) + elif hasattr(self, "_model"): + model = getattr(self, "_model", None) + + if model is not None: + self._metrics["memory_mb"] = get_model_memory_usage(model) def get_metrics(self) -> Dict[str, float]: """ @@ -188,7 +203,8 @@ def reset_metrics(self) -> None: "num_calls": 0, "total_time": 0.0, "total_tokens": 0, - "errors": 0 + "errors": 0, + "memory_mb": 0.0 } def __enter__(self): diff --git a/bridgenlp/cli.py b/bridgenlp/cli.py index 3e2b93f..b3ba631 100644 --- a/bridgenlp/cli.py +++ b/bridgenlp/cli.py @@ -338,14 +338,15 @@ def read_chunks(stream, chunk_size): # Print summary statistics elapsed_time = time.time() - start_time - if processed_count > 0 and show_progress: - print(f"Processed {processed_count} texts in {elapsed_time:.4f}s " - f"({processed_count / elapsed_time:.2f} texts/sec)" if elapsed_time > 0 else - f"Processed {processed_count} texts in {elapsed_time:.4f}s", - file=sys.stderr) - - # Print metrics if available - if hasattr(bridge, 'get_metrics'): + if processed_count > 0: + if show_progress: + print(f"Processed {processed_count} texts in {elapsed_time:.4f}s " + f"({processed_count / elapsed_time:.2f} texts/sec)" if elapsed_time > 0 else + f"Processed {processed_count} texts in {elapsed_time:.4f}s", + file=sys.stderr) + + # Print metrics if requested + if bridge.config.collect_metrics and hasattr(bridge, 'get_metrics'): metrics = bridge.get_metrics() if metrics: print("Performance metrics:", file=sys.stderr) diff --git a/tests/test_memory_reporting.py b/tests/test_memory_reporting.py new file mode 100644 index 0000000..c93df3f --- /dev/null +++ b/tests/test_memory_reporting.py @@ -0,0 +1,34 @@ +import pytest +from unittest.mock import patch +from bridgenlp.base import BridgeBase +from bridgenlp.config import BridgeConfig +from bridgenlp.result import BridgeResult + +class DummyAdapter(BridgeBase): + def __init__(self): + super().__init__(BridgeConfig(collect_metrics=True)) + self._model = None + + def _load_model(self): + self._model = object() + + def from_text(self, text: str) -> BridgeResult: + with self._measure_performance(): + if self._model is None: + self._load_model() + return BridgeResult(tokens=text.split()) + + def from_tokens(self, tokens): + return BridgeResult(tokens=tokens) + + def from_spacy(self, doc): + return BridgeResult(tokens=[t.text for t in doc]).attach_to_spacy(doc) + + +def test_memory_metric_recorded(): + adapter = DummyAdapter() + with patch('bridgenlp.base.get_model_memory_usage', return_value=12.5) as mem: + adapter.from_text("hello world") + metrics = adapter.get_metrics() + assert metrics['memory_mb'] == 12.5 + assert mem.called