diff --git a/eval_protocol/adapters/__init__.py b/eval_protocol/adapters/__init__.py
index 6f031f3d..b6e7c1e9 100644
--- a/eval_protocol/adapters/__init__.py
+++ b/eval_protocol/adapters/__init__.py
@@ -99,3 +99,19 @@
     __all__.extend(["WeaveAdapter"])
 except ImportError:
     pass
+
+# DataFrame adapter (pandas integration for Lilac, etc.)
+try:
+    from .dataframe import (
+        evaluation_rows_to_dataframe,
+        dataframe_to_evaluation_rows,
+    )
+
+    __all__.extend(
+        [
+            "evaluation_rows_to_dataframe",
+            "dataframe_to_evaluation_rows",
+        ]
+    )
+except ImportError:
+    pass
diff --git a/eval_protocol/adapters/dataframe.py b/eval_protocol/adapters/dataframe.py
new file mode 100644
index 00000000..f67077f4
--- /dev/null
+++ b/eval_protocol/adapters/dataframe.py
@@ -0,0 +1,69 @@
+"""
+Pandas DataFrame adapter for Eval Protocol.
+
+This module provides utilities for converting between EvaluationRow format
+and pandas DataFrame format, enabling integration with data curation tools
+such as Lilac, Great Expectations, or any pandas-based workflow.
+
+Example usage:
+    >>> from eval_protocol.adapters.dataframe import (
+    ...     evaluation_rows_to_dataframe,
+    ...     dataframe_to_evaluation_rows,
+    ... )
+    >>>
+    >>> # Convert EvaluationRows to DataFrame
+    >>> df = evaluation_rows_to_dataframe(rows)
+    >>>
+    >>> # Convert back to EvaluationRows
+    >>> rows = dataframe_to_evaluation_rows(df)
+"""
+
+from __future__ import annotations
+
+import logging
+
+import pandas as pd
+
+from ..models import EvaluationRow
+
+logger = logging.getLogger(__name__)
+
+
+def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
+    """Convert EvaluationRows to a pandas DataFrame.
+
+    Uses EvaluationRow.to_dict() for serialization.
+
+    Args:
+        rows: List of EvaluationRow objects
+
+    Returns:
+        DataFrame with 'data_json' containing serialized rows plus convenience fields
+    """
+    records = [row.to_dict() for row in rows]
+    return pd.DataFrame(records)
+
+
+def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
+    """Convert a pandas DataFrame back to EvaluationRows.
+
+    Uses EvaluationRow.from_dict() for deserialization. Conversion is
+    best-effort: a record that fails to deserialize is logged as a warning
+    and skipped, so the result can be shorter than the DataFrame.
+
+    Args:
+        df: DataFrame with 'data_json' column containing serialized EvaluationRows
+
+    Returns:
+        List of EvaluationRow objects
+    """
+    rows = []
+    # Plain dict records avoid iterrows(), which builds a Series per row and
+    # does not preserve column dtypes across the row.
+    for record in df.to_dict(orient="records"):
+        try:
+            rows.append(EvaluationRow.from_dict(record))
+        except Exception as e:
+            # Deliberately broad: one bad record must not abort the batch.
+            logger.warning("Failed to convert row: %s", e)
+    return rows
diff --git a/eval_protocol/models.py b/eval_protocol/models.py
index 13d059d3..a7bc767b 100644
--- a/eval_protocol/models.py
+++ b/eval_protocol/models.py
@@ -953,6 +953,54 @@ def get_termination_reason(self) -> str:
             return str(reason)
         return "unknown"
 
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize this EvaluationRow to a dictionary.
+
+        The entire EvaluationRow is serialized to JSON, allowing full reconstruction.
+        Additional scalar fields are included for convenient filtering/grouping.
+
+        Returns:
+            Dictionary with 'data_json' containing the full serialized row,
+            plus convenience fields for filtering.
+        """
+        return {
+            "data_json": self.model_dump_json(),
+            "row_id": self.input_metadata.row_id if self.input_metadata else None,
+            "score": self.evaluation_result.score if self.evaluation_result else None,
+            "message_count": len(self.messages),
+            "has_tools": bool(self.tools),
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "EvaluationRow":
+        """Reconstruct an EvaluationRow from a dictionary.
+
+        Only 'data_json' is read; the convenience fields written by to_dict() are
+        ignored.
+
+        Args:
+            data: Dictionary containing 'data_json' with the serialized EvaluationRow.
+
+        Returns:
+            Reconstructed EvaluationRow instance.
+
+        Raises:
+            ValueError: If 'data_json' is missing or invalid.
+        """
+        from pydantic import ValidationError
+
+        data_json = data.get("data_json")
+        # A missing cell read back from pandas can be float NaN, which is truthy and
+        # would pass a bare emptiness check, so require a non-empty JSON payload.
+        if not isinstance(data_json, (str, bytes, bytearray)) or not data_json:
+            raise ValueError("Missing 'data_json' field in dictionary")
+
+        try:
+            return cls.model_validate_json(data_json)
+        except ValidationError as e:
+            raise ValueError(f"Failed to deserialize EvaluationRow: {e}") from e
+
     def __hash__(self) -> int:
         # Use a stable hash that works across Python processes
         return self._stable_hash()