From c3203f5b19df4cd01d5297b9a8d699523e1c9914 Mon Sep 17 00:00:00 2001
From: Shrey Modi <shreycricket10@gmail.com>
Date: Mon, 29 Dec 2025 10:02:14 -0800
Subject: [PATCH 1/3] lilac adapter

---
 eval_protocol/adapters/lilac.py | 265 ++++++++++++++++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 eval_protocol/adapters/lilac.py

diff --git a/eval_protocol/adapters/lilac.py b/eval_protocol/adapters/lilac.py
new file mode 100644
index 00000000..684adea9
--- /dev/null
+++ b/eval_protocol/adapters/lilac.py
@@ -0,0 +1,265 @@
+"""
+Lilac ML integration for Eval Protocol.
+
+This adapter provides utilities for converting between EvaluationRow format
+and Lilac dataset format, enabling powerful data curation features like:
+- Clustering and deduplication
+- Semantic search and filtering
+- Quality scoring with embeddings
+- Interactive data exploration
+
+Prerequisites:
+    pip install 'lilac[all]'
+
+Example usage:
+    >>> from eval_protocol.adapters.lilac import (
+    ...     evaluation_rows_to_lilac_dataset,
+    ...     lilac_dataset_to_evaluation_rows,
+    ... )
+    >>>
+    >>> # Convert EvaluationRows to Lilac dataset
+    >>> dataset = evaluation_rows_to_lilac_dataset(rows, name='my-traces')
+    >>>
+    >>> # Do Lilac operations (cluster, filter, etc.)
+    >>> dataset.cluster('messages_json')  # or create your own text column
+    >>>
+    >>> # Convert back to EvaluationRows
+    >>> processed_rows = lilac_dataset_to_evaluation_rows(dataset)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any, TYPE_CHECKING
+
+import pandas as pd
+
+from eval_protocol.models import (
+    EvaluateResult,
+    EvaluationRow,
+    ExecutionMetadata,
+    InputMetadata,
+    Message,
+)
+
+if TYPE_CHECKING:
+    import lilac as ll
+
+logger = logging.getLogger(__name__)
+
+# Check if lilac is available
+try:
+    import lilac as ll
+
+    LILAC_AVAILABLE = True
+except ImportError:
+    LILAC_AVAILABLE = False
+    ll = None  # type: ignore
+
+
+def _ensure_lilac_available() -> None:
+    """Raise ImportError if lilac is not installed."""
+    if not LILAC_AVAILABLE:
+        raise ImportError("Lilac is not installed. Install it with: pip install 'lilac[all]'")
+
+
+# =============================================================================
+# Core Conversion Functions
+# =============================================================================
+
+
+def _serialize_message(msg: Message) -> dict[str, Any]:
+    """Serialize a Message to a dictionary."""
+    return msg.model_dump(exclude_none=True)
+
+
+def _deserialize_messages(messages_json: str | None) -> list[Message]:
+    """Deserialize messages JSON back to Message objects."""
+    if not messages_json:
+        return []
+    try:
+        messages_data = json.loads(messages_json)
+        return [Message.model_validate(m) for m in messages_data]
+    except (json.JSONDecodeError, ValueError) as e:
+        logger.warning(f"Failed to deserialize messages: {e}")
+        return []
+
+
+def evaluation_row_to_dict(row: EvaluationRow) -> dict[str, Any]:
+    """Convert a single EvaluationRow to a dictionary for Lilac.
+
+    The output contains JSON-serialized fields that can be reconstructed back
+    to EvaluationRow. Users can add their own text columns for clustering.
+    """
+    result: dict[str, Any] = {
+        # Identifiers
+        "row_id": row.input_metadata.row_id if row.input_metadata else None,
+        # Full data as JSON (for reconstruction)
+        "messages_json": json.dumps([_serialize_message(m) for m in row.messages]),
+        "tools_json": json.dumps(row.tools) if row.tools else None,
+        "ground_truth_json": json.dumps(row.ground_truth) if row.ground_truth else None,
+        "input_metadata_json": row.input_metadata.model_dump_json() if row.input_metadata else None,
+        "execution_metadata_json": row.execution_metadata.model_dump_json() if row.execution_metadata else None,
+        "evaluation_result_json": row.evaluation_result.model_dump_json() if row.evaluation_result else None,
+        # Scalar fields for filtering
+        "score": row.evaluation_result.score if row.evaluation_result else None,
+        "message_count": len(row.messages),
+        "has_tools": bool(row.tools),
+    }
+
+    return result
+
+
+def dict_to_evaluation_row(data: dict[str, Any]) -> EvaluationRow:
+    """Convert a Lilac row dictionary back to an EvaluationRow."""
+    # Parse messages
+    messages = _deserialize_messages(data.get("messages_json"))
+
+    # Parse tools
+    tools = None
+    if data.get("tools_json"):
+        try:
+            tools = json.loads(data["tools_json"])
+        except json.JSONDecodeError:
+            pass
+
+    # Parse ground truth
+    ground_truth = None
+    if data.get("ground_truth_json"):
+        try:
+            ground_truth = json.loads(data["ground_truth_json"])
+        except json.JSONDecodeError:
+            pass
+
+    # Parse input metadata
+    input_metadata = InputMetadata()
+    if data.get("input_metadata_json"):
+        try:
+            input_metadata = InputMetadata.model_validate_json(data["input_metadata_json"])
+        except (json.JSONDecodeError, ValueError):
+            input_metadata = InputMetadata(row_id=data.get("row_id"))
+
+    # Parse execution metadata
+    execution_metadata = ExecutionMetadata()
+    if data.get("execution_metadata_json"):
+        try:
+            execution_metadata = ExecutionMetadata.model_validate_json(data["execution_metadata_json"])
+        except (json.JSONDecodeError, ValueError):
+            pass
+
+    # Parse evaluation result
+    evaluation_result = None
+    if data.get("evaluation_result_json"):
+        try:
+            evaluation_result = EvaluateResult.model_validate_json(data["evaluation_result_json"])
+        except (json.JSONDecodeError, ValueError):
+            pass
+
+    return EvaluationRow(
+        messages=messages,
+        tools=tools,
+        ground_truth=ground_truth,
+        input_metadata=input_metadata,
+        execution_metadata=execution_metadata,
+        evaluation_result=evaluation_result,
+    )
+
+
+# =============================================================================
+# Main Conversion Functions
+# =============================================================================
+
+
+def evaluation_rows_to_lilac_dataset(
+    rows: list[EvaluationRow],
+    namespace: str = "local",
+    name: str = "eval-data",
+    project_dir: str | None = None,
+) -> Any:
+    """Convert EvaluationRows to a Lilac dataset.
+
+    Args:
+        rows: List of EvaluationRow objects
+        namespace: Lilac namespace (default: 'local')
+        name: Dataset name
+        project_dir: Lilac project directory (uses default if None)
+
+    Returns:
+        Lilac Dataset object ready for clustering, filtering, etc.
+
+    Example:
+        >>> dataset = evaluation_rows_to_lilac_dataset(rows, name='my-traces')
+        >>>
+        >>> # Add your own text column for clustering
+        >>> df = dataset.to_pandas()
+        >>> df['user_query'] = df['messages_json'].apply(extract_user_query)
+        >>> # Re-create dataset with new column, then cluster
+    """
+    _ensure_lilac_available()
+    import lilac as ll_module  # Re-import after ensuring available
+
+    if project_dir:
+        ll_module.set_project_dir(project_dir)
+
+    # Convert to DataFrame
+    records = [evaluation_row_to_dict(row) for row in rows]
+    df = pd.DataFrame(records)
+
+    config = ll_module.DatasetConfig(
+        namespace=namespace,
+        name=name,
+        source=ll_module.PandasSource(df),
+    )
+
+    return ll_module.create_dataset(config)
+
+
+def lilac_dataset_to_evaluation_rows(
+    dataset: Any,
+    filters: list[tuple[str, str, Any]] | None = None,
+    limit: int | None = None,
+) -> list[EvaluationRow]:
+    """Convert a Lilac dataset back to EvaluationRows.
+
+    Args:
+        dataset: Lilac Dataset object
+        filters: Optional Lilac filter tuples, e.g. [('score', 'greater', 0.5)]
+        limit: Maximum number of rows to return
+
+    Returns:
+        List of EvaluationRow objects
+    """
+    _ensure_lilac_available()
+
+    # Build query
+    kwargs: dict[str, Any] = {}
+    if filters:
+        kwargs["filters"] = filters
+    if limit:
+        kwargs["limit"] = limit
+
+    df = dataset.select_rows(**kwargs).df()
+    return dataframe_to_evaluation_rows(df)
+
+
+def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
+    """Convert EvaluationRows to a pandas DataFrame.
+
+    Useful if you want to work with the DataFrame directly.
+    """
+    records = [evaluation_row_to_dict(row) for row in rows]
+    return pd.DataFrame(records)
+
+
+def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
+    """Convert a pandas DataFrame back to EvaluationRows."""
+    rows = []
+    for _, row_data in df.iterrows():
+        try:
+            row = dict_to_evaluation_row(row_data.to_dict())
+            rows.append(row)
+        except Exception as e:
+            logger.warning(f"Failed to convert row: {e}")
+            continue
+    return rows

From 74cca5dc646b16fd0c72124dc86c438296c4d926 Mon Sep 17 00:00:00 2001
From: Shrey Modi <shreycricket10@gmail.com>
Date: Mon, 29 Dec 2025 10:29:32 -0800
Subject: [PATCH 2/3] lilac adapter: drop lilac import, keep DataFrame conversion only

---
 eval_protocol/adapters/lilac.py | 142 ++++++++------------------------
 1 file changed, 33 insertions(+), 109 deletions(-)

diff --git a/eval_protocol/adapters/lilac.py b/eval_protocol/adapters/lilac.py
index 684adea9..bc66fe6d 100644
--- a/eval_protocol/adapters/lilac.py
+++ b/eval_protocol/adapters/lilac.py
@@ -2,36 +2,39 @@
 Lilac ML integration for Eval Protocol.
 
 This adapter provides utilities for converting between EvaluationRow format
-and Lilac dataset format, enabling powerful data curation features like:
+and pandas DataFrame format, enabling integration with Lilac for data curation:
 - Clustering and deduplication
 - Semantic search and filtering
 - Quality scoring with embeddings
 - Interactive data exploration
 
-Prerequisites:
-    pip install 'lilac[all]'
-
 Example usage:
     >>> from eval_protocol.adapters.lilac import (
-    ...     evaluation_rows_to_lilac_dataset,
-    ...     lilac_dataset_to_evaluation_rows,
+    ...     evaluation_rows_to_dataframe,
+    ...     dataframe_to_evaluation_rows,
     ... )
     >>>
-    >>> # Convert EvaluationRows to Lilac dataset
-    >>> dataset = evaluation_rows_to_lilac_dataset(rows, name='my-traces')
+    >>> # Convert EvaluationRows to DataFrame for Lilac
+    >>> df = evaluation_rows_to_dataframe(rows)
+    >>> df['user_query'] = df['messages_json'].apply(extract_user_message)
     >>>
-    >>> # Do Lilac operations (cluster, filter, etc.)
-    >>> dataset.cluster('messages_json')  # or create your own text column
+    >>> # Use with Lilac for clustering
+    >>> import lilac as ll
+    >>> dataset = ll.create_dataset(ll.DatasetConfig(
+    ...     namespace='local', name='my-data', source=ll.PandasSource(df)
+    ... ))
+    >>> dataset.cluster('user_query')
     >>>
     >>> # Convert back to EvaluationRows
-    >>> processed_rows = lilac_dataset_to_evaluation_rows(dataset)
+    >>> processed_df = dataset.to_pandas(include_signals=True)
+    >>> processed_rows = dataframe_to_evaluation_rows(processed_df)
 """
 
 from __future__ import annotations
 
 import json
 import logging
-from typing import Any, TYPE_CHECKING
+from typing import Any
 
 import pandas as pd
 
@@ -43,29 +46,11 @@
     Message,
 )
 
-if TYPE_CHECKING:
-    import lilac as ll
-
 logger = logging.getLogger(__name__)
 
-# Check if lilac is available
-try:
-    import lilac as ll
-
-    LILAC_AVAILABLE = True
-except ImportError:
-    LILAC_AVAILABLE = False
-    ll = None  # type: ignore
-
-
-def _ensure_lilac_available() -> None:
-    """Raise ImportError if lilac is not installed."""
-    if not LILAC_AVAILABLE:
-        raise ImportError("Lilac is not installed. Install it with: pip install 'lilac[all]'")
-
 
 # =============================================================================
-# Core Conversion Functions
+# Internal Helpers
 # =============================================================================
 
 
@@ -86,13 +71,13 @@ def _deserialize_messages(messages_json: str | None) -> list[Message]:
         return []
 
 
-def evaluation_row_to_dict(row: EvaluationRow) -> dict[str, Any]:
-    """Convert a single EvaluationRow to a dictionary for Lilac.
+def _evaluation_row_to_dict(row: EvaluationRow) -> dict[str, Any]:
+    """Convert a single EvaluationRow to a dictionary.
 
     The output contains JSON-serialized fields that can be reconstructed back
     to EvaluationRow. Users can add their own text columns for clustering.
     """
-    result: dict[str, Any] = {
+    return {
         # Identifiers
         "row_id": row.input_metadata.row_id if row.input_metadata else None,
         # Full data as JSON (for reconstruction)
@@ -108,11 +93,9 @@ def evaluation_row_to_dict(row: EvaluationRow) -> dict[str, Any]:
         "has_tools": bool(row.tools),
     }
 
-    return result
 
-
-def dict_to_evaluation_row(data: dict[str, Any]) -> EvaluationRow:
-    """Convert a Lilac row dictionary back to an EvaluationRow."""
+def _dict_to_evaluation_row(data: dict[str, Any]) -> EvaluationRow:
+    """Convert a dictionary back to an EvaluationRow."""
     # Parse messages
     messages = _deserialize_messages(data.get("messages_json"))
 
@@ -167,97 +150,38 @@ def dict_to_evaluation_row(data: dict[str, Any]) -> EvaluationRow:
 
 
 # =============================================================================
-# Main Conversion Functions
+# Public API
 # =============================================================================
 
 
-def evaluation_rows_to_lilac_dataset(
-    rows: list[EvaluationRow],
-    namespace: str = "local",
-    name: str = "eval-data",
-    project_dir: str | None = None,
-) -> Any:
-    """Convert EvaluationRows to a Lilac dataset.
+def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
+    """Convert EvaluationRows to a pandas DataFrame.
+
+    The DataFrame can be used directly with Lilac for clustering and curation.
 
     Args:
         rows: List of EvaluationRow objects
-        namespace: Lilac namespace (default: 'local')
-        name: Dataset name
-        project_dir: Lilac project directory (uses default if None)
 
     Returns:
-        Lilac Dataset object ready for clustering, filtering, etc.
-
-    Example:
-        >>> dataset = evaluation_rows_to_lilac_dataset(rows, name='my-traces')
-        >>>
-        >>> # Add your own text column for clustering
-        >>> df = dataset.to_pandas()
-        >>> df['user_query'] = df['messages_json'].apply(extract_user_query)
-        >>> # Re-create dataset with new column, then cluster
+        DataFrame with JSON-serialized fields for reconstruction
     """
-    _ensure_lilac_available()
-    import lilac as ll_module  # Re-import after ensuring available
-
-    if project_dir:
-        ll_module.set_project_dir(project_dir)
-
-    # Convert to DataFrame
-    records = [evaluation_row_to_dict(row) for row in rows]
-    df = pd.DataFrame(records)
-
-    config = ll_module.DatasetConfig(
-        namespace=namespace,
-        name=name,
-        source=ll_module.PandasSource(df),
-    )
-
-    return ll_module.create_dataset(config)
+    records = [_evaluation_row_to_dict(row) for row in rows]
+    return pd.DataFrame(records)
 
 
-def lilac_dataset_to_evaluation_rows(
-    dataset: Any,
-    filters: list[tuple[str, str, Any]] | None = None,
-    limit: int | None = None,
-) -> list[EvaluationRow]:
-    """Convert a Lilac dataset back to EvaluationRows.
+def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
+    """Convert a pandas DataFrame back to EvaluationRows.
 
     Args:
-        dataset: Lilac Dataset object
-        filters: Optional Lilac filter tuples, e.g. [('score', 'greater', 0.5)]
-        limit: Maximum number of rows to return
+        df: DataFrame with messages_json and other serialized fields
 
     Returns:
         List of EvaluationRow objects
     """
-    _ensure_lilac_available()
-
-    # Build query
-    kwargs: dict[str, Any] = {}
-    if filters:
-        kwargs["filters"] = filters
-    if limit:
-        kwargs["limit"] = limit
-
-    df = dataset.select_rows(**kwargs).df()
-    return dataframe_to_evaluation_rows(df)
-
-
-def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
-    """Convert EvaluationRows to a pandas DataFrame.
-
-    Useful if you want to work with the DataFrame directly.
-    """
-    records = [evaluation_row_to_dict(row) for row in rows]
-    return pd.DataFrame(records)
-
-
-def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
-    """Convert a pandas DataFrame back to EvaluationRows."""
     rows = []
     for _, row_data in df.iterrows():
         try:
-            row = dict_to_evaluation_row(row_data.to_dict())
+            row = _dict_to_evaluation_row(row_data.to_dict())
             rows.append(row)
         except Exception as e:
             logger.warning(f"Failed to convert row: {e}")

From e529f7fa10e6533dfee3a7b4db02e1a67beea459 Mon Sep 17 00:00:00 2001
From: Shrey Modi <shreycricket10@gmail.com>
Date: Fri, 2 Jan 2026 13:39:29 -0800
Subject: [PATCH 3/3] replace lilac adapter with dataframe adapter; add EvaluationRow.to_dict/from_dict

---
 eval_protocol/adapters/__init__.py  |  16 +++
 eval_protocol/adapters/dataframe.py |  66 ++++++++++
 eval_protocol/adapters/lilac.py     | 189 ----------------------------
 eval_protocol/models.py             |  43 +++++++
 4 files changed, 125 insertions(+), 189 deletions(-)
 create mode 100644 eval_protocol/adapters/dataframe.py
 delete mode 100644 eval_protocol/adapters/lilac.py

diff --git a/eval_protocol/adapters/__init__.py b/eval_protocol/adapters/__init__.py
index 6f031f3d..b6e7c1e9 100644
--- a/eval_protocol/adapters/__init__.py
+++ b/eval_protocol/adapters/__init__.py
@@ -99,3 +99,19 @@
     __all__.extend(["WeaveAdapter"])
 except ImportError:
     pass
+
+# DataFrame adapter (pandas integration for Lilac, etc.)
+try:
+    from .dataframe import (
+        evaluation_rows_to_dataframe,
+        dataframe_to_evaluation_rows,
+    )
+
+    __all__.extend(
+        [
+            "evaluation_rows_to_dataframe",
+            "dataframe_to_evaluation_rows",
+        ]
+    )
+except ImportError:
+    pass
diff --git a/eval_protocol/adapters/dataframe.py b/eval_protocol/adapters/dataframe.py
new file mode 100644
index 00000000..f67077f4
--- /dev/null
+++ b/eval_protocol/adapters/dataframe.py
@@ -0,0 +1,66 @@
+"""
+Pandas DataFrame adapter for Eval Protocol.
+
+This module provides utilities for converting between EvaluationRow format
+and pandas DataFrame format, enabling integration with data curation tools
+such as Lilac, Great Expectations, or any pandas-based workflow.
+
+Example usage:
+    >>> from eval_protocol.adapters.dataframe import (
+    ...     evaluation_rows_to_dataframe,
+    ...     dataframe_to_evaluation_rows,
+    ... )
+    >>>
+    >>> # Convert EvaluationRows to DataFrame
+    >>> df = evaluation_rows_to_dataframe(rows)
+    >>>
+    >>> # Convert back to EvaluationRows
+    >>> rows = dataframe_to_evaluation_rows(df)
+"""
+
+from __future__ import annotations
+
+import logging
+
+import pandas as pd
+
+from ..models import EvaluationRow
+
+logger = logging.getLogger(__name__)
+
+
+def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
+    """Build a pandas DataFrame with one record per EvaluationRow.
+
+    Every record is the dictionary returned by EvaluationRow.to_dict().
+
+    Args:
+        rows: The EvaluationRow objects to serialize
+
+    Returns:
+        DataFrame holding 'data_json' (the serialized row) plus convenience fields
+    """
+    # One to_dict() call per row; an empty input gives an empty DataFrame.
+    return pd.DataFrame([item.to_dict() for item in rows])
+
+
+def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
+    """Convert a pandas DataFrame back to EvaluationRows.
+
+    Each row is rebuilt with EvaluationRow.from_dict(). Conversion is best
+    effort: a row that fails is logged as a warning and skipped.
+
+    Args:
+        df: DataFrame with 'data_json' column containing serialized EvaluationRows
+
+    Returns:
+        List of successfully converted EvaluationRow objects, in DataFrame order
+    """
+    rows: list[EvaluationRow] = []
+    for index, row_data in df.iterrows():
+        try:
+            rows.append(EvaluationRow.from_dict(row_data.to_dict()))
+        except Exception as e:
+            # Lazy %-args; the index label identifies which row was dropped.
+            logger.warning("Failed to convert row %r: %s", index, e)
+    return rows
diff --git a/eval_protocol/adapters/lilac.py b/eval_protocol/adapters/lilac.py
deleted file mode 100644
index bc66fe6d..00000000
--- a/eval_protocol/adapters/lilac.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""
-Lilac ML integration for Eval Protocol.
-
-This adapter provides utilities for converting between EvaluationRow format
-and pandas DataFrame format, enabling integration with Lilac for data curation:
-- Clustering and deduplication
-- Semantic search and filtering
-- Quality scoring with embeddings
-- Interactive data exploration
-
-Example usage:
-    >>> from eval_protocol.adapters.lilac import (
-    ...     evaluation_rows_to_dataframe,
-    ...     dataframe_to_evaluation_rows,
-    ... )
-    >>>
-    >>> # Convert EvaluationRows to DataFrame for Lilac
-    >>> df = evaluation_rows_to_dataframe(rows)
-    >>> df['user_query'] = df['messages_json'].apply(extract_user_message)
-    >>>
-    >>> # Use with Lilac for clustering
-    >>> import lilac as ll
-    >>> dataset = ll.create_dataset(ll.DatasetConfig(
-    ...     namespace='local', name='my-data', source=ll.PandasSource(df)
-    ... ))
-    >>> dataset.cluster('user_query')
-    >>>
-    >>> # Convert back to EvaluationRows
-    >>> processed_df = dataset.to_pandas(include_signals=True)
-    >>> processed_rows = dataframe_to_evaluation_rows(processed_df)
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-from typing import Any
-
-import pandas as pd
-
-from eval_protocol.models import (
-    EvaluateResult,
-    EvaluationRow,
-    ExecutionMetadata,
-    InputMetadata,
-    Message,
-)
-
-logger = logging.getLogger(__name__)
-
-
-# =============================================================================
-# Internal Helpers
-# =============================================================================
-
-
-def _serialize_message(msg: Message) -> dict[str, Any]:
-    """Serialize a Message to a dictionary."""
-    return msg.model_dump(exclude_none=True)
-
-
-def _deserialize_messages(messages_json: str | None) -> list[Message]:
-    """Deserialize messages JSON back to Message objects."""
-    if not messages_json:
-        return []
-    try:
-        messages_data = json.loads(messages_json)
-        return [Message.model_validate(m) for m in messages_data]
-    except (json.JSONDecodeError, ValueError) as e:
-        logger.warning(f"Failed to deserialize messages: {e}")
-        return []
-
-
-def _evaluation_row_to_dict(row: EvaluationRow) -> dict[str, Any]:
-    """Convert a single EvaluationRow to a dictionary.
-
-    The output contains JSON-serialized fields that can be reconstructed back
-    to EvaluationRow. Users can add their own text columns for clustering.
-    """
-    return {
-        # Identifiers
-        "row_id": row.input_metadata.row_id if row.input_metadata else None,
-        # Full data as JSON (for reconstruction)
-        "messages_json": json.dumps([_serialize_message(m) for m in row.messages]),
-        "tools_json": json.dumps(row.tools) if row.tools else None,
-        "ground_truth_json": json.dumps(row.ground_truth) if row.ground_truth else None,
-        "input_metadata_json": row.input_metadata.model_dump_json() if row.input_metadata else None,
-        "execution_metadata_json": row.execution_metadata.model_dump_json() if row.execution_metadata else None,
-        "evaluation_result_json": row.evaluation_result.model_dump_json() if row.evaluation_result else None,
-        # Scalar fields for filtering
-        "score": row.evaluation_result.score if row.evaluation_result else None,
-        "message_count": len(row.messages),
-        "has_tools": bool(row.tools),
-    }
-
-
-def _dict_to_evaluation_row(data: dict[str, Any]) -> EvaluationRow:
-    """Convert a dictionary back to an EvaluationRow."""
-    # Parse messages
-    messages = _deserialize_messages(data.get("messages_json"))
-
-    # Parse tools
-    tools = None
-    if data.get("tools_json"):
-        try:
-            tools = json.loads(data["tools_json"])
-        except json.JSONDecodeError:
-            pass
-
-    # Parse ground truth
-    ground_truth = None
-    if data.get("ground_truth_json"):
-        try:
-            ground_truth = json.loads(data["ground_truth_json"])
-        except json.JSONDecodeError:
-            pass
-
-    # Parse input metadata
-    input_metadata = InputMetadata()
-    if data.get("input_metadata_json"):
-        try:
-            input_metadata = InputMetadata.model_validate_json(data["input_metadata_json"])
-        except (json.JSONDecodeError, ValueError):
-            input_metadata = InputMetadata(row_id=data.get("row_id"))
-
-    # Parse execution metadata
-    execution_metadata = ExecutionMetadata()
-    if data.get("execution_metadata_json"):
-        try:
-            execution_metadata = ExecutionMetadata.model_validate_json(data["execution_metadata_json"])
-        except (json.JSONDecodeError, ValueError):
-            pass
-
-    # Parse evaluation result
-    evaluation_result = None
-    if data.get("evaluation_result_json"):
-        try:
-            evaluation_result = EvaluateResult.model_validate_json(data["evaluation_result_json"])
-        except (json.JSONDecodeError, ValueError):
-            pass
-
-    return EvaluationRow(
-        messages=messages,
-        tools=tools,
-        ground_truth=ground_truth,
-        input_metadata=input_metadata,
-        execution_metadata=execution_metadata,
-        evaluation_result=evaluation_result,
-    )
-
-
-# =============================================================================
-# Public API
-# =============================================================================
-
-
-def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
-    """Convert EvaluationRows to a pandas DataFrame.
-
-    The DataFrame can be used directly with Lilac for clustering and curation.
-
-    Args:
-        rows: List of EvaluationRow objects
-
-    Returns:
-        DataFrame with JSON-serialized fields for reconstruction
-    """
-    records = [_evaluation_row_to_dict(row) for row in rows]
-    return pd.DataFrame(records)
-
-
-def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
-    """Convert a pandas DataFrame back to EvaluationRows.
-
-    Args:
-        df: DataFrame with messages_json and other serialized fields
-
-    Returns:
-        List of EvaluationRow objects
-    """
-    rows = []
-    for _, row_data in df.iterrows():
-        try:
-            row = _dict_to_evaluation_row(row_data.to_dict())
-            rows.append(row)
-        except Exception as e:
-            logger.warning(f"Failed to convert row: {e}")
-            continue
-    return rows
diff --git a/eval_protocol/models.py b/eval_protocol/models.py
index 13d059d3..a7bc767b 100644
--- a/eval_protocol/models.py
+++ b/eval_protocol/models.py
@@ -953,6 +953,49 @@ def get_termination_reason(self) -> str:
                 return str(reason)
         return "unknown"
 
+    def to_dict(self) -> Dict[str, Any]:
+        """Flatten this EvaluationRow into a plain dictionary.
+
+        'data_json' carries the complete row as emitted by model_dump_json();
+        the other keys are scalar copies of selected fields, kept alongside it
+        so callers can filter or group without parsing the JSON.
+
+        Returns:
+            Dictionary with the keys 'data_json', 'row_id', 'score',
+            'message_count', 'has_tools' and 'created_at'.
+        """
+        record: Dict[str, Any] = {"data_json": self.model_dump_json()}
+        meta, result, created = self.input_metadata, self.evaluation_result, self.created_at
+        record["row_id"] = meta.row_id if meta else None
+        record["score"] = result.score if result else None
+        record.update(message_count=len(self.messages), has_tools=bool(self.tools))
+        record["created_at"] = created.isoformat() if created else None
+        return record
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "EvaluationRow":
+        """Rebuild an EvaluationRow from a dictionary made by to_dict().
+
+        Only the 'data_json' entry is read; any other keys are ignored.
+
+        Args:
+            data: Dictionary whose 'data_json' value is the JSON-serialized row.
+
+        Returns:
+            The EvaluationRow parsed from 'data_json'.
+
+        Raises:
+            ValueError: If 'data_json' is absent or falsy, or fails validation.
+        """
+        from pydantic import ValidationError
+        payload = data.get("data_json")
+        if payload:
+            try:
+                return cls.model_validate_json(payload)
+            except ValidationError as exc:
+                raise ValueError(f"Failed to deserialize EvaluationRow: {exc}") from exc
+        raise ValueError("Missing 'data_json' field in dictionary")
+
     def __hash__(self) -> int:
         # Use a stable hash that works across Python processes
         return self._stable_hash()