Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion python/packages/autogen-core/src/autogen_core/logging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from enum import Enum
from typing import Any, Dict, List, cast
from typing import Any, Dict, List, Optional, cast

from ._agent_id import AgentId
from ._message_handler_context import MessageHandlerContext
Expand All @@ -15,6 +15,8 @@ def __init__(
response: Dict[str, Any],
prompt_tokens: int,
completion_tokens: int,
latency_ms: Optional[float] = None,
tokens_per_second: Optional[float] = None,
**kwargs: Any,
) -> None:
"""To be used by model clients to log the call to the LLM.
Expand All @@ -24,6 +26,8 @@ def __init__(
response (Dict[str, Any]): The response of the call. Must be json serializable.
prompt_tokens (int): Number of tokens used in the prompt.
completion_tokens (int): Number of tokens used in the completion.
latency_ms (Optional[float]): Total call duration in milliseconds.
tokens_per_second (Optional[float]): Completion tokens divided by latency in seconds.

Example:

Expand All @@ -45,6 +49,10 @@ def __init__(
self.kwargs["response"] = response
self.kwargs["prompt_tokens"] = prompt_tokens
self.kwargs["completion_tokens"] = completion_tokens
if latency_ms is not None:
self.kwargs["latency_ms"] = latency_ms
if tokens_per_second is not None:
self.kwargs["tokens_per_second"] = tokens_per_second
try:
agent_id = MessageHandlerContext.agent_id()
except RuntimeError:
Expand All @@ -59,6 +67,14 @@ def prompt_tokens(self) -> int:
def completion_tokens(self) -> int:
return cast(int, self.kwargs["completion_tokens"])

@property
def latency_ms(self) -> Optional[float]:
return self.kwargs.get("latency_ms")

@property
def tokens_per_second(self) -> Optional[float]:
return self.kwargs.get("tokens_per_second")

# This must output the event in a json serializable format
def __str__(self) -> str:
return json.dumps(self.kwargs)
Expand Down Expand Up @@ -111,6 +127,9 @@ def __init__(
response: Dict[str, Any],
prompt_tokens: int,
completion_tokens: int,
latency_ms: Optional[float] = None,
tokens_per_second: Optional[float] = None,
ttft_ms: Optional[float] = None,
**kwargs: Any,
) -> None:
"""To be used by model clients to log the end of a stream.
Expand All @@ -119,6 +138,9 @@ def __init__(
response (Dict[str, Any]): The response of the call. Must be json serializable.
prompt_tokens (int): Number of tokens used in the prompt.
completion_tokens (int): Number of tokens used in the completion.
latency_ms (Optional[float]): Total stream duration in milliseconds.
tokens_per_second (Optional[float]): Completion tokens divided by latency in seconds.
ttft_ms (Optional[float]): Time to first token in milliseconds.

Example:

Expand All @@ -138,6 +160,12 @@ def __init__(
self.kwargs["response"] = response
self.kwargs["prompt_tokens"] = prompt_tokens
self.kwargs["completion_tokens"] = completion_tokens
if latency_ms is not None:
self.kwargs["latency_ms"] = latency_ms
if tokens_per_second is not None:
self.kwargs["tokens_per_second"] = tokens_per_second
if ttft_ms is not None:
self.kwargs["ttft_ms"] = ttft_ms
try:
agent_id = MessageHandlerContext.agent_id()
except RuntimeError:
Expand All @@ -152,6 +180,18 @@ def prompt_tokens(self) -> int:
def completion_tokens(self) -> int:
return cast(int, self.kwargs["completion_tokens"])

@property
def latency_ms(self) -> Optional[float]:
return self.kwargs.get("latency_ms")

@property
def tokens_per_second(self) -> Optional[float]:
return self.kwargs.get("tokens_per_second")

@property
def ttft_ms(self) -> Optional[float]:
return self.kwargs.get("ttft_ms")

# This must output the event in a json serializable format
def __str__(self) -> str:
return json.dumps(self.kwargs)
Expand Down
107 changes: 107 additions & 0 deletions python/packages/autogen-core/tests/test_logging_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""Tests for LLMCallEvent and LLMStreamEndEvent timing fields.

These tests verify the performance telemetry fields added in Issue #5790.
"""

import json

from autogen_core.logging import LLMCallEvent, LLMStreamEndEvent


def test_llm_call_event_timing_fields() -> None:
    """Verify LLMCallEvent stores timing fields and emits them in its JSON form."""
    # Construct an event carrying both optional timing measurements.
    call_event = LLMCallEvent(
        messages=[{"role": "user", "content": "Hello"}],
        response={"content": "Hi there!"},
        prompt_tokens=10,
        completion_tokens=20,
        latency_ms=150.5,
        tokens_per_second=133.33,
    )

    # Property accessors expose exactly the values passed at construction time.
    assert call_event.prompt_tokens == 10
    assert call_event.completion_tokens == 20
    assert call_event.latency_ms == 150.5
    assert call_event.tokens_per_second == 133.33

    # str(event) must produce valid JSON that carries the timing fields.
    payload = json.loads(str(call_event))
    assert payload["latency_ms"] == 150.5
    assert payload["tokens_per_second"] == 133.33


def test_llm_call_event_timing_fields_optional() -> None:
    """Timing fields default to None and are omitted from the serialized payload."""
    call_event = LLMCallEvent(
        messages=[{"role": "user", "content": "Hello"}],
        response={"content": "Hi there!"},
        prompt_tokens=10,
        completion_tokens=20,
    )

    # Accessors fall back to None when the fields were never supplied.
    assert call_event.latency_ms is None
    assert call_event.tokens_per_second is None

    # Absent fields must not leak into the JSON output.
    payload = json.loads(str(call_event))
    for omitted_key in ("latency_ms", "tokens_per_second"):
        assert omitted_key not in payload


def test_llm_stream_end_event_timing_fields() -> None:
    """Verify LLMStreamEndEvent stores all timing fields, including time-to-first-token."""
    # Streaming events accept an extra ttft_ms measurement on top of the shared pair.
    stream_event = LLMStreamEndEvent(
        response={"content": "Hello, world!"},
        prompt_tokens=10,
        completion_tokens=25,
        latency_ms=200.0,
        tokens_per_second=125.0,
        ttft_ms=50.5,
    )

    # Property accessors round-trip the constructor arguments unchanged.
    assert stream_event.prompt_tokens == 10
    assert stream_event.completion_tokens == 25
    assert stream_event.latency_ms == 200.0
    assert stream_event.tokens_per_second == 125.0
    assert stream_event.ttft_ms == 50.5

    # All three timing fields must survive JSON serialization via str(event).
    payload = json.loads(str(stream_event))
    assert payload["latency_ms"] == 200.0
    assert payload["tokens_per_second"] == 125.0
    assert payload["ttft_ms"] == 50.5


def test_llm_stream_end_event_timing_fields_optional() -> None:
    """All streaming timing fields are optional: None accessors, absent from JSON."""
    stream_event = LLMStreamEndEvent(
        response={"content": "Hello, world!"},
        prompt_tokens=10,
        completion_tokens=25,
    )

    # Every unsupplied timing accessor reports None.
    assert stream_event.latency_ms is None
    assert stream_event.tokens_per_second is None
    assert stream_event.ttft_ms is None

    # None of the unset fields may appear in the serialized event.
    payload = json.loads(str(stream_event))
    for omitted_key in ("latency_ms", "tokens_per_second", "ttft_ms"):
        assert omitted_key not in payload


if __name__ == "__main__":
    # Allow running this module directly, outside of pytest; call order matches
    # the definition order of the tests above.
    for check in (
        test_llm_call_event_timing_fields,
        test_llm_call_event_timing_fields_optional,
        test_llm_stream_end_event_timing_fields,
        test_llm_stream_end_event_timing_fields_optional,
    ):
        check()
    print("All tests passed!")
Loading