Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion python/packages/autogen-core/src/autogen_core/logging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from enum import Enum
from typing import Any, Dict, List, cast
from typing import Any, Dict, List, Optional, cast

from ._agent_id import AgentId
from ._message_handler_context import MessageHandlerContext
Expand All @@ -15,6 +15,8 @@ def __init__(
response: Dict[str, Any],
prompt_tokens: int,
completion_tokens: int,
latency_ms: Optional[float] = None,
tokens_per_second: Optional[float] = None,
**kwargs: Any,
) -> None:
"""To be used by model clients to log the call to the LLM.
Expand All @@ -24,6 +26,8 @@ def __init__(
response (Dict[str, Any]): The response of the call. Must be json serializable.
prompt_tokens (int): Number of tokens used in the prompt.
completion_tokens (int): Number of tokens used in the completion.
latency_ms (Optional[float]): Total call duration in milliseconds.
tokens_per_second (Optional[float]): Completion tokens divided by latency in seconds.

Example:

Expand All @@ -45,6 +49,10 @@ def __init__(
self.kwargs["response"] = response
self.kwargs["prompt_tokens"] = prompt_tokens
self.kwargs["completion_tokens"] = completion_tokens
if latency_ms is not None:
self.kwargs["latency_ms"] = latency_ms
if tokens_per_second is not None:
self.kwargs["tokens_per_second"] = tokens_per_second
try:
agent_id = MessageHandlerContext.agent_id()
except RuntimeError:
Expand All @@ -59,6 +67,14 @@ def prompt_tokens(self) -> int:
def completion_tokens(self) -> int:
return cast(int, self.kwargs["completion_tokens"])

@property
def latency_ms(self) -> Optional[float]:
return self.kwargs.get("latency_ms")

@property
def tokens_per_second(self) -> Optional[float]:
return self.kwargs.get("tokens_per_second")

# This must output the event in a json serializable format
def __str__(self) -> str:
return json.dumps(self.kwargs)
Expand Down Expand Up @@ -111,6 +127,9 @@ def __init__(
response: Dict[str, Any],
prompt_tokens: int,
completion_tokens: int,
latency_ms: Optional[float] = None,
tokens_per_second: Optional[float] = None,
ttft_ms: Optional[float] = None,
**kwargs: Any,
) -> None:
"""To be used by model clients to log the end of a stream.
Expand All @@ -119,6 +138,9 @@ def __init__(
response (Dict[str, Any]): The response of the call. Must be json serializable.
prompt_tokens (int): Number of tokens used in the prompt.
completion_tokens (int): Number of tokens used in the completion.
latency_ms (Optional[float]): Total stream duration in milliseconds.
tokens_per_second (Optional[float]): Completion tokens divided by latency in seconds.
ttft_ms (Optional[float]): Time to first token in milliseconds.

Example:

Expand All @@ -138,6 +160,12 @@ def __init__(
self.kwargs["response"] = response
self.kwargs["prompt_tokens"] = prompt_tokens
self.kwargs["completion_tokens"] = completion_tokens
if latency_ms is not None:
self.kwargs["latency_ms"] = latency_ms
if tokens_per_second is not None:
self.kwargs["tokens_per_second"] = tokens_per_second
if ttft_ms is not None:
self.kwargs["ttft_ms"] = ttft_ms
try:
agent_id = MessageHandlerContext.agent_id()
except RuntimeError:
Expand All @@ -152,6 +180,18 @@ def prompt_tokens(self) -> int:
def completion_tokens(self) -> int:
return cast(int, self.kwargs["completion_tokens"])

@property
def latency_ms(self) -> Optional[float]:
return self.kwargs.get("latency_ms")

@property
def tokens_per_second(self) -> Optional[float]:
return self.kwargs.get("tokens_per_second")

@property
def ttft_ms(self) -> Optional[float]:
return self.kwargs.get("ttft_ms")

# This must output the event in a json serializable format
def __str__(self) -> str:
return json.dumps(self.kwargs)
Expand Down
107 changes: 107 additions & 0 deletions python/packages/autogen-core/tests/test_logging_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""Tests for LLMCallEvent and LLMStreamEndEvent timing fields.

These tests verify the performance telemetry fields added in Issue #5790.
"""

import json

from autogen_core.logging import LLMCallEvent, LLMStreamEndEvent


def test_llm_call_event_timing_fields() -> None:
    """Verify LLMCallEvent stores timing fields and emits them in its JSON form."""
    # Construct an event carrying both optional timing measurements.
    call_event = LLMCallEvent(
        messages=[{"role": "user", "content": "Hello"}],
        response={"content": "Hi there!"},
        prompt_tokens=10,
        completion_tokens=20,
        latency_ms=150.5,
        tokens_per_second=133.33,
    )

    # Property accessors expose exactly the values passed at construction time.
    assert call_event.prompt_tokens == 10
    assert call_event.completion_tokens == 20
    assert call_event.latency_ms == 150.5
    assert call_event.tokens_per_second == 133.33

    # str(event) must produce valid JSON that carries the timing fields.
    payload = json.loads(str(call_event))
    assert payload["latency_ms"] == 150.5
    assert payload["tokens_per_second"] == 133.33


def test_llm_call_event_timing_fields_optional() -> None:
    """Timing fields default to None and are omitted from the serialized payload."""
    call_event = LLMCallEvent(
        messages=[{"role": "user", "content": "Hello"}],
        response={"content": "Hi there!"},
        prompt_tokens=10,
        completion_tokens=20,
    )

    # Accessors fall back to None when the fields were never supplied.
    assert call_event.latency_ms is None
    assert call_event.tokens_per_second is None

    # Absent fields must not leak into the JSON output.
    payload = json.loads(str(call_event))
    for omitted_key in ("latency_ms", "tokens_per_second"):
        assert omitted_key not in payload


def test_llm_stream_end_event_timing_fields() -> None:
    """Verify LLMStreamEndEvent stores all timing fields, including time-to-first-token."""
    # Streaming events accept an extra ttft_ms measurement on top of the shared pair.
    stream_event = LLMStreamEndEvent(
        response={"content": "Hello, world!"},
        prompt_tokens=10,
        completion_tokens=25,
        latency_ms=200.0,
        tokens_per_second=125.0,
        ttft_ms=50.5,
    )

    # Property accessors round-trip the constructor arguments unchanged.
    assert stream_event.prompt_tokens == 10
    assert stream_event.completion_tokens == 25
    assert stream_event.latency_ms == 200.0
    assert stream_event.tokens_per_second == 125.0
    assert stream_event.ttft_ms == 50.5

    # All three timing fields must survive JSON serialization via str(event).
    payload = json.loads(str(stream_event))
    assert payload["latency_ms"] == 200.0
    assert payload["tokens_per_second"] == 125.0
    assert payload["ttft_ms"] == 50.5


def test_llm_stream_end_event_timing_fields_optional() -> None:
    """All streaming timing fields are optional: None accessors, absent from JSON."""
    stream_event = LLMStreamEndEvent(
        response={"content": "Hello, world!"},
        prompt_tokens=10,
        completion_tokens=25,
    )

    # Every unsupplied timing accessor reports None.
    assert stream_event.latency_ms is None
    assert stream_event.tokens_per_second is None
    assert stream_event.ttft_ms is None

    # None of the unset fields may appear in the serialized event.
    payload = json.loads(str(stream_event))
    for omitted_key in ("latency_ms", "tokens_per_second", "ttft_ms"):
        assert omitted_key not in payload


if __name__ == "__main__":
    # Allow running this module directly, outside of pytest; call order matches
    # the definition order of the tests above.
    for check in (
        test_llm_call_event_timing_fields,
        test_llm_call_event_timing_fields_optional,
        test_llm_stream_end_event_timing_fields,
        test_llm_stream_end_event_timing_fields_optional,
    ):
        check()
    print("All tests passed!")
Loading