From 2636093313ca12f41d1d941d9291f0680cb30558 Mon Sep 17 00:00:00 2001 From: AdithyaKotian Date: Wed, 21 Jan 2026 16:20:55 +0530 Subject: [PATCH 1/3] docs: explain why Tinker is used --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 2be94bfaf..fa55cb882 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,22 @@ Join our [Discord community](https://discord.gg/RYk7CdvDR7) to connect with othe Read more on our [documentation website](https://microsoft.github.io/agent-lightning/). +## Why Tinker? + +- Running large-scale LLM experiments locally can be difficult and resource-intensive, especially for users without GPUs or complex infrastructure. + +- Tinker allows Agent Lightning users to offload experiment execution to a managed third-party service. This removes the need for local GPU setup and reduces operational complexity. + +- Compared to alternatives such as `verl`, Tinker provides a simpler API and easier integration, making it suitable for rapid experimentation and onboarding. + +- Use Tinker when you want fast setup and managed execution; use local backends when you need full control over infrastructure. + +

Agent-Lightning Core Quickstart

+ ## ⚡ Installation ```bash From f5955d62c8f32366748c44b46442e7f34df02324 Mon Sep 17 00:00:00 2001 From: AdithyaKotian Date: Thu, 22 Jan 2026 19:54:54 +0530 Subject: [PATCH 2/3] fix: clarify error when extracted completion is not a list --- agentlightning/adapter/messages.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/agentlightning/adapter/messages.py b/agentlightning/adapter/messages.py index fe6c5912e..7ad50e146 100644 --- a/agentlightning/adapter/messages.py +++ b/agentlightning/adapter/messages.py @@ -254,7 +254,11 @@ def adapt(self, source: Sequence[Span], /) -> List[OpenAIMessages]: if not isinstance(prompt, list): raise ValueError(f"Extracted prompt from trace is not a list: {prompt}") if not isinstance(completion, list): - raise ValueError(f"Extracted completion from trace is not a list: {completion}") + raise ValueError( + f"Expected completion to be a list, got {type(completion)}. " + f"Value: {repr(completion)[:200]}. " + "If the trace contains a single completion, wrap it in a list before passing it." + ) if not isinstance(request, dict): raise ValueError(f"Extracted request from trace is not a dict: {request}") if not isinstance(response, dict): From 34c2ba86679fbc9a4c0d20b4c698fee01d45ce05 Mon Sep 17 00:00:00 2001 From: AdithyaKotian Date: Fri, 30 Jan 2026 09:46:25 +0530 Subject: [PATCH 3/3] docs: explain how failed rollouts are handled --- docs/how-to/failed-rollouts.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 docs/how-to/failed-rollouts.md diff --git a/docs/how-to/failed-rollouts.md b/docs/how-to/failed-rollouts.md new file mode 100644 index 000000000..96cee4b45 --- /dev/null +++ b/docs/how-to/failed-rollouts.md @@ -0,0 +1,16 @@ +# Handling Failed Rollouts + +Rollouts may fail due to transient system issues such as network errors, timeouts or external service failures. 
+ +## Retry behavior +- Rollout retries are configured via `RolloutConfig`, including settings such as `max_attempts`, retry conditions and timeouts. +- If a rollout fails and returns `None`, it still counts as an attempt and follows the configured retry limits. + +## Batch behavior +- Failed rollouts are handled at the individual rollout level. +- There is currently no built-in mechanism to automatically skip an entire batch when multiple rollouts fail. + +## Best practices +- Retries are useful for transient failures (e.g. temporary network issues). +- If failures occur frequently, this usually indicates an infrastructure problem rather than an issue retries can fix. +- In such cases, it is recommended to address the underlying system issue instead of increasing retry limits.