diff --git a/api-reference/annotations/overview.mdx b/api-reference/annotations/overview.mdx
index 8c42de7..b7d9b74 100644
--- a/api-reference/annotations/overview.mdx
+++ b/api-reference/annotations/overview.mdx
@@ -1,6 +1,6 @@
---
title: 'Overview'
-description: 'Annotations are used to annotate traces with additional information'
+description: 'Learn how annotations enhance trace review, labeling, and evaluation workflows for more reliable AI agent testing.'
---
## Intro
@@ -19,7 +19,7 @@ To make a call to the Annotations API, you will need to pass through your LangWa
- `DELETE /api/annotations/:id` - Delete a single annotation
- `PATCH /api/annotations/:id` - Update a single annotation
- `GET /api/annotations/trace/:id` - Get the annotations for a single trace
-- `POST /api/annotations/trace/:id` - Create an annotation for a single trace
+- `POST /api/annotations/trace/:id` - Create an annotation for a single trace to support domain labeling, evaluation scoring, and agent testing workflows
diff --git a/api-reference/datasets/post-dataset-entries.mdx b/api-reference/datasets/post-dataset-entries.mdx
index 0357260..a12d5b2 100644
--- a/api-reference/datasets/post-dataset-entries.mdx
+++ b/api-reference/datasets/post-dataset-entries.mdx
@@ -1,4 +1,5 @@
---
title: 'Add entries to a dataset'
+description: 'Add dataset entries programmatically using the LangWatch API to build evaluation sets for LLM testing and agent validation.'
openapi: 'POST /api/dataset/{slug}/entries'
---
diff --git a/api-reference/openapi-evals.json b/api-reference/openapi-evals.json
index 10bcda7..d4869bd 100644
--- a/api-reference/openapi-evals.json
+++ b/api-reference/openapi-evals.json
@@ -111,7 +111,7 @@
"/langevals/basic/evaluate": {
"post": {
"summary": "Custom Basic Evaluator",
- "description": "Allows you to check for simple text matches or regex evaluation.",
+ "description": "Configure the Custom Basic Evaluator to check simple matches or regex rules for lightweight automated AI agent evaluations.",
"operationId": "langevals_basic_evaluate",
"requestBody": {
"content": {
@@ -202,7 +202,7 @@
"/langevals/competitor_blocklist/evaluate": {
"post": {
"summary": "Competitor Blocklist",
- "description": "This evaluator checks if any of the specified competitors was mentioned",
+ "description": "Detect competitor mentions using LangWatch’s Competitor Blocklist evaluator to enforce content rules in AI agent testing pipelines.",
"operationId": "langevals_competitor_blocklist_evaluate",
"requestBody": {
"content": {
@@ -475,7 +475,7 @@
"/langevals/exact_match/evaluate": {
"post": {
"summary": "Exact Match Evaluator",
- "description": "A simple evaluator that checks if the output matches the expected_output exactly.",
+ "description": "Use the Exact Match evaluator in LangWatch to verify outputs that require precise matching during AI agent testing.",
"operationId": "langevals_exact_match_evaluate",
"requestBody": {
"content": {
@@ -657,7 +657,7 @@
"/langevals/llm_boolean/evaluate": {
"post": {
"summary": "LLM-as-a-Judge Boolean Evaluator",
- "description": "Use an LLM as a judge with a custom prompt to do a true/false boolean evaluation of the message.",
+ "description": "Use the LLM-as-a-Judge Boolean Evaluator to classify outputs as true or false for fast automated agent evaluations.",
"operationId": "langevals_llm_boolean_evaluate",
"requestBody": {
"content": {
@@ -748,7 +748,7 @@
"/langevals/llm_category/evaluate": {
"post": {
"summary": "LLM-as-a-Judge Category Evaluator",
- "description": "Use an LLM as a judge with a custom prompt to classify the message into custom defined categories.",
+ "description": "Use the LLM-as-a-Judge Category Evaluator to classify outputs into custom categories for structured AI agent evaluations.",
"operationId": "langevals_llm_category_evaluate",
"requestBody": {
"content": {
@@ -839,7 +839,7 @@
"/langevals/llm_score/evaluate": {
"post": {
"summary": "LLM-as-a-Judge Score Evaluator",
- "description": "Use an LLM as a judge with custom prompt to do a numeric score evaluation of the message.",
+ "description": "Score messages with an LLM-as-a-Judge evaluator to generate numeric performance metrics for AI agent testing.",
"operationId": "langevals_llm_score_evaluate",
"requestBody": {
"content": {
@@ -930,7 +930,7 @@
"/langevals/off_topic/evaluate": {
"post": {
"summary": "Off Topic Evaluator",
- "description": "This evaluator checks if the user message is concerning one of the allowed topics of the chatbot",
+ "description": "Detect off-topic messages using LangWatch’s Off Topic Evaluator to enforce domain boundaries during AI agent testing.",
"operationId": "langevals_off_topic_evaluate",
"requestBody": {
"content": {
@@ -1385,7 +1385,7 @@
"/azure/jailbreak/evaluate": {
"post": {
"summary": "Azure Jailbreak Detection",
- "description": "This evaluator checks for jailbreak-attempt in the input using Azure's Content Safety API.",
+ "description": "Use Azure Jailbreak Detection in LangWatch to identify jailbreak attempts and improve safety across AI agent testing workflows.",
"operationId": "azure_jailbreak_evaluate",
"requestBody": {
"content": {
@@ -2362,7 +2362,7 @@
"/example/word_count/evaluate": {
"post": {
"summary": "Example Evaluator",
- "description": "This evaluator serves as a boilerplate for creating new evaluators.",
+"description": "Use the Example Evaluator in LangWatch to implement and test custom evaluation logic. This endpoint counts words in the output, serving as a boilerplate for building your own evaluators.",
"operationId": "example_word_count_evaluate",
"requestBody": {
"content": {
@@ -2441,7 +2441,7 @@
"/ragas/bleu_score/evaluate": {
"post": {
"summary": "BLEU Score",
- "description": "Traditional NLP metric. BLEU score for evaluating the similarity between two strings.",
+ "description": "Use the BLEU Score evaluator to measure string similarity and support automated NLP and AI agent evaluation workflows.",
"operationId": "ragas_bleu_score_evaluate",
"requestBody": {
"content": {
@@ -2793,7 +2793,7 @@
"/ragas/factual_correctness/evaluate": {
"post": {
"summary": "LLM Factual Match",
- "description": "Computes with an LLM how factually similar the generated answer is to the expected output.",
+ "description": "Compute factual similarity with LangWatch’s LLM Factual Match evaluator to validate truthfulness in AI agent evaluations.",
"operationId": "ragas_factual_correctness_evaluate",
"requestBody": {
"content": {
@@ -3248,7 +3248,7 @@
"/ragas/rouge_score/evaluate": {
"post": {
"summary": "ROUGE Score",
- "description": "Traditional NLP metric. ROUGE score for evaluating the similarity between two strings.",
+ "description": "Use the ROUGE Score evaluator in LangWatch to measure text similarity and support AI agent evaluations and NLP quality checks.",
"operationId": "ragas_rouge_score_evaluate",
"requestBody": {
"content": {
@@ -3521,7 +3521,7 @@
"/ragas/summarization_score/evaluate": {
"post": {
"summary": "Summarization Score",
- "description": "Measures how well the summary captures important information from the retrieved contexts.",
+ "description": "Measure summary quality with LangWatch’s Summarization Score to support RAG evaluations and AI agent testing accuracy.",
"operationId": "ragas_summarization_score_evaluate",
"requestBody": {
"content": {
diff --git a/api-reference/openapiLangWatch.json b/api-reference/openapiLangWatch.json
index 34faee8..22c3a2b 100644
--- a/api-reference/openapiLangWatch.json
+++ b/api-reference/openapiLangWatch.json
@@ -18,7 +18,7 @@
"paths": {
"/api/annotations": {
"get": {
- "description": "Returns all annotations for project",
+ "description": "Retrieve project-wide annotations for large-scale dataset building and AI agent evaluation workflows.",
"responses": {
"200": {
"description": "Annotation response",
@@ -48,7 +48,7 @@
},
"/api/annotations/trace/{id}": {
"get": {
- "description": "Returns all annotations for single trace",
+ "description": "Retrieve all annotations for a trace to support systematic debugging and evaluation workflows in agent testing.",
"parameters": [
{
"name": "id",
@@ -87,7 +87,7 @@
}
},
"post": {
- "description": "Create an annotation for a single trace",
+"description": "Create an annotation for a single trace to support domain labeling, evaluation scoring, and agent testing workflows.",
"parameters": [
{
"name": "id",
@@ -147,7 +147,7 @@
},
"/api/annotations/{id}": {
"get": {
- "description": "Returns a single annotation based on the ID supplied",
+"description": "Retrieve a single annotation by ID to inspect evaluation inputs, domain labels, and agent testing context.",
"parameters": [
{
"name": "id",
@@ -183,7 +183,7 @@
}
},
"delete": {
- "description": "Deletes a single annotation based on the ID supplied",
+"description": "Delete a single annotation by ID to keep evaluation datasets and agent testing pipelines clean.",
"parameters": [
{
"name": "id",
@@ -227,7 +227,7 @@
}
},
"patch": {
- "description": "Updates a single annotation based on the ID supplied",
+"description": "Update a single annotation by ID to refine trace labeling and support structured AI agent evaluation workflows.",
"parameters": [
{
"name": "id",
@@ -294,7 +294,7 @@
},
"/api/trace/{id}": {
"get": {
- "description": "Returns single trace details based on the ID supplied",
+ "description": "Get detailed trace information via the API to analyze LLM behavior, debug workflows, and support agent testing.",
"parameters": [
{
"name": "id",
@@ -689,7 +689,7 @@
"/api/trace/search": {
"post": {
"summary": "Search traces",
- "description": "Search for traces based on given criteria",
+ "description": "Search for traces using LangWatch APIs to filter performance issues, evaluation results, and agent testing data.",
"tags": [
"Traces"
],
@@ -719,7 +719,7 @@
},
"/api/trace/{id}/share": {
"post": {
- "description": "Returns a public path for a trace",
+"description": "Generate a public path for a trace to share insights, evaluations, and agent testing results.",
"parameters": [
{
"name": "id",
@@ -762,7 +762,7 @@
},
"/api/trace/{id}/unshare": {
"post": {
- "description": "Deletes a public path for a trace",
+"description": "Delete the public path for a trace to maintain control over evaluation and agent testing data.",
"parameters": [
{
"name": "id",
@@ -1365,7 +1365,7 @@
"required": true
}
],
- "description": "Get a specific prompt"
+"description": "Retrieve a specific prompt via the API to support versioning, evaluation workflows, and agent testing pipelines."
},
"put": {
"responses": {
@@ -1940,7 +1940,7 @@
"required": true
}
],
- "description": "Update a prompt",
+"description": "Update a prompt to refine its versions, run new evaluations, and test agent performance under changing configurations.",
"requestBody": {
"content": {
"application/json": {
@@ -2220,7 +2220,7 @@
"required": true
}
],
- "description": "Delete a prompt"
+"description": "Delete a prompt programmatically using LangWatch to manage versioning and maintain clean evaluation pipelines."
}
},
"/api/prompts/{id}/versions": {
@@ -2778,7 +2778,7 @@
"required": true
}
],
- "description": "Get all versions for a prompt. Does not include base prompt data, only versioned data."
+"description": "Retrieve all versions for a prompt to support version control, evaluations, and automated AI agent testing workflows. Does not include base prompt data, only versioned data."
}
},
"/api/prompts/{id}/sync": {
@@ -4121,7 +4121,7 @@
"required": true
}
],
- "description": "Add entries to a dataset",
+ "description": "Add dataset entries programmatically using the LangWatch API to build evaluation sets for LLM testing and agent validation.",
"requestBody": {
"content": {
"application/json": {
@@ -4831,7 +4831,7 @@
},
"operationId": "getApiPrompts",
"parameters": [],
- "description": "Get all prompts for a project"
+ "description": "Retrieve all prompts for a project to manage prompt versioning, run comparisons, and support agent testing pipelines."
},
"post": {
"responses": {
@@ -5376,7 +5376,7 @@
},
"operationId": "postApiPrompts",
"parameters": [],
- "description": "Create a new prompt with default initial version",
+ "description": "Create new prompts with LangWatch’s API to manage versions, run evaluations, and support automated agent testing pipelines.",
"requestBody": {
"content": {
"application/json": {
@@ -5632,7 +5632,7 @@
},
"operationId": "postApiScenario-events",
"parameters": [],
- "description": "Create a new scenario event",
+ "description": "Create new scenario events for simulation-based agent testing using LangWatch’s API.",
"requestBody": {
"content": {
"application/json": {
@@ -6506,7 +6506,7 @@
},
"operationId": "getIndex",
"parameters": [],
- "description": "Get all prompts for a project"
+ "description": "Retrieve all prompts for a project to manage prompt versioning, run comparisons, and support agent testing pipelines."
},
"post": {
"responses": {
@@ -6694,7 +6694,7 @@
},
"operationId": "postIndex",
"parameters": [],
- "description": "Create a new prompt with default initial version",
+ "description": "Create new prompts with LangWatch’s API to manage versions, run evaluations, and support automated agent testing pipelines.",
"requestBody": {
"content": {
"application/json": {
diff --git a/api-reference/traces/overview.mdx b/api-reference/traces/overview.mdx
index ca0d2be..0792964 100644
--- a/api-reference/traces/overview.mdx
+++ b/api-reference/traces/overview.mdx
@@ -1,6 +1,6 @@
---
title: 'Overview'
-description: 'A Trace is a collection of runs that are related to a single operation'
+description: 'Understand LangWatch Traces, how runs are grouped into a single operation, and how to use them for LLM observability and AI agent evaluations.'
---
## Intro
diff --git a/concepts.mdx b/concepts.mdx
index ef0636f..3bb4341 100644
--- a/concepts.mdx
+++ b/concepts.mdx
@@ -1,6 +1,6 @@
---
title: Concepts
-description: LLM tracing and observability conceptual guide
+description: Explore core concepts of LLM tracing, observability, datasets, and evaluations in LangWatch to design reliable AI agent testing workflows.
keywords: LangWatch, concepts, tracing, observability, LLM, AI, travel, blog, user, customer, labels, threads, traces, spans
---
diff --git a/datasets/ai-dataset-generation.mdx b/datasets/ai-dataset-generation.mdx
index 834967c..5c26fcd 100644
--- a/datasets/ai-dataset-generation.mdx
+++ b/datasets/ai-dataset-generation.mdx
@@ -1,6 +1,6 @@
---
title: Generating a dataset with AI
-description: Bootstrap your evaluations by generating sample data
+description: Generate datasets with AI to bootstrap LLM evaluations, regression tests, and simulation-based agent testing.
---
Getting started with evaluations can be a bit daunting, especially when you don't have a dataset to use yet.
diff --git a/datasets/automatically-from-traces.mdx b/datasets/automatically-from-traces.mdx
index c08c1fa..8359ba5 100644
--- a/datasets/automatically-from-traces.mdx
+++ b/datasets/automatically-from-traces.mdx
@@ -1,6 +1,6 @@
---
title: Automatically build datasets from real-time traces
-description: Continuously populate your datasets with comming data from production
+description: Automatically build datasets from real-time traces to power LLM evaluations, regression tests, and AI agent testing workflows.
---
You can keep continously populating the dataset with new data arriving from production by using **Triggers**, mapping trace fields to any dataset columns you prefer.
diff --git a/datasets/dataset-images.mdx b/datasets/dataset-images.mdx
index fbfa839..4b1d6b5 100644
--- a/datasets/dataset-images.mdx
+++ b/datasets/dataset-images.mdx
@@ -1,6 +1,6 @@
---
title: View images in datasets
-description: Add ability to view images in datasets
+description: View images in your datasets in LangWatch to support multimodal evaluations and agent testing scenarios.
---
With the your images column type set to type set to `image (URL)`, you will be able to view images in your dataset. This is useful to analyze the images at a glance.
diff --git a/datasets/dataset-threads.mdx b/datasets/dataset-threads.mdx
index 823c777..666e133 100644
--- a/datasets/dataset-threads.mdx
+++ b/datasets/dataset-threads.mdx
@@ -1,6 +1,6 @@
---
title: Add trace threads to datasets
-description: Add full conversation threads to your datasets on a per row basis
+description: Add full conversation threads to datasets in LangWatch to generate richer evaluation inputs for AI agent testing.
---
To add trace threads to a dataset, follow these steps:
diff --git a/datasets/overview.mdx b/datasets/overview.mdx
index 57d1983..3b91cdf 100644
--- a/datasets/overview.mdx
+++ b/datasets/overview.mdx
@@ -1,7 +1,7 @@
---
title: Datasets
sidebarTitle: Overview
-description: Create and manage datasets with LangWatch
+description: Create and manage datasets in LangWatch to build evaluation sets for LLMs and structured AI agent testing.
---
## Create datasets
diff --git a/dspy-visualization/custom-optimizer.mdx b/dspy-visualization/custom-optimizer.mdx
index 720e17e..aaf8d9a 100644
--- a/dspy-visualization/custom-optimizer.mdx
+++ b/dspy-visualization/custom-optimizer.mdx
@@ -1,7 +1,7 @@
---
title: Tracking Custom DSPy Optimizer
sidebarTitle: Custom Optimizer Tracking
-description: Build custom DSPy optimizers and track them in LangWatch
+description: Track custom DSPy optimizer logic in LangWatch to visualize optimization steps and improve AI agent testing workflows.
---
If you are building a custom DSPy optimizer, then LangWatch won't support tracking it out of the box, but adding track to any custom optimizer is also very simple.
diff --git a/dspy-visualization/quickstart.mdx b/dspy-visualization/quickstart.mdx
index b9194b6..7013e83 100644
--- a/dspy-visualization/quickstart.mdx
+++ b/dspy-visualization/quickstart.mdx
@@ -1,7 +1,7 @@
---
title: DSPy Visualization Quickstart
sidebarTitle: Quickstart
-description: Visualize your DSPy notebooks experimentations to better track and debug the optimization process
+description: Quickly visualize DSPy notebooks and optimization experiments in LangWatch to support debugging and agent evaluation.
---
[
](https://colab.research.google.com/github/langwatch/langwatch/blob/main/python-sdk/examples/dspy_visualization.ipynb)
diff --git a/dspy-visualization/rag-visualization.mdx b/dspy-visualization/rag-visualization.mdx
index 5b275dd..8b73c99 100644
--- a/dspy-visualization/rag-visualization.mdx
+++ b/dspy-visualization/rag-visualization.mdx
@@ -1,6 +1,6 @@
---
title: "RAG Visualization"
-description: Visualize your DSPy RAG optimization process in LangWatch
+description: Visualize DSPy RAG optimization steps in LangWatch to better understand performance and support AI agent testing.
---
[
](https://colab.research.google.com/github/langwatch/langevals/blob/main/notebooks/tutorials/dspy_rag.ipynb)
diff --git a/evaluations/custom-evaluator-integration.mdx b/evaluations/custom-evaluator-integration.mdx
index e40602a..57b17db 100644
--- a/evaluations/custom-evaluator-integration.mdx
+++ b/evaluations/custom-evaluator-integration.mdx
@@ -1,6 +1,6 @@
---
title: Instrumenting Custom Evaluator
-description: Add your own evaluation results into LangWatch trace
+description: Integrate custom evaluator results into LangWatch to extend scoring logic for advanced AI agent evaluations.
---
If you have a custom evaluator built in-house which run on your own code, either during the LLM pipeline or after, you can still capture the evaluation results
diff --git a/evaluations/evaluation-by-thread.mdx b/evaluations/evaluation-by-thread.mdx
index 8aca377..d4b0668 100644
--- a/evaluations/evaluation-by-thread.mdx
+++ b/evaluations/evaluation-by-thread.mdx
@@ -1,6 +1,6 @@
---
title: Evaluation by Thread
-description: Evaluate your LLM applications by thread
+description: Evaluate LLM applications by thread in LangWatch to analyze conversation-level performance in agent testing setups.
---
With LangWatch, you can evaluate your LLM applications by thread. This approach is useful for analyzing the performance of your LLM applications across entire conversation threads, helping you identify which threads are performing well or poorly.
diff --git a/features/annotations.mdx b/features/annotations.mdx
index 2c244ae..508401d 100644
--- a/features/annotations.mdx
+++ b/features/annotations.mdx
@@ -1,6 +1,6 @@
---
title: Annotations
-description: Collaborate with domain experts using annotations
+description: Use annotations in LangWatch for expert labeling, trace review, and structured evaluation workflows for AI agent testing.
---
# Create annotations on messages
diff --git a/features/embedded-analytics.mdx b/features/embedded-analytics.mdx
index 61094ab..e6ca03d 100644
--- a/features/embedded-analytics.mdx
+++ b/features/embedded-analytics.mdx
@@ -1,6 +1,6 @@
---
title: Exporting Analytics
-description: Build and integrate LangWatch graphs on your own systems and applications
+description: Export LangWatch analytics into your own dashboards to monitor LLM quality, agent testing metrics, and evaluation performance.
---
## Export Analytics with REST Endpoint
diff --git a/features/triggers.mdx b/features/triggers.mdx
index a7f2977..061047d 100644
--- a/features/triggers.mdx
+++ b/features/triggers.mdx
@@ -1,6 +1,6 @@
---
title: Alerts and Triggers
-description: Be alerted when something goes wrong and trigger actions automatically
+description: Configure Alerts and Triggers in LangWatch to detect regressions, notify teams, and enforce automated guardrails for AI agent testing.
---
## Create triggers based on LangWatch filters
diff --git a/hybrid-setup/elasticsearch.mdx b/hybrid-setup/elasticsearch.mdx
index 67bd4a9..ef453e6 100644
--- a/hybrid-setup/elasticsearch.mdx
+++ b/hybrid-setup/elasticsearch.mdx
@@ -1,6 +1,6 @@
---
title: Elasticsearch
-description: Elasticsearch Setup for LangWatch Hybrid Deployment
+description: Set up Elasticsearch for LangWatch Hybrid deployments to enable scalable search and analysis of traces and agent evaluations.
---
### Introduction
diff --git a/hybrid-setup/overview.mdx b/hybrid-setup/overview.mdx
index 37a2a61..def42a3 100644
--- a/hybrid-setup/overview.mdx
+++ b/hybrid-setup/overview.mdx
@@ -1,6 +1,6 @@
---
title: Overview
-description: LangWatch offers a hybrid setup for companies that require strict data control and compliance.
+description: Learn how LangWatch's hybrid setup supports strict data control, compliance requirements, and secure AI agent testing infrastructure.
---
# Hybrid Setup Overview
diff --git a/hybrid-setup/s3-storage.mdx b/hybrid-setup/s3-storage.mdx
index 7ea8fc4..ef41e6e 100644
--- a/hybrid-setup/s3-storage.mdx
+++ b/hybrid-setup/s3-storage.mdx
@@ -1,6 +1,6 @@
---
title: S3 Storage
-description: S3 Storage Setup for LangWatch Hybrid Deployment
+description: Configure S3 storage for LangWatch Hybrid deployments to store traces, evaluations, and AI agent testing datasets.
---
### S3-Compatible Object Storage Setup
diff --git a/integration/code-examples.mdx b/integration/code-examples.mdx
index a9b8436..227ab0f 100644
--- a/integration/code-examples.mdx
+++ b/integration/code-examples.mdx
@@ -1,6 +1,6 @@
---
title: Code Examples
-description: Examples of LangWatch integrated applications
+description: Explore code examples showing LangWatch integrations for tracing, evaluating, and improving AI agent testing pipelines.
keywords: langwatch, examples, code, integration, python, typescript, opentelemetry
---
diff --git a/integration/flowise.mdx b/integration/flowise.mdx
index ea6d779..f4b9c7e 100644
--- a/integration/flowise.mdx
+++ b/integration/flowise.mdx
@@ -1,7 +1,7 @@
---
title: Flowise Integration
sidebarTitle: Flowise
-description: Capture LLM traces and send them to LangWatch from Flowise
+description: Send Flowise LLM traces to LangWatch to monitor performance, detect issues, and support AI agent evaluation workflows.
---
[Flowise](https://flowiseai.com/) is a low-code tool for building LLM pipelines. If you are using Flowise, you can easily enable LangWatch from their UI for analytics, evaluations and much more.
diff --git a/integration/go/guide.mdx b/integration/go/guide.mdx
index 74c90d0..daf22e9 100644
--- a/integration/go/guide.mdx
+++ b/integration/go/guide.mdx
@@ -1,7 +1,7 @@
---
title: Go Integration Guide
sidebarTitle: Guide
-description: LangWatch Go SDK integration guide for setting up LLM observability and tracing.
+description: Use the LangWatch Go SDK to trace LLM calls, measure performance, and support observability-driven AI agent testing.
keywords: LangWatch, Go, Golang, SDK, integration, guide, setup, tracing, spans, traces, OpenTelemetry, OpenAI
---
diff --git a/integration/go/integrations/anthropic.mdx b/integration/go/integrations/anthropic.mdx
index 82c0334..50600b9 100644
--- a/integration/go/integrations/anthropic.mdx
+++ b/integration/go/integrations/anthropic.mdx
@@ -1,7 +1,7 @@
---
title: Anthropic (Claude) Integration
sidebarTitle: Go
-description: Learn how to instrument Anthropic Claude API calls in Go using LangWatch.
+description: Instrument Anthropic Claude API calls in Go using LangWatch to track performance, detect errors, and improve AI agent testing.
icon: golang
keywords: go, golang, anthropic, claude, instrumentation, langwatch, openai-compatible
---
diff --git a/integration/go/integrations/azure-openai.mdx b/integration/go/integrations/azure-openai.mdx
index 5a2ab2a..8f13e6b 100644
--- a/integration/go/integrations/azure-openai.mdx
+++ b/integration/go/integrations/azure-openai.mdx
@@ -1,7 +1,7 @@
---
title: Azure OpenAI Integration
sidebarTitle: Go
-description: Learn how to instrument Azure OpenAI API calls in Go using the LangWatch SDK.
+description: Instrument Azure OpenAI API calls in Go using LangWatch to monitor model usage, latency, and AI agent evaluation metrics.
icon: golang
keywords: go, golang, azure, azure openai, instrumentation, langwatch, openai-compatible
---
diff --git a/integration/go/integrations/grok.mdx b/integration/go/integrations/grok.mdx
index decd58f..f5f9a8a 100644
--- a/integration/go/integrations/grok.mdx
+++ b/integration/go/integrations/grok.mdx
@@ -1,7 +1,7 @@
---
title: Grok (xAI) Integration
sidebarTitle: Grok (xAI)
-description: Instrument Grok (xAI) API calls in Go with the LangWatch SDK for full observability.
+description: Instrument Grok (xAI) API calls in Go using LangWatch for full observability and improved AI agent evaluations.
keywords: go, golang, grok, xai, instrumentation, langwatch, openai-compatible
---
diff --git a/integration/go/integrations/groq.mdx b/integration/go/integrations/groq.mdx
index a74c991..12fc099 100644
--- a/integration/go/integrations/groq.mdx
+++ b/integration/go/integrations/groq.mdx
@@ -1,7 +1,7 @@
---
title: Groq Integration
sidebarTitle: Groq
-description: Learn how to instrument Groq API calls in Go using the LangWatch SDK for high-speed LLM tracing.
+description: Instrument Groq API calls in Go using LangWatch for fast LLM observability, cost tracking, and agent evaluation insights.
keywords: go, golang, groq, instrumentation, langwatch, openai-compatible
---
diff --git a/integration/go/integrations/ollama.mdx b/integration/go/integrations/ollama.mdx
index 73422d0..f92388d 100644
--- a/integration/go/integrations/ollama.mdx
+++ b/integration/go/integrations/ollama.mdx
@@ -1,7 +1,7 @@
---
title: Ollama (Local Models) Integration
sidebarTitle: Ollama (Local)
-description: Learn how to trace local LLMs running via Ollama in Go using the LangWatch SDK.
+description: Instrument local Ollama models in Go with LangWatch to monitor performance, debug local model behavior, and support AI agent testing environments.
keywords: go, golang, ollama, local llm, instrumentation, langwatch, openai-compatible
---
diff --git a/integration/go/integrations/open-ai.mdx b/integration/go/integrations/open-ai.mdx
index ddfb39b..927f8b8 100644
--- a/integration/go/integrations/open-ai.mdx
+++ b/integration/go/integrations/open-ai.mdx
@@ -1,7 +1,7 @@
---
title: OpenAI Instrumentation
sidebarTitle: Go
-description: Learn how to instrument OpenAI API calls with the LangWatch Go SDK using middleware.
+description: Instrument OpenAI API calls with the Go SDK to trace LLM interactions, measure performance, and support agent evaluation pipelines.
icon: golang
keywords: openai, instrumentation, golang, go, langwatch, middleware, streaming
---
diff --git a/integration/go/integrations/openrouter.mdx b/integration/go/integrations/openrouter.mdx
index 5fe6453..89055dc 100644
--- a/integration/go/integrations/openrouter.mdx
+++ b/integration/go/integrations/openrouter.mdx
@@ -1,7 +1,7 @@
---
title: OpenRouter Integration
sidebarTitle: OpenRouter
-description: Learn how to instrument calls to hundreds of models via OpenRouter in Go using the LangWatch SDK.
+description: Instrument OpenRouter model calls in Go with LangWatch to compare models, track quality, and run AI agent evaluations.
keywords: go, golang, openrouter, model router, instrumentation, langwatch, opentelemetry, openai-compatible
---
diff --git a/integration/java/integrations/spring-ai.mdx b/integration/java/integrations/spring-ai.mdx
index 6b1e6a4..2dacae2 100644
--- a/integration/java/integrations/spring-ai.mdx
+++ b/integration/java/integrations/spring-ai.mdx
@@ -1,7 +1,7 @@
---
title: Spring AI (Java) Integration
sidebarTitle: Spring AI
-description: Configure OpenTelemetry in your Spring AI app to send traces to LangWatch.
+description: Configure Spring AI with OpenTelemetry and LangWatch to capture LLM traces and enable full-stack AI agent evaluations.
keywords: java, spring, spring ai, spring boot, opentelemetry, langwatch, observability
---
diff --git a/integration/langflow.mdx b/integration/langflow.mdx
index 375caf8..0b2dfd9 100644
--- a/integration/langflow.mdx
+++ b/integration/langflow.mdx
@@ -1,7 +1,7 @@
---
title: Langflow Integration
sidebarTitle: Langflow
-description: LangWatch is the best observability integration for Langflow
+description: Integrate Langflow with LangWatch to capture node execution, prompt behavior, and evaluation metrics for AI agent testing.
---
[Langflow](https://www.langflow.org/) is a low-code tool for building LLM pipelines. If you are using Langflow, you can easily enable LangWatch from their UI for analytics, evaluations and much more.
diff --git a/integration/mcp.mdx b/integration/mcp.mdx
index 7a428ea..9345667 100644
--- a/integration/mcp.mdx
+++ b/integration/mcp.mdx
@@ -1,7 +1,7 @@
---
title: LangWatch MCP Server
sidebarTitle: LangWatch MCP
-description: Turn your coding assistant into a LangWatch expert
+description: Use the LangWatch MCP Server to extend your coding assistant with deep LangWatch insights for tracing, testing, and agent evaluations.
---
The [LangWatch MCP Server](https://www.npmjs.com/package/@langwatch/mcp-server) gives your AI coding assistant (Cursor, Claude Code, Codex, etc.) full access to all LangWatch and [Scenario](https://langwatch.ai/scenario/) documentation and features via the [Model Context Protocol](https://modelcontextprotocol.io/introduction).
diff --git a/integration/opentelemetry/guide.mdx b/integration/opentelemetry/guide.mdx
index ed47435..8ef7b65 100644
--- a/integration/opentelemetry/guide.mdx
+++ b/integration/opentelemetry/guide.mdx
@@ -1,7 +1,7 @@
---
title: OpenTelemetry Integration Guide
sidebarTitle: OpenTelemetry
-description: Use OpenTelemetry to capture LLM traces and send them to LangWatch from any programming language
+description: Integrate OpenTelemetry with LangWatch to collect LLM spans from any language for unified AI agent evaluation data.
icon: telescope
keywords: langwatch, opentelemetry, integration, guide, java, c#, .net, python, typescript, javascript, go, sdk, open telemetry, open telemetry integration, open telemetry guide, open telemetry integration guide, open telemetry integration guide java, open telemetry integration guide c#, open telemetry integration guide .net, open telemetry integration guide python, open telemetry integration guide typescript, open telemetry integration guide javascript, open telemetry integration guide go
---
diff --git a/integration/python/guide.mdx b/integration/python/guide.mdx
index ba40382..da51be2 100644
--- a/integration/python/guide.mdx
+++ b/integration/python/guide.mdx
@@ -1,7 +1,7 @@
---
title: Python Integration Guide
sidebarTitle: Guide
-description: LangWatch Python SDK integration guide
+description: Follow the LangWatch Python integration guide to capture traces, debug pipelines, and enable observability for agent testing.
keywords: LangWatch, Python, SDK, integration, guide, setup, tracing, spans, traces, OpenTelemetry, OpenAI, Celery, HTTP clients, databases, ORMs
---
diff --git a/integration/python/integrations/agno.mdx b/integration/python/integrations/agno.mdx
index 998cc5d..48165a1 100644
--- a/integration/python/integrations/agno.mdx
+++ b/integration/python/integrations/agno.mdx
@@ -1,7 +1,7 @@
---
title: Agno Instrumentation
sidebarTitle: Agno
-description: Learn how to instrument Agno agents and send traces to LangWatch using the Python SDK.
+description: Instrument Agno agents with LangWatch’s Python SDK to send traces, analyze behaviors, and strengthen AI agent testing and evaluations.
keywords: agno, openinference, langwatch, python, tracing, observability
---
diff --git a/integration/python/integrations/anthropic.mdx b/integration/python/integrations/anthropic.mdx
index 63cc5f7..0eedf6d 100644
--- a/integration/python/integrations/anthropic.mdx
+++ b/integration/python/integrations/anthropic.mdx
@@ -1,7 +1,7 @@
---
title: Anthropic Instrumentation
sidebarTitle: Python
-description: Learn how to instrument Anthropic API calls with the LangWatch Python SDK
+description: Instrument Anthropic API calls with LangWatch’s Python SDK to trace usage, debug issues, and support AI agent testing.
icon: python
keywords: anthropic, claude, instrumentation, openinference, langwatch, python
---
diff --git a/integration/python/integrations/autogen.mdx b/integration/python/integrations/autogen.mdx
index 7f2b76f..ef372dd 100644
--- a/integration/python/integrations/autogen.mdx
+++ b/integration/python/integrations/autogen.mdx
@@ -1,7 +1,7 @@
---
title: AutoGen Instrumentation
sidebarTitle: AutoGen
-description: Learn how to instrument AutoGen applications with LangWatch.
+description: Integrate AutoGen applications with LangWatch to trace multi-agent interactions and run systematic AI agent evaluations.
keywords: autogen, python, sdk, instrumentation, opentelemetry, langwatch, tracing
---
diff --git a/integration/python/integrations/aws-bedrock.mdx b/integration/python/integrations/aws-bedrock.mdx
index bb8cb45..793017f 100644
--- a/integration/python/integrations/aws-bedrock.mdx
+++ b/integration/python/integrations/aws-bedrock.mdx
@@ -1,7 +1,7 @@
---
title: AWS Bedrock Instrumentation
sidebarTitle: Bedrock
-description: Learn how to instrument AWS Bedrock calls with the LangWatch Python SDK using OpenInference.
+description: Instrument AWS Bedrock calls using OpenInference and LangWatch to capture metrics and behaviors for AI agent testing workflows.
icon: python
keywords: aws, bedrock, boto3, instrumentation, opentelemetry, openinference, langwatch, python, tracing
---
diff --git a/integration/python/integrations/azure-ai.mdx b/integration/python/integrations/azure-ai.mdx
index 4f7882f..50a0bae 100644
--- a/integration/python/integrations/azure-ai.mdx
+++ b/integration/python/integrations/azure-ai.mdx
@@ -1,7 +1,7 @@
---
title: Azure AI Inference SDK Instrumentation
sidebarTitle: Python
-description: Learn how to instrument the Azure AI Inference Python SDK with LangWatch.
+description: Instrument Azure AI Inference SDK calls with LangWatch to trace requests, monitor quality, and run AI agent evaluations.
icon: python
keywords: azure ai inference, python, sdk, instrumentation, opentelemetry, langwatch, tracing
---
diff --git a/integration/python/integrations/crew-ai.mdx b/integration/python/integrations/crew-ai.mdx
index 84e7847..76b6d69 100644
--- a/integration/python/integrations/crew-ai.mdx
+++ b/integration/python/integrations/crew-ai.mdx
@@ -1,6 +1,6 @@
---
title: CrewAI
-description: Learn how to instrument the CrewAI Python SDK with LangWatch.
+description: Integrate the CrewAI Python SDK with LangWatch to trace multi-agent workflows, debug failures, and support systematic AI agent testing.
keywords: crewai, python, sdk, instrumentation, opentelemetry, langwatch, tracing
---
diff --git a/integration/python/integrations/dspy.mdx b/integration/python/integrations/dspy.mdx
index 3be45e5..30e85c9 100644
--- a/integration/python/integrations/dspy.mdx
+++ b/integration/python/integrations/dspy.mdx
@@ -1,7 +1,7 @@
---
title: DSPy Instrumentation
sidebarTitle: DSPy
-description: Learn how to instrument DSPy programs with the LangWatch Python SDK
+description: Learn how to instrument DSPy programs with the LangWatch Python SDK to trace RAG pipelines, optimize prompts, and improve AI agent evaluations.
keywords: dspy, instrumentation, autotrack, langwatch, python
---
diff --git a/integration/python/integrations/google-ai.mdx b/integration/python/integrations/google-ai.mdx
index 7ac63f7..f4e3c4f 100644
--- a/integration/python/integrations/google-ai.mdx
+++ b/integration/python/integrations/google-ai.mdx
@@ -1,7 +1,7 @@
---
title: Google Agent Development Kit (ADK) Instrumentation
sidebarTitle: Google ADK
-description: Learn how to instrument Google Agent Development Kit (ADK) applications with LangWatch.
+description: Integrate Google ADK agents into LangWatch to trace actions, tools, and interactions for structured AI agent evaluations.
keywords: google adk, agent development kit, python, sdk, instrumentation, opentelemetry, langwatch, tracing
---
diff --git a/integration/python/integrations/instructor.mdx b/integration/python/integrations/instructor.mdx
index 16a0b1e..ee7c14b 100644
--- a/integration/python/integrations/instructor.mdx
+++ b/integration/python/integrations/instructor.mdx
@@ -1,7 +1,7 @@
---
title: Instructor AI Instrumentation
sidebarTitle: Instructor AI
-description: Learn how to instrument Instructor AI applications with LangWatch using OpenInference.
+description: Instrument Instructor AI with LangWatch to track structured outputs, detect errors, and enhance AI agent testing workflows.
keywords: instructor, python, sdk, instrumentation, opentelemetry, langwatch, tracing, openinference, structured output
---
diff --git a/integration/python/integrations/langchain.mdx b/integration/python/integrations/langchain.mdx
index b6be301..76c5fe5 100644
--- a/integration/python/integrations/langchain.mdx
+++ b/integration/python/integrations/langchain.mdx
@@ -1,7 +1,7 @@
---
title: LangChain Instrumentation
sidebarTitle: Python
-description: Learn how to instrument Langchain applications with the LangWatch Python SDK.
+description: Instrument LangChain applications with LangWatch to trace chains, RAG flows, and metrics for AI agent evaluations.
icon: python
keywords: langchain, instrumentation, callback, langwatch, python, tracing
---
diff --git a/integration/python/integrations/langgraph.mdx b/integration/python/integrations/langgraph.mdx
index 16903c4..33d8139 100644
--- a/integration/python/integrations/langgraph.mdx
+++ b/integration/python/integrations/langgraph.mdx
@@ -1,7 +1,7 @@
---
title: LangGraph Instrumentation
sidebarTitle: LangGraph
-description: Learn how to instrument LangGraph applications with the LangWatch Python SDK.
+description: Instrument LangGraph applications with the LangWatch Python SDK to trace graph nodes, analyze workflows, and support AI agent testing.
icon: python
keywords: langgraph, instrumentation, callback, langwatch, python, tracing
---
diff --git a/integration/python/integrations/lite-llm.mdx b/integration/python/integrations/lite-llm.mdx
index 8a3ddbb..fe18885 100644
--- a/integration/python/integrations/lite-llm.mdx
+++ b/integration/python/integrations/lite-llm.mdx
@@ -1,7 +1,7 @@
---
title: LiteLLM Instrumentation
sidebarTitle: LiteLLM
-description: Learn how to instrument LiteLLM calls with the LangWatch Python SDK.
+description: Instrument LiteLLM calls with the LangWatch Python SDK to capture LLM traces, measure quality, and support AI agent testing workflows.
keywords: litellm, instrumentation, autotrack, langwatch, python, tracing
---
diff --git a/integration/python/integrations/llamaindex.mdx b/integration/python/integrations/llamaindex.mdx
index 1170833..7874896 100644
--- a/integration/python/integrations/llamaindex.mdx
+++ b/integration/python/integrations/llamaindex.mdx
@@ -1,7 +1,7 @@
---
title: LlamaIndex Instrumentation
sidebarTitle: LlamaIndex
-description: Learn how to instrument LlamaIndex applications with LangWatch.
+description: Instrument LlamaIndex applications with LangWatch to trace retrieval, generation, and RAG behavior for AI agent evaluations.
keywords: llamaindex, python, sdk, instrumentation, opentelemetry, langwatch, tracing
---
diff --git a/integration/python/integrations/open-ai-agents.mdx b/integration/python/integrations/open-ai-agents.mdx
index 057c6b4..bb38d42 100644
--- a/integration/python/integrations/open-ai-agents.mdx
+++ b/integration/python/integrations/open-ai-agents.mdx
@@ -1,7 +1,7 @@
---
title: OpenAI Agents SDK Instrumentation
sidebarTitle: OpenAI Agents
-description: Learn how to instrument OpenAI Agents with the LangWatch Python SDK
+description: Instrument OpenAI Agents with the LangWatch Python SDK to capture traces, run AI agent evaluations, and debug agent testing scenarios.
keywords: openai-agents, instrumentation, openinference, langwatch, python, tracing
---
diff --git a/integration/python/integrations/open-ai-azure.mdx b/integration/python/integrations/open-ai-azure.mdx
index 55c72b0..dd2eb15 100644
--- a/integration/python/integrations/open-ai-azure.mdx
+++ b/integration/python/integrations/open-ai-azure.mdx
@@ -1,7 +1,7 @@
---
title: Azure OpenAI Instrumentation
sidebarTitle: Azure OpenAI
-description: Learn how to instrument Azure OpenAI API calls with the LangWatch Python SDK
+description: Instrument Azure OpenAI API calls with the LangWatch Python SDK to capture traces, measure costs, and run agent evaluations.
keywords: azure openai, openai, instrumentation, autotrack, openinference, openllmetry, LangWatch, Python
---
diff --git a/integration/python/integrations/open-ai.mdx b/integration/python/integrations/open-ai.mdx
index dc1eaaf..c9e43a5 100644
--- a/integration/python/integrations/open-ai.mdx
+++ b/integration/python/integrations/open-ai.mdx
@@ -1,7 +1,7 @@
---
title: OpenAI Instrumentation
sidebarTitle: Python
-description: Learn how to instrument OpenAI API calls with the LangWatch Python SDK
+description: Instrument OpenAI API calls with the LangWatch Python SDK to capture traces, debug, and support AI agent testing workflows.
icon: python
keywords: openai, instrumentation, autotrack, langwatch, python
---
diff --git a/integration/python/integrations/other.mdx b/integration/python/integrations/other.mdx
index 91bc612..6cffc64 100644
--- a/integration/python/integrations/other.mdx
+++ b/integration/python/integrations/other.mdx
@@ -1,7 +1,7 @@
---
title: Other OpenTelemetry Instrumentors
sidebarTitle: Other
-description: Learn how to use any OpenTelemetry-compatible instrumentor with LangWatch.
+description: Use any OpenTelemetry-compatible instrumentor with LangWatch to standardize tracing and centralize AI agent testing observability.
keywords: opentelemetry, instrumentation, custom, other, generic, BaseInstrumentor, LangWatch, Python
---
diff --git a/integration/python/integrations/promptflow.mdx b/integration/python/integrations/promptflow.mdx
index f3ba9c4..883fd14 100644
--- a/integration/python/integrations/promptflow.mdx
+++ b/integration/python/integrations/promptflow.mdx
@@ -1,7 +1,7 @@
---
title: PromptFlow Instrumentation
sidebarTitle: PromptFlow
-description: Learn how to instrument PromptFlow applications with LangWatch.
+description: Instrument PromptFlow with LangWatch to trace pipelines, measure outcomes, and power AI agent testing workflows.
keywords: promptflow, python, sdk, instrumentation, opentelemetry, langwatch, tracing
---
diff --git a/integration/python/integrations/pydantic-ai.mdx b/integration/python/integrations/pydantic-ai.mdx
index f7d911c..24a81ea 100644
--- a/integration/python/integrations/pydantic-ai.mdx
+++ b/integration/python/integrations/pydantic-ai.mdx
@@ -1,7 +1,7 @@
---
title: PydanticAI Instrumentation
sidebarTitle: PydanticAI
-description: Learn how to instrument PydanticAI applications with the LangWatch Python SDK.
+description: Connect PydanticAI applications to LangWatch using the Python SDK to trace calls, debug structured outputs, and improve AI agent evaluations.
keywords: pydantic-ai, pydanticai, instrumentation, langwatch, python, tracing
---
diff --git a/integration/python/integrations/semantic-kernel.mdx b/integration/python/integrations/semantic-kernel.mdx
index 4604f6d..b630c4b 100644
--- a/integration/python/integrations/semantic-kernel.mdx
+++ b/integration/python/integrations/semantic-kernel.mdx
@@ -1,7 +1,7 @@
---
title: Semantic Kernel Instrumentation
sidebarTitle: Semantic Kernel
-description: Learn how to instrument Semantic Kernel applications with LangWatch.
+description: Instrument Semantic Kernel applications with LangWatch to trace skills, pipelines, and agent evaluation stages.
keywords: semantic-kernel, python, sdk, instrumentation, opentelemetry, langwatch, tracing, openinference
---
diff --git a/integration/python/integrations/smolagents.mdx b/integration/python/integrations/smolagents.mdx
index 41abde0..93096ed 100644
--- a/integration/python/integrations/smolagents.mdx
+++ b/integration/python/integrations/smolagents.mdx
@@ -1,7 +1,7 @@
---
title: SmolAgents Instrumentation
sidebarTitle: SmolAgents
-description: Learn how to instrument SmolAgents applications with LangWatch.
+description: Add SmolAgents tracing with LangWatch to analyze behaviors, detect errors, and improve AI agent testing accuracy.
keywords: smolagents, python, sdk, instrumentation, opentelemetry, langwatch, tracing
---
diff --git a/integration/python/integrations/strand-agents.mdx b/integration/python/integrations/strand-agents.mdx
index e2f2e73..11c15a8 100644
--- a/integration/python/integrations/strand-agents.mdx
+++ b/integration/python/integrations/strand-agents.mdx
@@ -1,7 +1,7 @@
---
title: Strands Agents Instrumentation
sidebarTitle: Strands Agents
-description: Learn how to instrument Strands Agents applications with LangWatch.
+description: Instrument Strands Agents with LangWatch to capture decision flows and support repeatable AI agent testing.
keywords: strands agents, python, sdk, instrumentation, langwatch, tracing
---
diff --git a/integration/python/reference.mdx b/integration/python/reference.mdx
index bdc93a3..c315ddd 100644
--- a/integration/python/reference.mdx
+++ b/integration/python/reference.mdx
@@ -1,7 +1,7 @@
---
title: Python SDK API Reference
sidebarTitle: Reference
-description: LangWatch Python SDK API reference
+description: Use the LangWatch Python SDK API reference to implement tracing, events, and evaluation logic for AI agent testing workflows.
icon: terminal
---
@@ -795,7 +795,7 @@ Each update operation creates a new version of the prompt. Previous versions are
Deletes a prompt and all its versions from the LangWatch platform.
```python
-# Delete a prompt
+# Delete a prompt and all its versions
result = langwatch.prompts.delete("customer-support-bot")
```
diff --git a/integration/python/tutorials/capturing-rag.mdx b/integration/python/tutorials/capturing-rag.mdx
index c8a170f..39191d8 100644
--- a/integration/python/tutorials/capturing-rag.mdx
+++ b/integration/python/tutorials/capturing-rag.mdx
@@ -1,7 +1,7 @@
---
title: Capturing RAG
sidebarTitle: Capturing RAG
-description: Learn how to capture Retrieval Augmented Generation (RAG) data with LangWatch.
+description: Learn how to capture Retrieval-Augmented Generation (RAG) data with LangWatch to support evaluations and agent testing.
icon: python
keywords: RAG, Retrieval Augmented Generation, LangChain, LangWatch, LangChain RAG, RAG Span, RAG Chunk, RAG Tool
---
diff --git a/integration/python/tutorials/manual-instrumentation.mdx b/integration/python/tutorials/manual-instrumentation.mdx
index 019951f..4b43c39 100644
--- a/integration/python/tutorials/manual-instrumentation.mdx
+++ b/integration/python/tutorials/manual-instrumentation.mdx
@@ -1,6 +1,6 @@
---
title: Manual Instrumentation
-description: Learn how to manually instrument your code with the LangWatch Python SDK
+description: Learn manual instrumentation with the LangWatch Python SDK for full control over tracing, evaluations, and agent testing.
keywords: manual instrumentation, context managers, span, trace, async, synchronous, LangWatch, Python
---
diff --git a/integration/python/tutorials/open-telemetry.mdx b/integration/python/tutorials/open-telemetry.mdx
index 2d52afd..a85cfde 100644
--- a/integration/python/tutorials/open-telemetry.mdx
+++ b/integration/python/tutorials/open-telemetry.mdx
@@ -1,6 +1,6 @@
---
title: OpenTelemetry Migration
-description: Learn how to integrate the LangWatch Python SDK with your existing OpenTelemetry setup.
+description: Integrate LangWatch with existing OpenTelemetry setups to enhance tracing, analysis, and agent evaluation workflows.
keywords: OpenTelemetry, OTel, auto-instrumentation, OpenAI, Celery, HTTP clients, databases, ORMs, LangWatch, Python
---
diff --git a/integration/python/tutorials/tracking-llm-costs.mdx b/integration/python/tutorials/tracking-llm-costs.mdx
index 42e2861..12426c9 100644
--- a/integration/python/tutorials/tracking-llm-costs.mdx
+++ b/integration/python/tutorials/tracking-llm-costs.mdx
@@ -1,7 +1,7 @@
---
title: Tracking LLM Costs and Tokens
sidebarTitle: Python
-description: Troubleshooting & adjusting cost tracking in LangWatch
+description: Track LLM costs and tokens with LangWatch to monitor efficiency and support performance evaluations in agent testing.
icon: python
keywords: LangWatch, cost tracking, token counting, debugging, troubleshooting, model costs, metrics, LLM spans
---
diff --git a/integration/python/tutorials/tracking-tool-calls.mdx b/integration/python/tutorials/tracking-tool-calls.mdx
index 144f3d0..31dc5b7 100644
--- a/integration/python/tutorials/tracking-tool-calls.mdx
+++ b/integration/python/tutorials/tracking-tool-calls.mdx
@@ -1,7 +1,7 @@
---
title: Tracking Tool Calls
sidebarTitle: Python
-description: Learn how to track tool calls in your LLM agent applications
+description: Track tool calls in Python-based agent applications with LangWatch to improve debugging and evaluation completeness.
icon: python
keywords: langwatch, python, tools, agent, tracking, instrumentation
---
diff --git a/integration/rags-context-tracking.mdx b/integration/rags-context-tracking.mdx
index 676ba51..a2b15b4 100644
--- a/integration/rags-context-tracking.mdx
+++ b/integration/rags-context-tracking.mdx
@@ -1,6 +1,6 @@
---
title: "RAG Context Tracking"
-description: Capture the RAG documents used in your LLM pipelines
+description: Track RAG documents used in LLM applications with LangWatch to improve transparency and AI agent evaluation accuracy.
---
Retrieval Augmented Generation (RAGs) is a common way to augment the generation of your LLM by retrieving a set of documents based on the user query and giving it to the LLM to use as context for answering, either by using a vector database, getting responses from an API, or integrated agent files and memory.
diff --git a/integration/rest-api.mdx b/integration/rest-api.mdx
index 36e4070..86dec6c 100644
--- a/integration/rest-api.mdx
+++ b/integration/rest-api.mdx
@@ -2,7 +2,7 @@
title: REST API
sidebarTitle: HTTP API
icon: globe
-description: Integrate LangWatch with any language by using the REST API
+description: Use the LangWatch REST API to send traces, evaluations, and interactions from any stack, enabling unified agent testing data flows.
keywords: LangWatch, REST API, HTTP API, curl, integration, observability, evaluation, prompts, datasets, workflows, automation
---
diff --git a/integration/typescript/guide.mdx b/integration/typescript/guide.mdx
index 32c8309..37e4056 100644
--- a/integration/typescript/guide.mdx
+++ b/integration/typescript/guide.mdx
@@ -1,7 +1,7 @@
---
title: TypeScript Integration Guide
sidebarTitle: Guide
-description: Get started with LangWatch TypeScript SDK in 5 minutes
+description: Get started with the LangWatch TypeScript SDK to trace LLM calls, track tokens, and prepare data for AI agent testing.
keywords: langwatch, typescript, sdk, guide, observability, tracing, logging, data capture, data collection, data ingestion
---
diff --git a/integration/typescript/integrations/azure.mdx b/integration/typescript/integrations/azure.mdx
index abe7381..c5359d2 100644
--- a/integration/typescript/integrations/azure.mdx
+++ b/integration/typescript/integrations/azure.mdx
@@ -2,7 +2,7 @@
title: Azure OpenAI
sidebarTitle: TypeScript/JS
icon: square-js
-description: LangWatch Azure OpenAI integration guide
+description: Use the LangWatch Azure OpenAI guide to instrument LLM calls, trace interactions, and support AI agent test workflows.
keywords: azure openai, langwatch, typescript, javascript, sdk, instrumentation, opentelemetry
---
diff --git a/integration/typescript/integrations/langchain.mdx b/integration/typescript/integrations/langchain.mdx
index db9d3f1..b1cecda 100644
--- a/integration/typescript/integrations/langchain.mdx
+++ b/integration/typescript/integrations/langchain.mdx
@@ -1,7 +1,7 @@
---
title: LangChain Instrumentation
sidebarTitle: TypeScript/JS
-description: Learn how to instrument Langchain applications with the LangWatch TypeScript SDK.
+description: Instrument LangChain applications with the LangWatch TypeScript SDK to trace chains, RAG flows, and agent evaluation metrics.
icon: square-js
keywords: langchain, instrumentation, callback, langwatch, typescript, tracing
---
diff --git a/integration/typescript/integrations/langgraph.mdx b/integration/typescript/integrations/langgraph.mdx
index 67ea9f3..d575de5 100644
--- a/integration/typescript/integrations/langgraph.mdx
+++ b/integration/typescript/integrations/langgraph.mdx
@@ -1,7 +1,7 @@
---
title: LangGraph Instrumentation
sidebarTitle: TypeScript/JS
-description: Learn how to instrument LangGraph applications with the LangWatch TypeScript SDK.
+description: Instrument LangGraph applications with the LangWatch TypeScript SDK for deep observability and agent testing workflows.
icon: square-js
keywords: langgraph, instrumentation, callback, langwatch, typescript, tracing, state graph, workflow
---
diff --git a/integration/typescript/integrations/open-ai.mdx b/integration/typescript/integrations/open-ai.mdx
index 4147f43..2ede07a 100644
--- a/integration/typescript/integrations/open-ai.mdx
+++ b/integration/typescript/integrations/open-ai.mdx
@@ -1,7 +1,7 @@
---
title: OpenAI
sidebarTitle: TypeScript/JS
-description: LangWatch OpenAI TypeScript integration guide
+description: Follow the LangWatch OpenAI TypeScript integration guide to trace LLM calls and support agent testing workflows.
icon: square-js
keywords: openai, langwatch, typescript, javascript, sdk, instrumentation
---
diff --git a/integration/typescript/integrations/vercel-ai-sdk.mdx b/integration/typescript/integrations/vercel-ai-sdk.mdx
index 3f3d93c..db8e48d 100644
--- a/integration/typescript/integrations/vercel-ai-sdk.mdx
+++ b/integration/typescript/integrations/vercel-ai-sdk.mdx
@@ -1,6 +1,6 @@
---
title: Vercel AI SDK
-description: LangWatch Vercel AI SDK integration guide
+description: Integrate the Vercel AI SDK with LangWatch for TypeScript-based tracing, token tracking, and real-time agent testing.
sidebarTitle: Vercel AI SDK
keywords: vercel ai sdk, langwatch, tracing, observability, vercel, ai, sdk
---
diff --git a/integration/typescript/reference.mdx b/integration/typescript/reference.mdx
index 33e020f..938a3e3 100644
--- a/integration/typescript/reference.mdx
+++ b/integration/typescript/reference.mdx
@@ -1,7 +1,7 @@
---
title: TypeScript SDK API Reference
sidebarTitle: Reference
-description: LangWatch TypeScript SDK API reference
+description: Access the LangWatch TypeScript SDK reference to instrument LLMs, capture traces, and support AI agent testing workflows.
keywords: langwatch, typescript, sdk, api, reference, observability, tracing, logging, data capture, data collection, data ingestion
icon: terminal
---
@@ -411,7 +411,7 @@ Each update operation creates a new version of the prompt. Previous versions are
Deletes a prompt and all its versions from the LangWatch platform.
```typescript
-// Delete a prompt
+// Delete a prompt and all its versions
const result = await langwatch.prompts.delete("customer-support-bot");
```
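The snippet above shows the call itself; as a minimal sketch of using it defensively (assuming the client is created with `new LangWatch()` as elsewhere in this SDK reference, and making no assumptions about the shape of the returned `result`):

```typescript
import { LangWatch } from "langwatch";

// Sketch only: wrap the documented prompts.delete call in error handling so a
// missing or already-deleted prompt does not crash the calling pipeline.
const langwatch = new LangWatch();

async function deletePromptSafely(handle: string): Promise<void> {
  try {
    // Handles like "customer-support-bot" follow the example above.
    const result = await langwatch.prompts.delete(handle);
    console.log(`Deleted prompt "${handle}"`, result);
  } catch (error) {
    console.error(`Failed to delete prompt "${handle}"`, error);
  }
}

await deletePromptSafely("customer-support-bot");
```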
diff --git a/integration/typescript/tutorials/capturing-rag.mdx b/integration/typescript/tutorials/capturing-rag.mdx
index 8df23ef..8d18652 100644
--- a/integration/typescript/tutorials/capturing-rag.mdx
+++ b/integration/typescript/tutorials/capturing-rag.mdx
@@ -1,7 +1,7 @@
---
title: Capturing RAG
sidebarTitle: TypeScript/JS
-description: Learn how to capture Retrieval Augmented Generation (RAG) data with LangWatch.
+description: Learn how to capture Retrieval-Augmented Generation (RAG) data with LangWatch to support evaluations and agent testing.
icon: square-js
keywords: RAG, Retrieval Augmented Generation, LangChain, LangWatch, LangChain RAG, RAG Span, RAG Chunk, RAG Tool
---
diff --git a/integration/typescript/tutorials/debugging-typescript.mdx b/integration/typescript/tutorials/debugging-typescript.mdx
index 8abf1e7..a21061e 100644
--- a/integration/typescript/tutorials/debugging-typescript.mdx
+++ b/integration/typescript/tutorials/debugging-typescript.mdx
@@ -1,6 +1,6 @@
---
title: Debugging and Troubleshooting
-description: Debug LangWatch TypeScript SDK integration issues
+description: Debug TypeScript SDK integrations with LangWatch to fix tracing gaps, evaluation mismatches, and agent testing issues.
sidebarTitle: Debugging
---
diff --git a/integration/typescript/tutorials/manual-instrumentation.mdx b/integration/typescript/tutorials/manual-instrumentation.mdx
index 5d64401..e0b7e5c 100644
--- a/integration/typescript/tutorials/manual-instrumentation.mdx
+++ b/integration/typescript/tutorials/manual-instrumentation.mdx
@@ -1,7 +1,7 @@
---
title: "Manual Instrumentation"
sidebarTitle: "Manual Control"
-description: "Learn advanced manual span management techniques for fine-grained observability control"
+description: "Use LangWatch TypeScript manual instrumentation for fine-grained tracing control during AI agent testing."
---
# Manual Instrumentation
diff --git a/integration/typescript/tutorials/opentelemetry-migration.mdx b/integration/typescript/tutorials/opentelemetry-migration.mdx
index f06b28a..e3c19d2 100644
--- a/integration/typescript/tutorials/opentelemetry-migration.mdx
+++ b/integration/typescript/tutorials/opentelemetry-migration.mdx
@@ -1,6 +1,6 @@
---
title: OpenTelemetry Migration
-description: "Migrate from OpenTelemetry to LangWatch while preserving all your custom configurations"
+description: "Migrate from OpenTelemetry to LangWatch while preserving custom tracing to support more advanced AI agent testing."
---
# OpenTelemetry Migration
diff --git a/integration/typescript/tutorials/tracking-llm-costs.mdx b/integration/typescript/tutorials/tracking-llm-costs.mdx
index b880b27..b4ae01d 100644
--- a/integration/typescript/tutorials/tracking-llm-costs.mdx
+++ b/integration/typescript/tutorials/tracking-llm-costs.mdx
@@ -1,7 +1,7 @@
---
title: Tracking LLM Costs and Tokens
sidebarTitle: TypeScript/JS
-description: Troubleshooting & adjusting cost tracking in LangWatch
+description: Track LLM costs and tokens with LangWatch to monitor efficiency and support performance evaluations in agent testing.
icon: square-js
keywords: LangWatch, cost tracking, token counting, debugging, troubleshooting, model costs, metrics, LLM spans
---
diff --git a/integration/typescript/tutorials/tracking-tool-calls.mdx b/integration/typescript/tutorials/tracking-tool-calls.mdx
index aac7b9a..98d7da4 100644
--- a/integration/typescript/tutorials/tracking-tool-calls.mdx
+++ b/integration/typescript/tutorials/tracking-tool-calls.mdx
@@ -1,7 +1,7 @@
---
title: Tracking Tool Calls
sidebarTitle: TypeScript/JS
-description: Learn how to track tool calls in your LLM agent applications
+description: Track tool calls in TypeScript/JavaScript agent applications with LangWatch to improve debugging and evaluation completeness.
icon: square-js
keywords: langwatch, typescript, javascript, tools, agent, tracking, instrumentation
---
diff --git a/llm-evaluation/list.mdx b/llm-evaluation/list.mdx
index ec9b0e8..2933294 100644
--- a/llm-evaluation/list.mdx
+++ b/llm-evaluation/list.mdx
@@ -1,6 +1,6 @@
---
title: List of Evaluators
-description: Find the evaluator for your use case
+description: Browse all available evaluators in LangWatch to find the right scoring method for your AI agent evaluation use case.
---
LangWatch offers an extensive library of evaluators to help you evaluate the quality and guarantee the safety of your LLM apps.
diff --git a/llm-evaluation/offline/code/evaluation-api.mdx b/llm-evaluation/offline/code/evaluation-api.mdx
index 7de2f18..a08abd7 100644
--- a/llm-evaluation/offline/code/evaluation-api.mdx
+++ b/llm-evaluation/offline/code/evaluation-api.mdx
@@ -1,6 +1,6 @@
---
title: Evaluating via Code
-description: Evaluate and visualize your LLM evals with LangWatch
+description: Evaluate LLM behavior using LangWatch’s Evaluation API to run batch tests, visualize metrics, and automate AI agent evaluations.
---
LangWatch makes it incredibly easy to add evaluation tracking to your existing workflows.
diff --git a/llm-evaluation/offline/platform/answer-correctness.mdx b/llm-evaluation/offline/platform/answer-correctness.mdx
index 812f35e..223cf4e 100644
--- a/llm-evaluation/offline/platform/answer-correctness.mdx
+++ b/llm-evaluation/offline/platform/answer-correctness.mdx
@@ -1,6 +1,6 @@
---
title: How to evaluate that your LLM answers correctly
-description: Measuring your LLM performance with Offline Evaluations
+description: Measure correctness in LLM answers using LangWatch’s Offline Evaluations to compare outputs and support AI agent evaluations.
---
](https://colab.research.google.com/github/langwatch/langevals/blob/main/notebooks/tutorials/dspy_rag.ipynb)
@@ -15515,7 +15515,7 @@ Additionally, you can see each LLM call that has been done during the optimizati
---
title: Tracking Custom DSPy Optimizer
sidebarTitle: Custom Optimizer Tracking
-description: Build custom DSPy optimizers and track them in LangWatch
+description: Track custom DSPy optimizer logic in LangWatch to visualize optimization steps and improve AI agent testing workflows.
---
If you are building a custom DSPy optimizer, then LangWatch won't support tracking it out of the box, but adding track to any custom optimizer is also very simple.
@@ -15572,7 +15572,7 @@ For any questions or issues, feel free to contact our support, join our channel
---
title: DSPy Visualization Quickstart
sidebarTitle: Quickstart
-description: Visualize your DSPy notebooks experimentations to better track and debug the optimization process
+description: Quickly visualize DSPy notebooks and optimization experiments in LangWatch to support debugging and agent evaluation.
---
[
](https://colab.research.google.com/github/langwatch/langwatch/blob/main/python-sdk/examples/dspy_visualization.ipynb)
@@ -15697,7 +15697,7 @@ Ensure that you return the `output` as some evaluations may require it. As you c
---
title: Alerts and Triggers
-description: Be alerted when something goes wrong and trigger actions automatically
+description: Configure Alerts and Triggers in LangWatch to detect regressions, notify teams, and enforce automated guardrails for AI agent testing.
---
## Create triggers based on LangWatch filters
@@ -15749,7 +15749,7 @@ Once the trigger is created, you will receive an alert whenever a message meets
---
title: Exporting Analytics
-description: Build and integrate LangWatch graphs on your own systems and applications
+description: Export LangWatch analytics into your own dashboards to monitor LLM quality, agent testing metrics, and evaluation performance.
---
## Export Analytics with REST Endpoint
@@ -15822,7 +15822,7 @@ If you encounter any hurdles or have questions, our support team is eager to ass
---
title: Annotations
-description: Collaborate with domain experts using annotations
+description: Use annotations in LangWatch for expert labeling, trace review, and structured evaluation workflows for AI agent testing.
---
# Create annotations on messages
@@ -15929,7 +15929,7 @@ Thats it! You can now annotate messages and add your custom score metrics to the
---
title: Instrumenting Custom Evaluator
-description: Add your own evaluation results into LangWatch trace
+description: Integrate custom evaluator results into LangWatch to extend scoring logic for advanced AI agent evaluations.
---
If you have a custom evaluator built in-house which run on your own code, either during the LLM pipeline or after, you can still capture the evaluation results
@@ -16029,7 +16029,7 @@ The evaluation `name` is required and must be a string. The other fields are opt
---
title: Evaluation by Thread
-description: Evaluate your LLM applications by thread
+description: Evaluate LLM applications by thread in LangWatch to analyze conversation-level performance in agent testing setups.
---
With LangWatch, you can evaluate your LLM applications by thread. This approach is useful for analyzing the performance of your LLM applications across entire conversation threads, helping you identify which threads are performing well or poorly.
@@ -18665,7 +18665,7 @@ For the full notebook, check it out on: [GitHub](https://github.com/langwatch/co
---
title: Evaluating an AI Coach with LLM-as-a-Judge
-description: A developer guide for building reliable AI coaches using LangWatch
+description: Evaluate AI coaching systems using LangWatch with LLM-as-a-Judge scoring to measure quality and consistency in agent behavior.
keywords: AI coach, evaluation, LangWatch, AI therapist, AI Leadership
---
@@ -18835,7 +18835,7 @@ For more examples of building and evaluating conversational AI, explore [Scenari
---
title: Evaluating Structured Data Extraction
-description: A developer guide for evaluating structured data extraction using LangWatch
+description: Evaluate structured data extraction using LangWatch to validate output correctness and strengthen AI agent testing pipelines.
keywords: structured data extraction, evaluation, LangWatch, ground truth
---
@@ -19008,7 +19008,7 @@ Now that you've set up evaluation for your structured data extraction, you can u
---
title: Evaluating a RAG Chatbot for Technical Manuals
-description: A developer guide for building reliable RAG systems for technical documentation using LangWatch
+description: Use LangWatch to evaluate a technical RAG chatbot by measuring retrieval quality, hallucination rates, and agent performance.
keywords: RAG, technical documentation, evaluation, LangWatch, embeddings, chunking, faithfulness, retrieval evaluation, ground truth
---
@@ -19164,7 +19164,7 @@ For more implementation examples, check out our [RAG cookbook](/cookbooks/build-
---
title: Overview
-description: Track user interactions with your LLM applications
+description: Track user interactions in LangWatch to analyze LLM usage patterns and power AI agent evaluation workflows.
---
Learn how to track user interactions with your LLM applications using the LangWatch REST API. This section provides detailed guides for predefined events such as thumbs up/down, text selection, and waiting times, as well as instructions for custom event tracking.
@@ -19182,7 +19182,7 @@ Learn how to track user interactions with your LLM applications using the LangWa
---
title: Selected Text Events
-description: Track when a user selects text generated by your LLM application
+description: Track selected text events in LangWatch to understand user behavior and improve LLM performance across AI agent evaluations.
---
Selected text events allow you to track when a user selects text generated by your LLM application, indicating the response was useful enough to be copied and used elsewhere.
@@ -19240,7 +19240,7 @@ The `text_length` metric is mandatory and should reflect the length of the selec
---
title: Custom Events
-description: Track any user events with your LLM application, with textual or numeric metrics
+description: Track custom user events in your LLM application using LangWatch to support analytics, evaluations, and agent testing workflows.
---
Apart from the reserved pre-defined events, you can also define your own events relevant to your business to correlate with your LLM messages and threads to measure your product performance.
@@ -19304,7 +19304,7 @@ On the dashboard, you can visualize the tracked events on the "Events" tab when
---
title: Waited To Finish Events
-description: Track if users leave before the LLM application finishes generating a response
+description: Track whether users leave before the LLM response completes to identify UX issues that affect downstream agent evaluations.
---
Waited to finish events are used to determine if users are waiting for the LLM application to finish generating a response or if they leave before it's completed. This is useful for capturing user impatience with regards to the response generation.
@@ -19368,7 +19368,7 @@ curl -X POST "https://app.langwatch.ai/api/track_event" \\
---
title: Thumbs Up/Down
-description: Track user feedback on specific messages or interactions with your chatbot or LLM application
+description: Track thumbs up/down user feedback in LangWatch to evaluate LLM quality and guide AI agent testing improvements.
---
Thumbs up/down events are used to capture user feedback on specific messages or interactions with your chatbot or LLM application, with an optional textual feedback.
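As a rough sketch of what reporting such feedback could look like against the `https://app.langwatch.ai/api/track_event` endpoint shown earlier in this file (only the endpoint URL, the mandatory `vote` metric, and the optional `feedback` field come from the docs; the `X-Auth-Token` header name, the `thumbs_up_down` event type string, and the rest of the payload layout are assumptions):

```typescript
// Hedged sketch: send a thumbs-up event for a given trace via the REST API.
const response = await fetch("https://app.langwatch.ai/api/track_event", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "X-Auth-Token": process.env.LANGWATCH_API_KEY ?? "", // assumed header name
  },
  body: JSON.stringify({
    trace_id: "trace_123", // hypothetical trace id
    event_type: "thumbs_up_down", // assumed event type identifier
    metrics: { vote: 1 }, // mandatory: 1 for thumbs up, -1 for thumbs down
    event_details: { feedback: "Clear and helpful answer" }, // optional text feedback
  }),
});
console.log("track_event status:", response.status);
```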
@@ -19430,7 +19430,7 @@ The `vote` metric is mandatory and must be either `1` or `-1`. The `feedback` fi
---
title: Environment Variables
-description: Complete list of environment variables for LangWatch self-hosting
+description: Review all environment variables available for LangWatch self-hosting to configure observability and AI agent testing pipelines.
---
# Environment Variables
@@ -19560,7 +19560,7 @@ Check out the [Hybrid Setup](/hybrid-setup/overview) page for more details.
### Docker Compose
-For the open-source version, LangWatch is available as a Docker Compose setup for easy deployment on your local machine.
+For the open-source version, deploy LangWatch using Docker Compose for easy local setups that support observability, evaluations, and AI agent testing.
Check out the [Docker Compose](/self-hosting/docker-compose) page for more details.
@@ -19575,7 +19575,7 @@ Check out the [Helm Chart](/self-hosting/kubernetes-helm) page for more details.
---
title: OnPrem
-description: LangWatch on-premises solution.
+description: Deploy LangWatch on-premises for full control over data, compliance, and secure AI agent evaluation workflows.
---
@@ -19661,7 +19661,7 @@ Schedule a free consultation with our team to get started on the On-Prem setup:
---
title: Docker Compose
-description: LangWatch is available as a Docker Compose setup for easy deployment on your local machine
+description: Deploy LangWatch using Docker Compose for easy local setups supporting observability, evaluations, and AI agent testing.
keywords: self-hosting, docker, docker-compose, deployment, langwatch
---
@@ -19702,7 +19702,7 @@ Check out the [Helm Chart](/self-hosting/kubernetes-helm) page for more details.
---
title: Docker Images
-description: Overview of LangWatch Docker images and their endpoints
+description: Explore LangWatch Docker images and endpoints for setting up observability, evaluations, and AI agent testing environments.
---
# LangWatch Docker Images
@@ -19798,7 +19798,7 @@ For detailed instructions on how to deploy LangWatch using Docker or Kubernetes,
---
title: Kubernetes (Helm Chart)
-description: LangWatch is available as a Kubernetes Helm chart for easy deployment on Kubernetes
+description: Install LangWatch using a Kubernetes Helm chart for production-grade deployments supporting LLM and agent testing workflows.
keywords: self-hosting, helm, kubernetes, deployment, langwatch
---
@@ -20232,7 +20232,7 @@ Alternatively, configure an Istio `ServiceEntry`/egress policy for your OpenSear
---
title: SSO
-description: SSO Setup for LangWatch
+description: Configure SSO for LangWatch to secure access to evaluation dashboards, observability data, and agent testing environments.
keywords: [sso, hybrid, azure, auth0]
---
@@ -20447,7 +20447,7 @@ We look forward to helping you set up LangWatch seamlessly and efficiently.
---
title: Infra Monitoring
-description: Grafana/Prometheus setup for LangWatch
+description: Set up Grafana and Prometheus for LangWatch infra monitoring to track system health in large-scale agent testing setups.
---
## Installation with Monitoring
@@ -20540,7 +20540,7 @@ You can find the LangWatch dashboard template [here](/langwatch-dashboard.json).
---
title: Overview
-description: LangWatch offers a hybrid setup for companies that require strict data control and compliance.
+description: Learn how LangWatch's hybrid setup ensures strict data control, meets compliance needs, and secures AI agent testing infrastructure.
---
# Hybrid Setup Overview
@@ -20580,7 +20580,7 @@ For more information or to schedule a consultation, please [contact our sales te
---
title: Elasticsearch
-description: Elasticsearch Setup for LangWatch Hybrid Deployment
+description: Set up Elasticsearch for LangWatch Hybrid deployments to enable scalable search and analysis of traces and agent evaluations.
---
### Introduction
@@ -20633,7 +20633,7 @@ LangWatch relies on Elasticsearch (or OpenSearch) for core functionality such as
---
title: S3 Storage
-description: S3 Storage Setup for LangWatch Hybrid Deployment
+description: Configure S3 storage for LangWatch Hybrid deployments to store traces, evaluations, and AI agent testing datasets.
---
### S3-Compatible Object Storage Setup
@@ -20697,7 +20697,7 @@ openapi: 'GET /api/trace/{id}'
---
title: 'Overview'
-description: 'A Trace is a collection of runs that are related to a single operation'
+description: 'Understand LangWatch Traces, how runs are grouped into a single operation, and how to use them for LLM observability and AI agent evaluations.'
---
## Intro
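The hunk above updates the Traces overview; as an illustrative sketch of calling the `GET /api/trace/{id}` route named in its OpenAPI context line (the base URL matches the one used in the event-tracking examples, while the `X-Auth-Token` header name and the response shape are assumptions):

```typescript
// Hedged sketch: fetch a single trace by id and inspect the raw JSON payload.
const traceId = "trace_123"; // hypothetical trace id
const res = await fetch(`https://app.langwatch.ai/api/trace/${traceId}`, {
  headers: { "X-Auth-Token": process.env.LANGWATCH_API_KEY ?? "" }, // assumed header name
});
if (!res.ok) {
  throw new Error(`Failed to fetch trace ${traceId}: ${res.status}`);
}
const trace = await res.json();
console.log("Fetched trace:", trace);
```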
@@ -20764,7 +20764,7 @@ openapi: 'POST /api/trigger/slack'
# FILE: ./api-reference/datasets/post-dataset-entries.mdx
---
-title: 'Add entries to a dataset'
+title: 'Add dataset entries programmatically using the LangWatch API to build evaluation sets for LLM testing and agent validation.'
openapi: 'POST /api/dataset/{slug}/entries'
---
@@ -21061,7 +21061,7 @@ openapi: post /legacy/ragas_answer_relevancy/evaluate
---
title: 'Overview'
-description: 'Annotations are used to annotate traces with additional information'
+description: 'Learn how annotations enhance trace review, labeling, and evaluation workflows for more reliable AI agent testing.'
---
## Intro
@@ -21080,7 +21080,7 @@ To make a call to the Annotations API, you will need to pass through your LangWa
- `DELETE /api/annotations/:id` - Delete a single annotation
- `PATCH /api/annotations/:id` - Update a single annotation
- `GET /api/annotations/trace/:id` - Get the annotations for a single trace
-- `POST /api/annotations/trace/:id` - Create an annotation for a single trace
+- `POST /api/annotations/trace/:id` - Create annotations for traces to support domain labeling, evaluation scoring, and agent testing workflows.
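As a hedged sketch of one of the routes listed above, deleting a single annotation by id (the route comes from the list; the base URL and the `X-Auth-Token` header name are assumptions carried over from the other REST sketches in this document):

```typescript
// Hedged sketch: remove one annotation using DELETE /api/annotations/:id.
const annotationId = "annotation_123"; // hypothetical annotation id
const res = await fetch(`https://app.langwatch.ai/api/annotations/${annotationId}`, {
  method: "DELETE",
  headers: { "X-Auth-Token": process.env.LANGWATCH_API_KEY ?? "" }, // assumed header name
});
console.log(res.ok ? "Annotation deleted" : `Delete failed with status ${res.status}`);
```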
---
@@ -21231,7 +21231,7 @@ openapi: post /api/scenario-events
---
title: "Overview"
-description: "Prompts are used to manage and version your prompts"
+description: "Understand how prompts are versioned, managed, and used in evaluations and agent testing within LangWatch."
---
## Intro
@@ -21244,11 +21244,11 @@ To make a call to the Prompts API, you will need to pass through your LangWatch
#### Allowed Methods
-- `GET /api/prompts` - Get all prompts for a project
+- `GET /api/prompts` - Retrieve all prompts for a project to manage prompt versioning, run comparisons, and support agent testing pipelines.
- `POST /api/prompts` - Create a new prompt
-- `GET /api/prompts/:id` - Get a specific prompt
-- `PUT /api/prompts/:id` - Update a prompt
-- `DELETE /api/prompts/:id` - Delete a prompt
+- `GET /api/prompts/:id` - Retrieve a specific prompt to support versioning, evaluation workflows, and agent testing pipelines.
+- `PUT /api/prompts/:id` - Update a prompt to refine versions, run new evaluations, and test agent behavior under changing configurations.
+- `DELETE /api/prompts/:id` - Delete a prompt and all of its versions to keep prompt management and evaluation pipelines clean.
- `GET /api/prompts/:id/versions` - Get all versions for a prompt
- `POST /api/prompts/:id/versions` - Create a new version for a prompt
diff --git a/llms.txt b/llms.txt
index d6b9c58..f417355 100644
--- a/llms.txt
+++ b/llms.txt
@@ -5,35 +5,25 @@ Always navigate to docs links using the .md extension for better readability.
## Get Started
-- [LangWatch: The Complete LLMOps Platform](https://langwatch.ai/docs/introduction.md): Ship AI agents 8x faster with comprehensive observability, evaluation, and prompt optimization. Open-source platform, with over 2.5k stars on GitHub.
-- [Better Agents](https://langwatch.ai/docs/better-agents/overview.md): Build reliable, testable, production-grade AI agents with Better Agents CLI - the reliability layer for agent development
-- [LangWatch MCP Server](https://langwatch.ai/docs/integration/mcp.md): Turn your coding assistant into a LangWatch expert
-
-## Agent Simulations
-
-- [Introduction to Agent Testing](https://langwatch.ai/docs/agent-simulations/introduction.md)
-- [Overview](https://langwatch.ai/docs/agent-simulations/overview.md)
-- [Getting Started](https://langwatch.ai/docs/agent-simulations/getting-started.md)
-- [Simulation Sets](https://langwatch.ai/docs/agent-simulations/set-overview.md)
-- [Batch Runs](https://langwatch.ai/docs/agent-simulations/batch-runs.md)
-- [Individual Run View](https://langwatch.ai/docs/agent-simulations/individual-run.md)
+- [LangWatch: The Complete LLMOps Platform](https://docs.langwatch.ai/introduction.md): Ship AI agents 8x faster with comprehensive observability, evaluation, and prompt optimization. Open-source platform, with over 2.5k stars on GitHub.
+- [LangWatch MCP Server](https://docs.langwatch.ai/integration/mcp.md): Use the LangWatch MCP Server to extend your coding assistant with deep LangWatch insights for tracing, testing, and agent evaluations.
## Observability
-- [Observability & Tracing](https://langwatch.ai/docs/observability/overview.md): Monitor, debug, and optimize your LLM applications with comprehensive observability and tracing capabilities
-- [Quick Start](https://langwatch.ai/docs/integration/quick-start.md)
-- [Concepts](https://langwatch.ai/docs/concepts.md): LLM tracing and observability conceptual guide
+- [Observability & Tracing](https://docs.langwatch.ai/observability/overview.md): Monitor, debug, and optimize your LLM applications with comprehensive observability and tracing capabilities
+- [Quick Start](https://docs.langwatch.ai/integration/quick-start.md)
+- [Concepts](https://docs.langwatch.ai/concepts.md): Explore core concepts of LLM tracing, observability, datasets, and evaluations in LangWatch to design reliable AI agent testing workflows.
### User Events
-- [Overview](https://langwatch.ai/docs/user-events/overview.md): Track user interactions with your LLM applications
-- [Thumbs Up/Down](https://langwatch.ai/docs/user-events/thumbs-up-down.md): Track user feedback on specific messages or interactions with your chatbot or LLM application
-- [Waited To Finish Events](https://langwatch.ai/docs/user-events/waited-to-finish.md): Track if users leave before the LLM application finishes generating a response
-- [Selected Text Events](https://langwatch.ai/docs/user-events/selected-text.md): Track when a user selects text generated by your LLM application
-- [Custom Events](https://langwatch.ai/docs/user-events/custom.md): Track any user events with your LLM application, with textual or numeric metrics
+- [Overview](https://docs.langwatch.ai/user-events/overview.md): Track user interactions in LangWatch to analyze LLM usage patterns and power AI agent evaluation workflows.
+- [Thumbs Up/Down](https://docs.langwatch.ai/user-events/thumbs-up-down.md): Track thumbs up/down user feedback in LangWatch to evaluate LLM quality and guide AI agent testing improvements.
+- [Waited To Finish Events](https://docs.langwatch.ai/user-events/waited-to-finish.md): Track whether users leave before the LLM response completes to identify UX issues that affect downstream agent evaluations.
+- [Selected Text Events](https://docs.langwatch.ai/user-events/selected-text.md): Track selected text events in LangWatch to understand user behavior and improve LLM performance across AI agent evaluations.
+- [Custom Events](https://docs.langwatch.ai/user-events/custom.md): Track custom user events in your LLM application using LangWatch to support analytics, evaluations, and agent testing workflows.
-- [Alerts and Triggers](https://langwatch.ai/docs/features/triggers.md): Be alerted when something goes wrong and trigger actions automatically
-- [Exporting Analytics](https://langwatch.ai/docs/features/embedded-analytics.md): Build and integrate LangWatch graphs on your own systems and applications
+- [Alerts and Triggers](https://docs.langwatch.ai/features/triggers.md): Configure Alerts and Triggers in LangWatch to detect regressions, notify teams, and enforce automated guardrails for AI agent testing.
+- [Exporting Analytics](https://docs.langwatch.ai/features/embedded-analytics.md): Export LangWatch analytics into your own dashboards to monitor LLM quality, agent testing metrics, and evaluation performance.
# Integrations
@@ -45,25 +35,25 @@ Always navigate to docs links using the .md extension for better readability.
### Python
-- [Python Integration Guide](https://langwatch.ai/docs/integration/python/guide.md): LangWatch Python SDK integration guide
-- [Python SDK API Reference](https://langwatch.ai/docs/integration/python/reference.md): LangWatch Python SDK API reference
-- [Manual Instrumentation](https://langwatch.ai/docs/integration/python/tutorials/manual-instrumentation.md): Learn how to manually instrument your code with the LangWatch Python SDK
-- [OpenTelemetry Migration](https://langwatch.ai/docs/integration/python/tutorials/open-telemetry.md): Learn how to integrate the LangWatch Python SDK with your existing OpenTelemetry setup.
+- [Python Integration Guide](https://docs.langwatch.ai/integration/python/guide.md): Follow the LangWatch Python integration guide to capture traces, debug pipelines, and enable observability for agent testing.
+- [Python SDK API Reference](https://docs.langwatch.ai/integration/python/reference.md): Use the LangWatch Python SDK API reference to implement tracing, events, and evaluation logic for AI agent testing workflows.
+- [Manual Instrumentation](https://docs.langwatch.ai/integration/python/tutorials/manual-instrumentation.md): Learn manual instrumentation with the LangWatch Python SDK for full control over tracing, evaluations, and agent testing.
+- [OpenTelemetry Migration](https://docs.langwatch.ai/integration/python/tutorials/open-telemetry.md): Integrate LangWatch with existing OpenTelemetry setups to enhance tracing, analysis, and agent evaluation workflows.
### TypeScript
-- [TypeScript Integration Guide](https://langwatch.ai/docs/integration/typescript/guide.md): Get started with LangWatch TypeScript SDK in 5 minutes
-- [TypeScript SDK API Reference](https://langwatch.ai/docs/integration/typescript/reference.md): LangWatch TypeScript SDK API reference
-- [Filtering Spans in TypeScript](https://langwatch.ai/docs/integration/typescript/tutorials/filtering-spans.md): Filter which spans are exported to LangWatch using presets or explicit criteria.
-- [Manual Instrumentation](https://langwatch.ai/docs/integration/typescript/tutorials/manual-instrumentation.md): Learn advanced manual span management techniques for fine-grained observability control
-- [OpenTelemetry Migration](https://langwatch.ai/docs/integration/typescript/tutorials/opentelemetry-migration.md): Migrate from OpenTelemetry to LangWatch while preserving all your custom configurations
+- [TypeScript Integration Guide](https://docs.langwatch.ai/integration/typescript/guide.md): Get started with the LangWatch TypeScript SDK to trace LLM calls, track tokens, and prepare data for AI agent testing.
+- [TypeScript SDK API Reference](https://docs.langwatch.ai/integration/typescript/reference.md): Access the LangWatch TypeScript SDK reference to instrument LLMs, capture traces, and support AI agent testing workflows.
+- [Filtering Spans in TypeScript](https://docs.langwatch.ai/integration/typescript/tutorials/filtering-spans.md): Filter which spans are exported to LangWatch using presets or explicit criteria.
+- [Manual Instrumentation](https://docs.langwatch.ai/integration/typescript/tutorials/manual-instrumentation.md): Use LangWatch TypeScript manual instrumentation for fine-grained tracing control during AI agent testing.
+- [OpenTelemetry Migration](https://docs.langwatch.ai/integration/typescript/tutorials/opentelemetry-migration.md): Migrate from OpenTelemetry to LangWatch while preserving custom tracing to support more advanced AI agent testing.
### Go
-- [Go Integration Guide](https://langwatch.ai/docs/integration/go/guide.md): LangWatch Go SDK integration guide for setting up LLM observability and tracing.
-- [Go SDK API Reference](https://langwatch.ai/docs/integration/go/reference.md): Complete API reference for the LangWatch Go SDK, including core functions, OpenAI instrumentation, and span types.
+- [Go Integration Guide](https://docs.langwatch.ai/integration/go/guide.md): Use the LangWatch Go SDK to trace LLM calls, measure performance, and support observability-driven AI agent testing.
+- [Go SDK API Reference](https://docs.langwatch.ai/integration/go/reference.md): Complete API reference for the LangWatch Go SDK, including core functions, OpenAI instrumentation, and span types.
-- [OpenTelemetry Integration Guide](https://langwatch.ai/docs/integration/opentelemetry/guide.md): Use OpenTelemetry to capture LLM traces and send them to LangWatch from any programming language
+- [OpenTelemetry Integration Guide](https://docs.langwatch.ai/integration/opentelemetry/guide.md): Integrate OpenTelemetry with LangWatch to collect LLM spans from any language for unified AI agent evaluation data.
### Tutorials
@@ -74,8 +64,8 @@ Always navigate to docs links using the .md extension for better readability.
#### Capturing RAG
-- [Capturing RAG](https://langwatch.ai/docs/integration/python/tutorials/capturing-rag.md): Learn how to capture Retrieval Augmented Generation (RAG) data with LangWatch.
-- [Capturing RAG](https://langwatch.ai/docs/integration/typescript/tutorials/capturing-rag.md): Learn how to capture Retrieval Augmented Generation (RAG) data with LangWatch.
+- [Capturing RAG](https://docs.langwatch.ai/integration/python/tutorials/capturing-rag.md): Learn how to capture Retrieval-Augmented Generation (RAG) data with LangWatch to support evaluations and agent testing.
+- [Capturing RAG](https://docs.langwatch.ai/integration/typescript/tutorials/capturing-rag.md): Learn how to capture Retrieval-Augmented Generation (RAG) data with LangWatch to support evaluations and agent testing.
#### Capturing Metadata
@@ -84,44 +74,44 @@ Always navigate to docs links using the .md extension for better readability.
#### Tracking LLM Costs
-- [Tracking LLM Costs and Tokens](https://langwatch.ai/docs/integration/python/tutorials/tracking-llm-costs.md): Troubleshooting & adjusting cost tracking in LangWatch
-- [Tracking LLM Costs and Tokens](https://langwatch.ai/docs/integration/typescript/tutorials/tracking-llm-costs.md): Troubleshooting & adjusting cost tracking in LangWatch
+- [Tracking LLM Costs and Tokens](https://docs.langwatch.ai/integration/python/tutorials/tracking-llm-costs.md): Track LLM costs and tokens with LangWatch to monitor efficiency and support performance evaluations in agent testing.
+- [Tracking LLM Costs and Tokens](https://docs.langwatch.ai/integration/typescript/tutorials/tracking-llm-costs.md): Track LLM costs and tokens with LangWatch to monitor efficiency and support performance evaluations in agent testing.
#### Tracking Tool Calls
-- [Tracking Tool Calls](https://langwatch.ai/docs/integration/python/tutorials/tracking-tool-calls.md): Learn how to track tool calls in your LLM agent applications
-- [Tracking Tool Calls](https://langwatch.ai/docs/integration/typescript/tutorials/tracking-tool-calls.md): Learn how to track tool calls in your LLM agent applications
+- [Tracking Tool Calls](https://docs.langwatch.ai/integration/python/tutorials/tracking-tool-calls.md): Track tool calls in Python-based agent applications with LangWatch to improve debugging and evaluation completeness.
+- [Tracking Tool Calls](https://docs.langwatch.ai/integration/typescript/tutorials/tracking-tool-calls.md): Track tool calls in TypeScript/JavaScript agent applications with LangWatch to improve debugging and evaluation completeness.
## Frameworks
### LangChain
-- [LangChain Instrumentation](https://langwatch.ai/docs/integration/python/integrations/langchain.md): Learn how to instrument Langchain applications with the LangWatch Python SDK.
-- [LangChain Instrumentation](https://langwatch.ai/docs/integration/typescript/integrations/langchain.md): Learn how to instrument Langchain applications with the LangWatch TypeScript SDK.
+- [LangChain Instrumentation](https://docs.langwatch.ai/integration/python/integrations/langchain.md): Instrument LangChain applications with LangWatch to trace chains, RAG flows, and metrics for AI agent evaluations.
+- [LangChain Instrumentation](https://docs.langwatch.ai/integration/typescript/integrations/langchain.md): Instrument LangChain applications with the LangWatch TypeScript SDK to trace chains, RAG flows, and agent evaluation metrics.
### LangGraph
-- [LangGraph Instrumentation](https://langwatch.ai/docs/integration/python/integrations/langgraph.md): Learn how to instrument LangGraph applications with the LangWatch Python SDK.
-- [LangGraph Instrumentation](https://langwatch.ai/docs/integration/typescript/integrations/langgraph.md): Learn how to instrument LangGraph applications with the LangWatch TypeScript SDK.
-
-- [Vercel AI SDK](https://langwatch.ai/docs/integration/typescript/integrations/vercel-ai-sdk.md): LangWatch Vercel AI SDK integration guide
-- [LiteLLM Instrumentation](https://langwatch.ai/docs/integration/python/integrations/lite-llm.md): Learn how to instrument LiteLLM calls with the LangWatch Python SDK.
-- [OpenAI Agents SDK Instrumentation](https://langwatch.ai/docs/integration/python/integrations/open-ai-agents.md): Learn how to instrument OpenAI Agents with the LangWatch Python SDK
-- [PydanticAI Instrumentation](https://langwatch.ai/docs/integration/python/integrations/pydantic-ai.md): Learn how to instrument PydanticAI applications with the LangWatch Python SDK.
-- [Mastra](https://langwatch.ai/docs/integration/typescript/integrations/mastra.md): Learn how to integrate Mastra, a TypeScript agent framework, with LangWatch for observability and tracing.
-- [DSPy Instrumentation](https://langwatch.ai/docs/integration/python/integrations/dspy.md): Learn how to instrument DSPy programs with the LangWatch Python SDK
-- [LlamaIndex Instrumentation](https://langwatch.ai/docs/integration/python/integrations/llamaindex.md): Learn how to instrument LlamaIndex applications with LangWatch.
-- [Haystack Instrumentation](https://langwatch.ai/docs/integration/python/integrations/haystack.md): Learn how to instrument Haystack pipelines with LangWatch using community OpenTelemetry instrumentors.
-- [Strands Agents Instrumentation](https://langwatch.ai/docs/integration/python/integrations/strand-agents.md): Learn how to instrument Strands Agents applications with LangWatch.
-- [Agno Instrumentation](https://langwatch.ai/docs/integration/python/integrations/agno.md): Learn how to instrument Agno agents and send traces to LangWatch using the Python SDK.
-- [CrewAI](https://langwatch.ai/docs/integration/python/integrations/crew-ai.md): Learn how to instrument the CrewAI Python SDK with LangWatch.
-- [AutoGen Instrumentation](https://langwatch.ai/docs/integration/python/integrations/autogen.md): Learn how to instrument AutoGen applications with LangWatch.
-- [Semantic Kernel Instrumentation](https://langwatch.ai/docs/integration/python/integrations/semantic-kernel.md): Learn how to instrument Semantic Kernel applications with LangWatch.
-- [Spring AI (Java) Integration](https://langwatch.ai/docs/integration/java/integrations/spring-ai.md): Configure OpenTelemetry in your Spring AI app to send traces to LangWatch.
-- [PromptFlow Instrumentation](https://langwatch.ai/docs/integration/python/integrations/promptflow.md): Learn how to instrument PromptFlow applications with LangWatch.
-- [Instructor AI Instrumentation](https://langwatch.ai/docs/integration/python/integrations/instructor.md): Learn how to instrument Instructor AI applications with LangWatch using OpenInference.
-- [SmolAgents Instrumentation](https://langwatch.ai/docs/integration/python/integrations/smolagents.md): Learn how to instrument SmolAgents applications with LangWatch.
-- [Google Agent Development Kit (ADK) Instrumentation](https://langwatch.ai/docs/integration/python/integrations/google-ai.md): Learn how to instrument Google Agent Development Kit (ADK) applications with LangWatch.
+- [LangGraph Instrumentation](https://docs.langwatch.ai/integration/python/integrations/langgraph.md): Instrument LangGraph applications with the LangWatch Python SDK to trace graph nodes, analyze workflows, and support AI agent testing.
+- [LangGraph Instrumentation](https://docs.langwatch.ai/integration/typescript/integrations/langgraph.md): Instrument LangGraph applications with the LangWatch TypeScript SDK for deep observability and agent testing workflows.
+
+- [Vercel AI SDK](https://docs.langwatch.ai/integration/typescript/integrations/vercel-ai-sdk.md): Integrate the Vercel AI SDK with LangWatch for TypeScript-based tracing, token tracking, and real-time agent testing.
+- [LiteLLM Instrumentation](https://docs.langwatch.ai/integration/python/integrations/lite-llm.md): Instrument LiteLLM calls with the LangWatch Python SDK to capture LLM traces, measure quality, and support AI agent testing workflows.
+- [OpenAI Agents SDK Instrumentation](https://docs.langwatch.ai/integration/python/integrations/open-ai-agents.md): Instrument OpenAI Agents with the LangWatch Python SDK to capture traces, run AI agent evaluations, and debug agent testing scenarios.
+- [PydanticAI Instrumentation](https://docs.langwatch.ai/integration/python/integrations/pydantic-ai.md): Connect PydanticAI applications to LangWatch using the Python SDK to trace calls, debug structured outputs, and improve AI agent evaluations.
+- [Mastra](https://docs.langwatch.ai/integration/typescript/integrations/mastra.md): Learn how to integrate Mastra, a TypeScript agent framework, with LangWatch for observability and tracing.
+- [DSPy Instrumentation](https://docs.langwatch.ai/integration/python/integrations/dspy.md): Learn how to instrument DSPy programs with the LangWatch Python SDK to trace RAG pipelines, optimize prompts, and improve AI agent evaluations.
+- [LlamaIndex Instrumentation](https://docs.langwatch.ai/integration/python/integrations/llamaindex.md): Instrument LlamaIndex applications with LangWatch to trace retrieval, generation, and RAG behavior for AI agent evaluations.
+- [Haystack Instrumentation](https://docs.langwatch.ai/integration/python/integrations/haystack.md): Learn how to instrument Haystack pipelines with LangWatch using community OpenTelemetry instrumentors.
+- [Strands Agents Instrumentation](https://docs.langwatch.ai/integration/python/integrations/strand-agents.md): Instrument Strands Agents with LangWatch to capture decision flows and support repeatable AI agent testing.
+- [Agno Instrumentation](https://docs.langwatch.ai/integration/python/integrations/agno.md): Instrument Agno agents with LangWatch’s Python SDK to send traces, analyze behaviors, and strengthen AI agent testing and evaluations.
+- [CrewAI](https://docs.langwatch.ai/integration/python/integrations/crew-ai.md): Integrate the CrewAI Python SDK with LangWatch to trace multi-agent workflows, debug failures, and support systematic AI agent testing.
+- [AutoGen Instrumentation](https://docs.langwatch.ai/integration/python/integrations/autogen.md): Integrate AutoGen applications with LangWatch to trace multi-agent interactions and run systematic AI agent evaluations.
+- [Semantic Kernel Instrumentation](https://docs.langwatch.ai/integration/python/integrations/semantic-kernel.md): Instrument Semantic Kernel applications with LangWatch to trace skills, pipelines, and agent evaluation stages.
+- [Spring AI (Java) Integration](https://docs.langwatch.ai/integration/java/integrations/spring-ai.md): Configure Spring AI with OpenTelemetry and LangWatch to capture LLM traces and enable full-stack AI agent evaluations.
+- [PromptFlow Instrumentation](https://docs.langwatch.ai/integration/python/integrations/promptflow.md): Instrument PromptFlow with LangWatch to trace pipelines, measure outcomes, and power AI agent testing workflows.
+- [Instructor AI Instrumentation](https://docs.langwatch.ai/integration/python/integrations/instructor.md): Instrument Instructor AI with LangWatch to track structured outputs, detect errors, and enhance AI agent testing workflows.
+- [SmolAgents Instrumentation](https://docs.langwatch.ai/integration/python/integrations/smolagents.md): Add SmolAgents tracing with LangWatch to analyze behaviors, detect errors, and improve AI agent testing accuracy.
+- [Google Agent Development Kit (ADK) Instrumentation](https://docs.langwatch.ai/integration/python/integrations/google-ai.md): Integrate Google ADK agents into LangWatch to trace actions, tools, and interactions for structured AI agent evaluations.
## Model Providers
@@ -129,20 +119,20 @@ Always navigate to docs links using the .md extension for better readability.
### OpenAI
-- [OpenAI Instrumentation](https://langwatch.ai/docs/integration/python/integrations/open-ai.md): Learn how to instrument OpenAI API calls with the LangWatch Python SDK
-- [OpenAI](https://langwatch.ai/docs/integration/typescript/integrations/open-ai.md): LangWatch OpenAI TypeScript integration guide
-- [OpenAI Instrumentation](https://langwatch.ai/docs/integration/go/integrations/open-ai.md): Learn how to instrument OpenAI API calls with the LangWatch Go SDK using middleware.
+- [OpenAI Instrumentation](https://docs.langwatch.ai/integration/python/integrations/open-ai.md): Instrument OpenAI API calls with the LangWatch Python SDK to capture traces, debug, and support AI agent testing workflows.
+- [OpenAI](https://docs.langwatch.ai/integration/typescript/integrations/open-ai.md): Follow the LangWatch OpenAI TypeScript integration guide to trace LLM calls and support agent testing workflows.
+- [OpenAI Instrumentation](https://docs.langwatch.ai/integration/go/integrations/open-ai.md): Instrument OpenAI API calls with the Go SDK to trace LLM interactions, measure performance, and support agent evaluation pipelines.
### Anthropic (Claude)
-- [Anthropic Instrumentation](https://langwatch.ai/docs/integration/python/integrations/anthropic.md): Learn how to instrument Anthropic API calls with the LangWatch Python SDK
-- [Anthropic (Claude) Integration](https://langwatch.ai/docs/integration/go/integrations/anthropic.md): Learn how to instrument Anthropic Claude API calls in Go using LangWatch.
+- [Anthropic Instrumentation](https://docs.langwatch.ai/integration/python/integrations/anthropic.md): Instrument Anthropic API calls with LangWatch’s Python SDK to trace usage, debug issues, and support AI agent testing.
+- [Anthropic (Claude) Integration](https://docs.langwatch.ai/integration/go/integrations/anthropic.md): Instrument Anthropic Claude API calls in Go using LangWatch to track performance, detect errors, and improve AI agent testing.
### Microsoft Azure
-- [Azure AI Inference SDK Instrumentation](https://langwatch.ai/docs/integration/python/integrations/azure-ai.md): Learn how to instrument the Azure AI Inference Python SDK with LangWatch.
-- [Azure OpenAI](https://langwatch.ai/docs/integration/typescript/integrations/azure.md): LangWatch Azure OpenAI integration guide
-- [Azure OpenAI Integration](https://langwatch.ai/docs/integration/go/integrations/azure-openai.md): Learn how to instrument Azure OpenAI API calls in Go using the LangWatch SDK.
+- [Azure AI Inference SDK Instrumentation](https://docs.langwatch.ai/integration/python/integrations/azure-ai.md): Instrument Azure AI Inference SDK calls with LangWatch to trace requests, monitor quality, and run AI agent evaluations.
+- [Azure OpenAI](https://docs.langwatch.ai/integration/typescript/integrations/azure.md): Use the LangWatch Azure OpenAI guide to instrument LLM calls, trace interactions, and support AI agent test workflows.
+- [Azure OpenAI Integration](https://docs.langwatch.ai/integration/go/integrations/azure-openai.md): Instrument Azure OpenAI API calls in Go using LangWatch to monitor model usage, latency, and AI agent evaluation metrics.
### Google Cloud
@@ -151,94 +141,102 @@ Always navigate to docs links using the .md extension for better readability.
### Amazon Web Services
-- [AWS Bedrock Instrumentation](https://langwatch.ai/docs/integration/python/integrations/aws-bedrock.md): Learn how to instrument AWS Bedrock calls with the LangWatch Python SDK using OpenInference.
+- [AWS Bedrock Instrumentation](https://docs.langwatch.ai/integration/python/integrations/aws-bedrock.md): Instrument AWS Bedrock calls using OpenInference and LangWatch to capture metrics and behaviors for AI agent testing workflows.
-- [Groq Integration](https://langwatch.ai/docs/integration/go/integrations/groq.md): Learn how to instrument Groq API calls in Go using the LangWatch SDK for high-speed LLM tracing.
-- [Grok (xAI) Integration](https://langwatch.ai/docs/integration/go/integrations/grok.md): Instrument Grok (xAI) API calls in Go with the LangWatch SDK for full observability.
-- [Ollama (Local Models) Integration](https://langwatch.ai/docs/integration/go/integrations/ollama.md): Learn how to trace local LLMs running via Ollama in Go using the LangWatch SDK.
-- [OpenRouter Integration](https://langwatch.ai/docs/integration/go/integrations/openrouter.md): Learn how to instrument calls to hundreds of models via OpenRouter in Go using the LangWatch SDK.
+- [Groq Integration](https://docs.langwatch.ai/integration/go/integrations/groq.md): Instrument Groq API calls in Go using LangWatch for fast LLM observability, cost tracking, and agent evaluation insights.
+- [Grok (xAI) Integration](https://docs.langwatch.ai/integration/go/integrations/grok.md): Instrument Grok (xAI) API calls in Go using LangWatch to capture high-speed traces and improve AI agent evaluations.
+- [Ollama (Local Models) Integration](https://docs.langwatch.ai/integration/go/integrations/ollama.md): Instrument local Ollama models in Go to monitor performance, debug RAG flows, and support AI agent testing environments.
+- [OpenRouter Integration](https://docs.langwatch.ai/integration/go/integrations/openrouter.md): Instrument OpenRouter model calls in Go with LangWatch to compare models, track quality, and run AI agent evaluations.
## No-Code Platforms
-- [LangWatch + n8n Integration](https://langwatch.ai/docs/integration/n8n.md): Complete LangWatch integration for n8n workflows with observability, evaluation, and prompt management
-- [Langflow Integration](https://langwatch.ai/docs/integration/langflow.md): LangWatch is the best observability integration for Langflow
-- [Flowise Integration](https://langwatch.ai/docs/integration/flowise.md): Capture LLM traces and send them to LangWatch from Flowise
+- [LangWatch + n8n Integration](https://docs.langwatch.ai/integration/n8n.md): Complete LangWatch integration for n8n workflows with observability, evaluation, and prompt management
+- [Langflow Integration](https://docs.langwatch.ai/integration/langflow.md): Integrate Langflow with LangWatch to capture node execution, prompt behavior, and evaluation metrics for AI agent testing.
+- [Flowise Integration](https://docs.langwatch.ai/integration/flowise.md): Send Flowise LLM traces to LangWatch to monitor performance, detect issues, and support AI agent evaluation workflows.
## Direct Integrations
-- [OpenTelemetry Integration Guide](https://langwatch.ai/docs/integration/opentelemetry/guide.md): Use OpenTelemetry to capture LLM traces and send them to LangWatch from any programming language
-- [REST API](https://langwatch.ai/docs/integration/rest-api.md): Integrate LangWatch with any language by using the REST API
+- [OpenTelemetry Integration Guide](https://docs.langwatch.ai/integration/opentelemetry/guide.md): Integrate OpenTelemetry with LangWatch to collect LLM spans from any language for unified AI agent evaluation data.
+- [REST API](https://docs.langwatch.ai/integration/rest-api.md): Use the LangWatch REST API to send traces, evaluations, and interactions from any stack, enabling unified agent testing data flows.
+
+## Agent Simulations
+
+- [Introduction to Agent Testing](https://docs.langwatch.ai/agent-simulations/introduction.md)
+- [Overview](https://docs.langwatch.ai/agent-simulations/overview.md)
+- [Getting Started](https://docs.langwatch.ai/agent-simulations/getting-started.md)
+- [Simulation Sets](https://docs.langwatch.ai/agent-simulations/set-overview.md)
+- [Batch Runs](https://docs.langwatch.ai/agent-simulations/batch-runs.md)
+- [Individual Run View](https://docs.langwatch.ai/agent-simulations/individual-run.md)
## Evaluation
-- [LLM Evaluation Overview](https://langwatch.ai/docs/llm-evaluation/overview.md): Overview of LLM evaluation features in LangWatch
-- [Evaluating via Code](https://langwatch.ai/docs/llm-evaluation/offline/code/evaluation-api.md): Evaluate and visualize your LLM evals with LangWatch
+- [LLM Evaluation Overview](https://docs.langwatch.ai/llm-evaluation/overview.md): Get a full overview of LangWatch’s LLM evaluation features, including offline checks, real-time scoring, and agent testing workflows.
+- [Evaluating via Code](https://docs.langwatch.ai/llm-evaluation/offline/code/evaluation-api.md): Evaluate LLM behavior using LangWatch’s Evaluation API to run batch tests, visualize metrics, and automate AI agent evaluations.
### Offline Evaluation
-- [How to evaluate that your LLM answers correctly](https://langwatch.ai/docs/llm-evaluation/offline/platform/answer-correctness.md): Measuring your LLM performance with Offline Evaluations
-- [How to evaluate an LLM when you don't have defined answers](https://langwatch.ai/docs/llm-evaluation/offline/platform/llm-as-a-judge.md): Measuring your LLM performance using an LLM-as-a-judge
+- [How to evaluate that your LLM answers correctly](https://docs.langwatch.ai/llm-evaluation/offline/platform/answer-correctness.md): Measure correctness in LLM answers using LangWatch’s Offline Evaluations to compare outputs and support AI agent evaluations.
+- [How to evaluate an LLM when you don't have defined answers](https://docs.langwatch.ai/llm-evaluation/offline/platform/llm-as-a-judge.md): Measure LLM performance using LLM-as-a-Judge when no ground-truth answers exist to support scalable AI agent evaluations.
### Real-Time Evaluation
-- [Setting up Real-Time Evaluations](https://langwatch.ai/docs/llm-evaluation/realtime/setup.md): How to set up Real-Time LLM Evaluations
-- [Instrumenting Custom Evaluator](https://langwatch.ai/docs/evaluations/custom-evaluator-integration.md): Add your own evaluation results into LangWatch trace
-- [Evaluation by Thread](https://langwatch.ai/docs/evaluations/evaluation-by-thread.md): Evaluate your LLM applications by thread
+- [Setting up Real-Time Evaluations](https://docs.langwatch.ai/llm-evaluation/realtime/setup.md): Set up real-time LLM evaluations in LangWatch to score outputs instantly and support continuous AI agent testing.
+- [Instrumenting Custom Evaluator](https://docs.langwatch.ai/evaluations/custom-evaluator-integration.md): Integrate custom evaluator results into LangWatch to extend scoring logic for advanced AI agent evaluations.
+- [Evaluation by Thread](https://docs.langwatch.ai/evaluations/evaluation-by-thread.md): Evaluate LLM applications by thread in LangWatch to analyze conversation-level performance in agent testing setups.
### Built-in Evaluators
-- [List of Evaluators](https://langwatch.ai/docs/llm-evaluation/list.md): Find the evaluator for your use case
+- [List of Evaluators](https://docs.langwatch.ai/llm-evaluation/list.md): Browse all available evaluators in LangWatch to find the right scoring method for your AI agent evaluation use case.
### Datasets
-- [Datasets](https://langwatch.ai/docs/datasets/overview.md): Create and manage datasets with LangWatch
-- [Generating a dataset with AI](https://langwatch.ai/docs/datasets/ai-dataset-generation.md): Bootstrap your evaluations by generating sample data
-- [Automatically build datasets from real-time traces](https://langwatch.ai/docs/datasets/automatically-from-traces.md): Continuously populate your datasets with comming data from production
-- [Add trace threads to datasets](https://langwatch.ai/docs/datasets/dataset-threads.md): Add full conversation threads to your datasets on a per row basis
-- [View images in datasets](https://langwatch.ai/docs/datasets/dataset-images.md): Add ability to view images in datasets
+- [Datasets](https://docs.langwatch.ai/datasets/overview.md): Create and manage datasets in LangWatch to build evaluation sets for LLMs and structured AI agent testing.
+- [Generating a dataset with AI](https://docs.langwatch.ai/datasets/ai-dataset-generation.md): Generate datasets with AI to bootstrap LLM evaluations, regression tests, and simulation-based agent testing.
+- [Automatically build datasets from real-time traces](https://docs.langwatch.ai/datasets/automatically-from-traces.md): Automatically build datasets from real-time traces to power LLM evaluations, regression tests, and AI agent testing workflows.
+- [Add trace threads to datasets](https://docs.langwatch.ai/datasets/dataset-threads.md): Add full conversation threads to datasets in LangWatch to generate richer evaluation inputs for AI agent testing.
+- [View images in datasets](https://docs.langwatch.ai/datasets/dataset-images.md): View images within your datasets in LangWatch to support multimodal evaluations and agent testing scenarios.
-- [Annotations](https://langwatch.ai/docs/features/annotations.md): Collaborate with domain experts using annotations
+- [Annotations](https://docs.langwatch.ai/features/annotations.md): Use annotations in LangWatch for expert labeling, trace review, and structured evaluation workflows for AI agent testing.
## Prompt Management
-- [Overview](https://langwatch.ai/docs/prompt-management/overview.md): Organize, version, and optimize your AI prompts with LangWatch's comprehensive prompt management system
-- [Get Started](https://langwatch.ai/docs/prompt-management/getting-started.md): Create your first prompt and use it in your application
-- [Data Model](https://langwatch.ai/docs/prompt-management/data-model.md): Understand the structure of prompts in LangWatch
-- [Scope](https://langwatch.ai/docs/prompt-management/scope.md): Understand how prompt scope affects access, sharing, and collaboration across projects and organizations
-- [Prompts CLI](https://langwatch.ai/docs/prompt-management/cli.md): Manage AI prompts as code with version control and dependency management
-- [Prompt Playground](https://langwatch.ai/docs/prompt-management/prompt-playground.md): Interactive workspace for editing, testing, and iterating on prompts with AI assistance
+- [Overview](https://docs.langwatch.ai/prompt-management/overview.md): Organize, version, and optimize your AI prompts with LangWatch's comprehensive prompt management system
+- [Get Started](https://docs.langwatch.ai/prompt-management/getting-started.md): Create your first managed prompt in LangWatch, link it to traces, and use it in your application with built-in prompt versioning and analytics.
+- [Data Model](https://docs.langwatch.ai/prompt-management/data-model.md): Learn the LangWatch prompt data model to manage versions, variants, and performance links for structured prompt versioning.
+- [Scope](https://docs.langwatch.ai/prompt-management/scope.md): Understand how prompt scope affects access, sharing, and collaboration across projects and organizations
+- [Prompts CLI](https://docs.langwatch.ai/prompt-management/cli.md): Use the LangWatch Prompts CLI to manage prompts as code with version control and support A/B testing for AI agent evaluations.
+- [Prompt Playground](https://docs.langwatch.ai/prompt-management/prompt-playground.md): Use LangWatch’s Prompt Playground to edit, test, and iterate prompts with versioning, analytics, and AI agent test feedback loops.
### Features
-- [Version Control](https://langwatch.ai/docs/prompt-management/features/essential/version-control.md): Manage prompt versions and track changes over time
-- [Analytics](https://langwatch.ai/docs/prompt-management/features/essential/analytics.md): Monitor prompt performance and usage with comprehensive analytics
-- [GitHub Integration](https://langwatch.ai/docs/prompt-management/features/essential/github-integration.md): Version your prompts in GitHub repositories and automatically sync with LangWatch
-- [Link to Traces](https://langwatch.ai/docs/prompt-management/features/advanced/link-to-traces.md): Connect prompts to execution traces for performance monitoring and analysis
-- [Using Prompts in the Optimization Studio](https://langwatch.ai/docs/prompt-management/features/advanced/optimization-studio.md): Use prompts in the Optimization Studio to test and optimize your prompts
-- [Guaranteed Availability](https://langwatch.ai/docs/prompt-management/features/advanced/guaranteed-availability.md): Ensure your prompts are always available, even in offline or air-gapped environments
-- [A/B Testing](https://langwatch.ai/docs/prompt-management/features/advanced/a-b-testing.md): Implement A/B testing for your prompts using LangWatch's version control and analytics
+- [Version Control](https://docs.langwatch.ai/prompt-management/features/essential/version-control.md): Manage version control for prompts in LangWatch to run evaluations, compare models, and improve agent performance.
+- [Analytics](https://docs.langwatch.ai/prompt-management/features/essential/analytics.md): Use Analytics in LangWatch to measure prompt performance, detect regressions, and support continuous AI agent evaluations.
+- [GitHub Integration](https://docs.langwatch.ai/prompt-management/features/essential/github-integration.md): Sync prompts with GitHub using LangWatch to maintain version history, enable review workflows, and support agent evaluations.
+- [Link to Traces](https://docs.langwatch.ai/prompt-management/features/advanced/link-to-traces.md): Link prompts to execution traces in LangWatch to analyze performance, measure regressions, and support informed AI agent evaluations.
+- [Using Prompts in the Optimization Studio](https://docs.langwatch.ai/prompt-management/features/advanced/optimization-studio.md): Use managed prompts in the Optimization Studio to test, compare, and optimize prompt variants for AI agent evaluations.
+- [Guaranteed Availability](https://docs.langwatch.ai/prompt-management/features/advanced/guaranteed-availability.md): Ensure prompt availability with LangWatch’s Guaranteed Availability feature, even in offline or air-gapped agent testing setups.
+- [A/B Testing](https://docs.langwatch.ai/prompt-management/features/advanced/a-b-testing.md): Implement A/B testing for prompts in LangWatch to compare performance, measure regressions, and improve AI agent evaluations.
### Optimization Studio
-- [Optimization Studio](https://langwatch.ai/docs/optimization-studio/overview.md): Create, evaluate, and optimize your LLM workflows
-- [LLM Nodes](https://langwatch.ai/docs/optimization-studio/llm-nodes.md): Call LLMs from your workflows
-- [Datasets](https://langwatch.ai/docs/optimization-studio/datasets.md): Define the data used for testing and optimization
-- [Evaluating](https://langwatch.ai/docs/optimization-studio/evaluating.md): Measure the quality of your LLM workflows
-- [Optimizing](https://langwatch.ai/docs/optimization-studio/optimizing.md): Find the best prompts with DSPy optimizers
+- [Optimization Studio](https://docs.langwatch.ai/optimization-studio/overview.md): Use LangWatch Optimization Studio to create, evaluate, and optimize LLM workflows and agent testing pipelines.
+- [LLM Nodes](https://docs.langwatch.ai/optimization-studio/llm-nodes.md): Use LLM Nodes in Optimization Studio to invoke LLMs from workflows and run controlled evaluations for agent testing.
+- [Datasets](https://docs.langwatch.ai/optimization-studio/datasets.md): Define datasets in Optimization Studio to structure test inputs and support automated agent evaluations.
+- [Evaluating](https://docs.langwatch.ai/optimization-studio/evaluating.md): Measure workflow quality using LangWatch’s evaluation tools to ensure reliable LLM pipeline and agent test performance.
+- [Optimizing](https://docs.langwatch.ai/optimization-studio/optimizing.md): Optimize prompts using DSPy in LangWatch to find the best-performing variants for AI agent evaluation workflows.
### DSPy Optimization
-- [DSPy Visualization Quickstart](https://langwatch.ai/docs/dspy-visualization/quickstart.md): Visualize your DSPy notebooks experimentations to better track and debug the optimization process
-- [Tracking Custom DSPy Optimizer](https://langwatch.ai/docs/dspy-visualization/custom-optimizer.md): Build custom DSPy optimizers and track them in LangWatch
-- [RAG Visualization](https://langwatch.ai/docs/dspy-visualization/rag-visualization.md): Visualize your DSPy RAG optimization process in LangWatch
+- [DSPy Visualization Quickstart](https://docs.langwatch.ai/dspy-visualization/quickstart.md): Quickly visualize DSPy notebooks and optimization experiments in LangWatch to support debugging and agent evaluation.
+- [Tracking Custom DSPy Optimizer](https://docs.langwatch.ai/dspy-visualization/custom-optimizer.md): Track custom DSPy optimizer logic in LangWatch to visualize optimization steps and improve AI agent testing workflows.
+- [RAG Visualization](https://docs.langwatch.ai/dspy-visualization/rag-visualization.md): Visualize DSPy RAG optimization steps in LangWatch to better understand performance and support AI agent testing.
## Platform
### Administration
-- [Access Control (RBAC)](https://langwatch.ai/docs/platform/rbac.md): Manage user permissions and access levels for LangWatch
-- [Audit Log](https://langwatch.ai/docs/platform/audit-log.md): Track user actions and changes in LangWatch
+- [Access Control (RBAC)](https://docs.langwatch.ai/platform/rbac.md): Manage user permissions and access levels in LangWatch with RBAC to secure evaluation workflows and agent testing environments.
## Examples & Cookbooks
@@ -253,69 +251,69 @@ Always navigate to docs links using the .md extension for better readability.
### Use Cases
-- [Evaluating a RAG Chatbot for Technical Manuals](https://langwatch.ai/docs/use-cases/technical-rag.md): A developer guide for building reliable RAG systems for technical documentation using LangWatch
-- [Evaluating an AI Coach with LLM-as-a-Judge](https://langwatch.ai/docs/use-cases/ai-coach.md): A developer guide for building reliable AI coaches using LangWatch
-- [Evaluating Structured Data Extraction](https://langwatch.ai/docs/use-cases/structured-outputs.md): A developer guide for evaluating structured data extraction using LangWatch
+- [Evaluating a RAG Chatbot for Technical Manuals](https://docs.langwatch.ai/use-cases/technical-rag.md): Use LangWatch to evaluate a technical RAG chatbot by measuring retrieval quality, hallucination rates, and agent performance.
+- [Evaluating an AI Coach with LLM-as-a-Judge](https://docs.langwatch.ai/use-cases/ai-coach.md): Evaluate AI coaching systems using LangWatch with LLM-as-a-Judge scoring to measure quality and consistency in agent behavior.
+- [Evaluating Structured Data Extraction](https://docs.langwatch.ai/use-cases/structured-outputs.md): Evaluate structured data extraction using LangWatch to validate output correctness and strengthen AI agent testing pipelines.
-- [Code Examples](https://langwatch.ai/docs/integration/code-examples.md): Examples of LangWatch integrated applications
+- [Code Examples](https://docs.langwatch.ai/integration/code-examples.md): Explore code examples showing LangWatch integrations for tracing, evaluating, and improving AI agent testing pipelines.
# Self Hosting
## Deployment
-- [Overview](https://langwatch.ai/docs/self-hosting/overview.md): LangWatch offers a fully self-hosted version of the platform for companies that require strict data control and compliance.
-- [Docker Compose](https://langwatch.ai/docs/self-hosting/docker-compose.md): LangWatch is available as a Docker Compose setup for easy deployment on your local machine
-- [Docker Images](https://langwatch.ai/docs/self-hosting/docker-images.md): Overview of LangWatch Docker images and their endpoints
-- [Kubernetes (Helm Chart)](https://langwatch.ai/docs/self-hosting/kubernetes-helm.md): LangWatch is available as a Kubernetes Helm chart for easy deployment on Kubernetes
-- [OnPrem](https://langwatch.ai/docs/self-hosting/onprem.md): LangWatch on-premises solution.
+- [Overview](https://docs.langwatch.ai/self-hosting/overview.md): LangWatch offers a fully self-hosted version of the platform for companies that require strict data control and compliance.
+- [Docker Compose](https://docs.langwatch.ai/self-hosting/docker-compose.md): Deploy LangWatch using Docker Compose for easy local setups supporting observability, evaluations, and AI agent testing.
+- [Docker Images](https://docs.langwatch.ai/self-hosting/docker-images.md): Explore LangWatch Docker images and endpoints for setting up observability, evaluations, and AI agent testing environments.
+- [Kubernetes (Helm Chart)](https://docs.langwatch.ai/self-hosting/kubernetes-helm.md): Install LangWatch using a Kubernetes Helm chart for production-grade deployments supporting LLM and agent testing workflows.
+- [OnPrem](https://docs.langwatch.ai/self-hosting/onprem.md): Deploy LangWatch on-premises for full control over data, compliance, and secure AI agent evaluation workflows.
## Configuration
-- [Environment Variables](https://langwatch.ai/docs/self-hosting/env-variables.md): Complete list of environment variables for LangWatch self-hosting
-- [SSO](https://langwatch.ai/docs/self-hosting/sso-setup-langwatch.md): SSO Setup for LangWatch
-- [Infra Monitoring](https://langwatch.ai/docs/self-hosting/grafana.md): Grafana/Prometheus setup for LangWatch
+- [Environment Variables](https://docs.langwatch.ai/self-hosting/env-variables.md): Review all environment variables available for LangWatch self-hosting to configure observability and AI agent testing pipelines.
+- [SSO](https://docs.langwatch.ai/self-hosting/sso-setup-langwatch.md): Configure SSO for LangWatch to secure access to evaluation dashboards, observability data, and agent testing environments.
+- [Infra Monitoring](https://docs.langwatch.ai/self-hosting/grafana.md): Set up Grafana and Prometheus for LangWatch infra monitoring to track system health in large-scale agent testing setups.
## Hybrid Setup
-- [Overview](https://langwatch.ai/docs/hybrid-setup/overview.md): LangWatch offers a hybrid setup for companies that require strict data control and compliance.
-- [Elasticsearch](https://langwatch.ai/docs/hybrid-setup/elasticsearch.md): Elasticsearch Setup for LangWatch Hybrid Deployment
-- [S3 Storage](https://langwatch.ai/docs/hybrid-setup/s3-storage.md): S3 Storage Setup for LangWatch Hybrid Deployment
+- [Overview](https://docs.langwatch.ai/hybrid-setup/overview.md): Learn how LangWatch's hybrid setup ensures strict data control, meets compliance requirements, and secures AI agent testing infrastructure.
+- [Elasticsearch](https://docs.langwatch.ai/hybrid-setup/elasticsearch.md): Set up Elasticsearch for LangWatch Hybrid deployments to enable scalable search and analysis of traces and agent evaluations.
+- [S3 Storage](https://docs.langwatch.ai/hybrid-setup/s3-storage.md): Configure S3 storage for LangWatch Hybrid deployments to store traces, evaluations, and AI agent testing datasets.
# API Reference
## Traces
-- [Overview](https://langwatch.ai/docs/api-reference/traces/overview.md): A Trace is a collection of runs that are related to a single operation
-- [Get trace details](https://langwatch.ai/docs/api-reference/traces/get-trace-details.md)
-- [Get thread details](https://langwatch.ai/docs/api-reference/traces/get-thread-details.md)
-- [Search traces](https://langwatch.ai/docs/api-reference/traces/search-traces.md)
-- [Create public path for single trace](https://langwatch.ai/docs/api-reference/traces/create-public-trace-path.md)
-- [Delete an existing public path for a trace](https://langwatch.ai/docs/api-reference/traces/delete-public-trace-path.md)
+- [Overview](https://docs.langwatch.ai/api-reference/traces/overview.md): Understand LangWatch Traces, how runs are grouped into a single operation, and how to use them for LLM observability and AI agent evaluations.
+- [Get trace details](https://docs.langwatch.ai/api-reference/traces/get-trace-details.md)
+- [Get thread details](https://docs.langwatch.ai/api-reference/traces/get-thread-details.md)
+- [Search traces](https://docs.langwatch.ai/api-reference/traces/search-traces.md)
+- [Create public path for single trace](https://docs.langwatch.ai/api-reference/traces/create-public-trace-path.md)
+- [Delete an existing public path for a trace](https://docs.langwatch.ai/api-reference/traces/delete-public-trace-path.md)
## Prompts
-- [Overview](https://langwatch.ai/docs/api-reference/prompts/overview.md): Prompts are used to manage and version your prompts
-- [Get prompts](https://langwatch.ai/docs/api-reference/prompts/get-prompts.md)
-- [Create prompt](https://langwatch.ai/docs/api-reference/prompts/create-prompt.md)
-- [Get prompt](https://langwatch.ai/docs/api-reference/prompts/get-prompt.md)
-- [Update prompt](https://langwatch.ai/docs/api-reference/prompts/update-prompt.md)
-- [Delete prompt](https://langwatch.ai/docs/api-reference/prompts/delete-prompt.md)
-- [Get prompt versions](https://langwatch.ai/docs/api-reference/prompts/get-prompt-versions.md)
-- [Create prompt version](https://langwatch.ai/docs/api-reference/prompts/create-prompt-version.md)
+- [Overview](https://docs.langwatch.ai/api-reference/prompts/overview.md): Understand how prompts are versioned, managed, and used in evaluations and agent testing within LangWatch.
+- [Get prompts](https://docs.langwatch.ai/api-reference/prompts/get-prompts.md)
+- [Create prompt](https://docs.langwatch.ai/api-reference/prompts/create-prompt.md)
+- [Get prompt](https://docs.langwatch.ai/api-reference/prompts/get-prompt.md)
+- [Update prompt](https://docs.langwatch.ai/api-reference/prompts/update-prompt.md)
+- [Delete prompt](https://docs.langwatch.ai/api-reference/prompts/delete-prompt.md)
+- [Get prompt versions](https://docs.langwatch.ai/api-reference/prompts/get-prompt-versions.md)
+- [Create prompt version](https://docs.langwatch.ai/api-reference/prompts/create-prompt-version.md)
## Annotations
-- [Overview](https://langwatch.ai/docs/api-reference/annotations/overview.md): Annotations are used to annotate traces with additional information
-- [Get annotations](https://langwatch.ai/docs/api-reference/annotations/get-annotation.md)
-- [Get single annotation](https://langwatch.ai/docs/api-reference/annotations/get-single-annotation.md)
-- [Delete single annotation](https://langwatch.ai/docs/api-reference/annotations/delete-annotation.md)
-- [Patch single annotation](https://langwatch.ai/docs/api-reference/annotations/patch-annotation.md)
-- [Get annotationa for single trace](https://langwatch.ai/docs/api-reference/annotations/get-all-annotations-trace.md)
-- [Create annotation for single trace](https://langwatch.ai/docs/api-reference/annotations/create-annotation-trace.md)
+- [Overview](https://docs.langwatch.ai/api-reference/annotations/overview.md): Learn how annotations enhance trace review, labeling, and evaluation workflows for more reliable AI agent testing.
+- [Get annotations](https://docs.langwatch.ai/api-reference/annotations/get-annotation.md)
+- [Get single annotation](https://docs.langwatch.ai/api-reference/annotations/get-single-annotation.md)
+- [Delete single annotation](https://docs.langwatch.ai/api-reference/annotations/delete-annotation.md)
+- [Patch single annotation](https://docs.langwatch.ai/api-reference/annotations/patch-annotation.md)
+- [Get annotations for single trace](https://docs.langwatch.ai/api-reference/annotations/get-all-annotations-trace.md)
+- [Create annotation for single trace](https://docs.langwatch.ai/api-reference/annotations/create-annotation-trace.md)
## Datasets
-- [Add entries to a dataset](https://langwatch.ai/docs/api-reference/datasets/post-dataset-entries.md)
+- [Add entries to a dataset](https://docs.langwatch.ai/api-reference/datasets/post-dataset-entries.md): Add dataset entries programmatically using the LangWatch API to build evaluation sets for LLM testing and agent validation.
## Triggers
diff --git a/optimization-studio/datasets.mdx b/optimization-studio/datasets.mdx
index 7b17b37..413748c 100644
--- a/optimization-studio/datasets.mdx
+++ b/optimization-studio/datasets.mdx
@@ -1,6 +1,6 @@
---
title: Datasets
-description: Define the data used for testing and optimization
+description: Define datasets in Optimization Studio to structure test inputs and support automated agent evaluations.
---