4 changes: 2 additions & 2 deletions api-reference/annotations/overview.mdx
@@ -1,6 +1,6 @@
---
title: 'Overview'
-description: 'Annotations are used to annotate traces with additional information'
+description: 'Learn how annotations enhance trace review, labeling, and evaluation workflows for more reliable AI agent testing.'
---

## Intro
@@ -19,7 +19,7 @@ To make a call to the Annotations API, you will need to pass through your LangWa
- `DELETE /api/annotations/:id` - Delete a single annotation
- `PATCH /api/annotations/:id` - Update a single annotation
- `GET /api/annotations/trace/:id` - Get the annotations for a single trace
-- `POST /api/annotations/trace/:id` - Create an annotation for a single trace
+- `POST /api/annotations/trace/:id` - Create annotations for traces to support domain labeling, evaluation scoring, and agent testing workflows.



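To make these routes concrete, here is a minimal sketch of calling the trace-annotation endpoints listed above. The base URL, the `X-Auth-Token` header name, and the annotation body fields are assumptions not confirmed by this diff; check your LangWatch project settings and the API reference for the real values.

```python
import requests

BASE_URL = "https://app.langwatch.ai"  # assumed; point this at your own instance
HEADERS = {"X-Auth-Token": "<your-langwatch-api-key>"}  # assumed header name

trace_id = "trace_123"  # hypothetical trace id

# GET /api/annotations/trace/:id -- fetch all annotations on one trace
resp = requests.get(f"{BASE_URL}/api/annotations/trace/{trace_id}", headers=HEADERS)
resp.raise_for_status()
print(resp.json())

# POST /api/annotations/trace/:id -- create an annotation on that trace
payload = {"comment": "Wrong tool selected", "isThumbsUp": False}  # assumed body shape
resp = requests.post(
    f"{BASE_URL}/api/annotations/trace/{trace_id}",
    headers=HEADERS,
    json=payload,
)
resp.raise_for_status()
```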
2 changes: 1 addition & 1 deletion api-reference/datasets/post-dataset-entries.mdx
@@ -1,4 +1,4 @@
---
-title: 'Add entries to a dataset'
+title: 'Add dataset entries programmatically using the LangWatch API to build evaluation sets for LLM testing and agent validation.'
openapi: 'POST /api/dataset/{slug}/entries'
---
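As a hedged sketch of the endpoint this page documents, the call below appends rows to an existing dataset. The entry field names are assumptions based on typical dataset columns, and the auth header is the same assumed one as in the annotations example above.

```python
import requests

BASE_URL = "https://app.langwatch.ai"  # assumed
HEADERS = {"X-Auth-Token": "<your-langwatch-api-key>"}  # assumed header name
slug = "customer-support-evals"  # hypothetical dataset slug

# POST /api/dataset/{slug}/entries -- append entries to the dataset
payload = {
    "entries": [
        {
            "input": "How do I reset my password?",
            "expected_output": "Go to Settings > Security and click Reset.",
        }
    ]
}
resp = requests.post(
    f"{BASE_URL}/api/dataset/{slug}/entries", headers=HEADERS, json=payload
)
resp.raise_for_status()
```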
26 changes: 13 additions & 13 deletions api-reference/openapi-evals.json
@@ -111,7 +111,7 @@
"/langevals/basic/evaluate": {
"post": {
"summary": "Custom Basic Evaluator",
"description": "Allows you to check for simple text matches or regex evaluation.",
"description": "Configure the Custom Basic Evaluator to check simple matches or regex rules for lightweight automated AI agent evaluations.",
"operationId": "langevals_basic_evaluate",
"requestBody": {
"content": {
@@ -202,7 +202,7 @@
"/langevals/competitor_blocklist/evaluate": {
"post": {
"summary": "Competitor Blocklist",
"description": "This evaluator checks if any of the specified competitors was mentioned",
"description": "Detect competitor mentions using LangWatch’s Competitor Blocklist evaluator to enforce content rules in AI agent testing pipelines.",
"operationId": "langevals_competitor_blocklist_evaluate",
"requestBody": {
"content": {
@@ -475,7 +475,7 @@
"/langevals/exact_match/evaluate": {
"post": {
"summary": "Exact Match Evaluator",
"description": "A simple evaluator that checks if the output matches the expected_output exactly.",
"description": "Use the Exact Match evaluator in LangWatch to verify outputs that require precise matching during AI agent testing.",
"operationId": "langevals_exact_match_evaluate",
"requestBody": {
"content": {
@@ -657,7 +657,7 @@
"/langevals/llm_boolean/evaluate": {
"post": {
"summary": "LLM-as-a-Judge Boolean Evaluator",
"description": "Use an LLM as a judge with a custom prompt to do a true/false boolean evaluation of the message.",
"description": "Use the LLM-as-a-Judge Boolean Evaluator to classify outputs as true or false for fast automated agent evaluations.",
"operationId": "langevals_llm_boolean_evaluate",
"requestBody": {
"content": {
@@ -748,7 +748,7 @@
"/langevals/llm_category/evaluate": {
"post": {
"summary": "LLM-as-a-Judge Category Evaluator",
"description": "Use an LLM as a judge with a custom prompt to classify the message into custom defined categories.",
"description": "Use the LLM-as-a-Judge Category Evaluator to classify outputs into custom categories for structured AI agent evaluations.",
"operationId": "langevals_llm_category_evaluate",
"requestBody": {
"content": {
@@ -839,7 +839,7 @@
"/langevals/llm_score/evaluate": {
"post": {
"summary": "LLM-as-a-Judge Score Evaluator",
"description": "Use an LLM as a judge with custom prompt to do a numeric score evaluation of the message.",
"description": "Score messages with an LLM-as-a-Judge evaluator to generate numeric performance metrics for AI agent testing.",
"operationId": "langevals_llm_score_evaluate",
"requestBody": {
"content": {
@@ -930,7 +930,7 @@
"/langevals/off_topic/evaluate": {
"post": {
"summary": "Off Topic Evaluator",
"description": "This evaluator checks if the user message is concerning one of the allowed topics of the chatbot",
"description": "Detect off-topic messages using LangWatch’s Off Topic Evaluator to enforce domain boundaries during AI agent testing.",
"operationId": "langevals_off_topic_evaluate",
"requestBody": {
"content": {
@@ -1385,7 +1385,7 @@
"/azure/jailbreak/evaluate": {
"post": {
"summary": "Azure Jailbreak Detection",
"description": "This evaluator checks for jailbreak-attempt in the input using Azure's Content Safety API.",
"description": "Use Azure Jailbreak Detection in LangWatch to identify jailbreak attempts and improve safety across AI agent testing workflows.",
"operationId": "azure_jailbreak_evaluate",
"requestBody": {
"content": {
@@ -2362,7 +2362,7 @@
"/example/word_count/evaluate": {
"post": {
"summary": "Example Evaluator",
"description": "This evaluator serves as a boilerplate for creating new evaluators.",
"description": "Use the Example Evaluator template in LangWatch to implement and test custom evaluation logic. This endpoint evaluates outputs by counting words, serving as a template for building your own evaluators.",
"operationId": "example_word_count_evaluate",
"requestBody": {
"content": {
@@ -2441,7 +2441,7 @@
"/ragas/bleu_score/evaluate": {
"post": {
"summary": "BLEU Score",
"description": "Traditional NLP metric. BLEU score for evaluating the similarity between two strings.",
"description": "Use the BLEU Score evaluator to measure string similarity and support automated NLP and AI agent evaluation workflows.",
"operationId": "ragas_bleu_score_evaluate",
"requestBody": {
"content": {
@@ -2793,7 +2793,7 @@
"/ragas/factual_correctness/evaluate": {
"post": {
"summary": "LLM Factual Match",
"description": "Computes with an LLM how factually similar the generated answer is to the expected output.",
"description": "Compute factual similarity with LangWatch’s LLM Factual Match evaluator to validate truthfulness in AI agent evaluations.",
"operationId": "ragas_factual_correctness_evaluate",
"requestBody": {
"content": {
@@ -3248,7 +3248,7 @@
"/ragas/rouge_score/evaluate": {
"post": {
"summary": "ROUGE Score",
"description": "Traditional NLP metric. ROUGE score for evaluating the similarity between two strings.",
"description": "Use the ROUGE Score evaluator in LangWatch to measure text similarity and support AI agent evaluations and NLP quality checks.",
"operationId": "ragas_rouge_score_evaluate",
"requestBody": {
"content": {
@@ -3521,7 +3521,7 @@
"/ragas/summarization_score/evaluate": {
"post": {
"summary": "Summarization Score",
"description": "Measures how well the summary captures important information from the retrieved contexts.",
"description": "Measure summary quality with LangWatch’s Summarization Score to support RAG evaluations and AI agent testing accuracy.",
"operationId": "ragas_summarization_score_evaluate",
"requestBody": {
"content": {
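The evaluator endpoints in this spec all share one request pattern: POST a batch of entries plus evaluator-specific settings and receive a list of results. As a hedged illustration (the `data`/`settings` envelope and the rule fields below are assumptions; the authoritative schemas are in the request bodies truncated above), a Custom Basic Evaluator call might look like:

```python
import requests

LANGEVALS_URL = "https://your-langevals-host"  # assumed deployment URL

# POST /langevals/basic/evaluate -- sketch only; the payload shape is an assumption
payload = {
    "data": [
        {
            "input": "What is the refund window?",
            "output": "Refunds are issued within 30 days.",
        }
    ],
    "settings": {
        "rules": [{"field": "output", "rule": "contains", "value": "30 days"}]
    },
}
resp = requests.post(f"{LANGEVALS_URL}/langevals/basic/evaluate", json=payload)
resp.raise_for_status()
for result in resp.json():
    print(result)
```

The other evaluators (exact_match, llm_boolean, llm_score, the ragas metrics, and so on) take the same envelope with their own `settings`, such as a judge prompt for the LLM-as-a-Judge family.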
40 changes: 20 additions & 20 deletions api-reference/openapiLangWatch.json

(Diff not rendered: generated file.)

2 changes: 1 addition & 1 deletion api-reference/traces/overview.mdx
@@ -1,6 +1,6 @@
---
title: 'Overview'
-description: 'A Trace is a collection of runs that are related to a single operation'
+description: 'Understand LangWatch Traces, how runs are grouped into a single operation, and how to use them for LLM observability and AI agent evaluations.'
---

## Intro
2 changes: 1 addition & 1 deletion concepts.mdx
@@ -1,6 +1,6 @@
---
title: Concepts
-description: LLM tracing and observability conceptual guide
+description: Explore core concepts of LLM tracing, observability, datasets, and evaluations in LangWatch to design reliable AI agent testing workflows.
keywords: LangWatch, concepts, tracing, observability, LLM, AI, travel, blog, user, customer, labels, threads, traces, spans
---

2 changes: 1 addition & 1 deletion datasets/ai-dataset-generation.mdx
@@ -1,6 +1,6 @@
---
title: Generating a dataset with AI
-description: Bootstrap your evaluations by generating sample data
+description: Generate datasets with AI to bootstrap LLM evaluations, regression tests, and simulation-based agent testing.
---

Getting started with evaluations can be a bit daunting, especially when you don't have a dataset to use yet.
2 changes: 1 addition & 1 deletion datasets/automatically-from-traces.mdx
@@ -1,6 +1,6 @@
---
title: Automatically build datasets from real-time traces
-description: Continuously populate your datasets with comming data from production
+description: Automatically build datasets from real-time traces to power LLM evaluations, regression tests, and AI agent testing workflows.
---

You can keep continuously populating the dataset with new data arriving from production by using **Triggers**, mapping trace fields to any dataset columns you prefer.
2 changes: 1 addition & 1 deletion datasets/dataset-images.mdx
@@ -1,6 +1,6 @@
---
title: View images in datasets
-description: Add ability to view images in datasets
+description: View image datasets in LangWatch to support multimodal evaluations and agent testing scenarios.
---

With your images column type set to `image (URL)`, you will be able to view images in your dataset. This is useful for analyzing the images at a glance.
2 changes: 1 addition & 1 deletion datasets/dataset-threads.mdx
@@ -1,6 +1,6 @@
---
title: Add trace threads to datasets
-description: Add full conversation threads to your datasets on a per row basis
+description: Add full conversation threads to datasets in LangWatch to generate richer evaluation inputs for AI agent testing.
---

To add trace threads to a dataset, follow these steps:
2 changes: 1 addition & 1 deletion datasets/overview.mdx
@@ -1,7 +1,7 @@
---
title: Datasets
sidebarTitle: Overview
-description: Create and manage datasets with LangWatch
+description: Create and manage datasets in LangWatch to build evaluation sets for LLMs and structured AI agent testing.
---

## Create datasets
2 changes: 1 addition & 1 deletion dspy-visualization/custom-optimizer.mdx
@@ -1,7 +1,7 @@
---
title: Tracking Custom DSPy Optimizer
sidebarTitle: Custom Optimizer Tracking
-description: Build custom DSPy optimizers and track them in LangWatch
+description: Track custom DSPy optimizer logic in LangWatch to visualize optimization steps and improve AI agent testing workflows.
---

If you are building a custom DSPy optimizer, LangWatch won't support tracking it out of the box, but adding tracking to any custom optimizer is also very simple.
2 changes: 1 addition & 1 deletion dspy-visualization/quickstart.mdx
@@ -1,7 +1,7 @@
---
title: DSPy Visualization Quickstart
sidebarTitle: Quickstart
-description: Visualize your DSPy notebooks experimentations to better track and debug the optimization process
+description: Quickly visualize DSPy notebooks and optimization experiments in LangWatch to support debugging and agent evaluation.
---

[<img align="center" src="https://colab.research.google.com/assets/colab-badge.svg" />](https://colab.research.google.com/github/langwatch/langwatch/blob/main/python-sdk/examples/dspy_visualization.ipynb)
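As a minimal sketch of what the notebook walks through (the `langwatch.dspy.init` argument names are assumptions here; defer to the notebook and SDK docs), tracking an optimization run looks roughly like:

```python
import dspy
import langwatch

class AnswerQuestion(dspy.Signature):
    """Answer the question concisely."""
    question = dspy.InputField()
    answer = dspy.OutputField()

def exact_match_metric(example, pred, trace=None):
    # toy metric for illustration only
    return example.answer == pred.answer

# assumes an LM is already configured, e.g. dspy.settings.configure(lm=...)
trainset = [dspy.Example(question="2+2?", answer="4").with_inputs("question")]
program = dspy.Predict(AnswerQuestion)
optimizer = dspy.BootstrapFewShot(metric=exact_match_metric)

# Hedged sketch: start LangWatch's DSPy tracking before compiling so each
# optimization step shows up in the experiment dashboard; the `experiment`
# and `optimizer` argument names are assumptions.
langwatch.dspy.init(experiment="my-dspy-experiment", optimizer=optimizer)

compiled = optimizer.compile(program, trainset=trainset)
```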