From af21c5d5ab4fba42f5a268e059fc946ae1e3b739 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 5 Nov 2025 15:49:21 -0800 Subject: [PATCH 01/18] feat: init v1 --- examples/pom.xml | 2 +- .../examples/v1_quick_start/V1QuickStart.java | 134 +++++++ judgeval-java/pom.xml | 2 +- .../api_scorers/AnswerCorrectnessScorer.java | 16 + .../api_scorers/AnswerRelevancyScorer.java | 16 + .../scorers/api_scorers/DerailmentScorer.java | 16 + .../api_scorers/FaithfulnessScorer.java | 16 + .../InstructionAdherenceScorer.java | 16 + .../custom_scorer/CustomScorer.java | 15 + .../prompt_scorer/PromptScorer.java | 16 + .../prompt_scorer/TracePromptScorer.java | 16 + .../judgmentlabs/judgeval/tracer/Tracer.java | 19 + .../judgeval/v1/JudgmentClient.java | 64 ++++ .../judgeval/v1/evaluation/Evaluation.java | 30 ++ .../v1/evaluation/EvaluationFactory.java | 19 + .../judgeval/v1/package-info.java | 4 + .../judgeval/v1/scorers/ScorersFactory.java | 34 ++ .../built_in/AnswerCorrectnessScorer.java | 60 +++ .../built_in/AnswerRelevancyScorer.java | 60 +++ .../built_in/BuiltInScorersFactory.java | 26 ++ .../v1/scorers/built_in/DerailmentScorer.java | 60 +++ .../scorers/built_in/FaithfulnessScorer.java | 60 +++ .../built_in/InstructionAdherenceScorer.java | 60 +++ .../scorers/custom_scorer/CustomScorer.java | 48 +++ .../custom_scorer/CustomScorerFactory.java | 22 ++ .../scorers/prompt_scorer/PromptScorer.java | 119 ++++++ .../prompt_scorer/PromptScorerFactory.java | 125 +++++++ .../judgeval/v1/tracer/BaseTracer.java | 352 ++++++++++++++++++ .../judgeval/v1/tracer/Tracer.java | 171 +++++++++ .../judgeval/v1/tracer/TracerFactory.java | 24 ++ 30 files changed, 1620 insertions(+), 2 deletions(-) create mode 100644 examples/src/main/java/examples/v1_quick_start/V1QuickStart.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java create mode 
100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/package-info.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java diff --git a/examples/pom.xml b/examples/pom.xml index d8a58ec..7f06a15 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -26,7 +26,7 @@ com.judgmentlabs judgeval-java - 0.2.3 + 0.3.0 io.opentelemetry 
diff --git a/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java b/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java new file mode 100644 index 0000000..c7c96e5 --- /dev/null +++ b/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java @@ -0,0 +1,134 @@ +package examples.v1_quick_start; + +import com.judgmentlabs.judgeval.data.Example; +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerCorrectnessScorer; +import com.judgmentlabs.judgeval.v1.scorers.built_in.FaithfulnessScorer; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorer; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; +import com.judgmentlabs.judgeval.v1.tracer.Tracer; + +public class V1QuickStart { + public static void main(String[] args) { + System.out.println("=== Judgeval SDK V1 Quick Start ===\n"); + + System.out.println("1. Initialize JudgmentClient"); + JudgmentClient client = JudgmentClient.builder() + .apiKey(System.getenv("JUDGMENT_API_KEY")) + .organizationId(System.getenv("JUDGMENT_ORG_ID")) + .build(); + System.out.println(" Client initialized\n"); + + System.out.println("2. Create and initialize Tracer"); + Tracer tracer = client.tracer().create() + .projectName("quickstart-project") + .enableEvaluation(true) + .build(); + tracer.initialize(); + System.out.println(" Tracer initialized for project: quickstart-project\n"); + + System.out.println("3. Use Tracer for distributed tracing"); + tracer.span("example-operation", () -> { + tracer.setLLMSpan(); + tracer.setInput("What is the capital of France?"); + + String llmOutput = "The capital of France is Paris."; + + tracer.setOutput(llmOutput); + System.out.println(" Traced operation with input/output"); + + System.out.println(); + + System.out.println("4. 
Access PromptScorer (fetch existing)"); + try { + PromptScorer existingScorer = client.scorers() + .promptScorer() + .get("example-scorer"); + System.out.println(" Retrieved PromptScorer: " + existingScorer.getName()); + } catch (Exception e) { + System.out.println(" Note: Scorer 'example-scorer' not found (expected for first run)"); + } + System.out.println(); + + System.out.println("5. Create new PromptScorer"); + PromptScorer newScorer = client.scorers() + .promptScorer() + .create() + .name("kindness-scorer") + .prompt("Did the assistant respond kindly and respectfully?") + .threshold(0.7) + .build(); + System.out.println(" Created PromptScorer: " + newScorer.getName()); + System.out.println(" Threshold: " + newScorer.getThreshold()); + System.out.println(); + + System.out.println("6. Use TracePromptScorer"); + try { + PromptScorer traceScorer = client.scorers() + .tracePromptScorer() + .create() + .name("trace-quality-scorer") + .prompt("Does the entire trace show high quality reasoning?") + .threshold(0.8) + .build(); + System.out.println(" Created TracePromptScorer: " + traceScorer.getName()); + } catch (Exception e) { + System.out.println(" TracePromptScorer creation demo"); + } + System.out.println(); + + System.out.println("7. Use CustomScorer"); + CustomScorer customScorer = client.scorers() + .customScorer() + .get("my-custom-scorer", "MyCustomScorerClass"); + System.out.println(" Created CustomScorer: " + customScorer.getName()); + System.out.println(); + + System.out.println("8. 
Use Built-in Scorers"); + AnswerCorrectnessScorer correctnessScorer = client.scorers() + .builtIn() + .answerCorrectness() + .threshold(0.8) + .build(); + System.out.println(" Created AnswerCorrectnessScorer with threshold: " + + correctnessScorer.getThreshold()); + + FaithfulnessScorer faithfulnessScorer = client.scorers() + .builtIn() + .faithfulness() + .build(); + System.out.println(" Created FaithfulnessScorer with default threshold: " + + faithfulnessScorer.getThreshold()); + System.out.println(); + + System.out.println("9. Run Evaluation"); + + System.out.println("10. Complete workflow example"); + tracer.span("complete-llm-call", () -> { + tracer.setLLMSpan(); + tracer.setInput("Explain quantum computing in simple terms"); + + String response = "Quantum computing uses quantum mechanics to process information..."; + + tracer.setOutput(response); + + Example evaluationExample = Example.builder() + .property("input", "Explain quantum computing in simple terms") + .property("actual_output", response) + .property("expected_output", "A clear, simple explanation") + .build(); + tracer.asyncEvaluate(client.scorers().builtIn().answerCorrectness().build(), evaluationExample); + + System.out.println(" Traced LLM call with evaluation example ready"); + }); + System.out.println(); + }); + + try { + Thread.sleep(10000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + + } +} diff --git a/judgeval-java/pom.xml b/judgeval-java/pom.xml index 2f44450..2173dab 100644 --- a/judgeval-java/pom.xml +++ b/judgeval-java/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.judgmentlabs judgeval-java - 0.2.4 + 0.3.0 jar Judgeval Java Java SDK for Judgeval diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java index e0ec743..74d1dd7 100644 --- 
a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * AnswerCorrectnessScorer scorer = AnswerCorrectnessScorer.create();
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * AnswerCorrectnessScorer scorer = client.scorers().builtIn().answerCorrectness().build();
+ * }</pre>
+ */ +@Deprecated public class AnswerCorrectnessScorer extends APIScorer { public AnswerCorrectnessScorer() { super(APIScorerType.ANSWER_CORRECTNESS); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java index 7434399..fec9a09 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * AnswerRelevancyScorer scorer = AnswerRelevancyScorer.create();
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * AnswerRelevancyScorer scorer = client.scorers().builtIn().answerRelevancy().build();
+ * }</pre>
+ */ +@Deprecated public class AnswerRelevancyScorer extends APIScorer { public AnswerRelevancyScorer() { super(APIScorerType.ANSWER_RELEVANCY); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java index 431728d..4958cbf 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java @@ -3,6 +3,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * DerailmentScorer scorer = DerailmentScorer.create();
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * DerailmentScorer scorer = client.scorers().builtIn().derailment().build();
+ * }</pre>
+ */ +@Deprecated public class DerailmentScorer extends APIScorer { public DerailmentScorer() { super(APIScorerType.DERAILMENT); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java index bb53d23..d4567ec 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * FaithfulnessScorer scorer = FaithfulnessScorer.create();
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * FaithfulnessScorer scorer = client.scorers().builtIn().faithfulness().build();
+ * }</pre>
+ */ +@Deprecated public class FaithfulnessScorer extends APIScorer { public FaithfulnessScorer() { super(APIScorerType.FAITHFULNESS); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java index da71351..391eeaa 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * InstructionAdherenceScorer scorer = InstructionAdherenceScorer.create();
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * InstructionAdherenceScorer scorer = client.scorers().builtIn().instructionAdherence().build();
+ * }</pre>
+ */ +@Deprecated public class InstructionAdherenceScorer extends APIScorer { public InstructionAdherenceScorer() { super(APIScorerType.INSTRUCTION_ADHERENCE); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java index 268962a..d576faf 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java @@ -8,7 +8,22 @@ * Server-hosted custom scorer representation for enqueue payloads. * Instances serialize into ExampleEvaluationRun.custom_scorers with score_type * "Custom", server_hosted=true, and optional class_name for server routing. + * + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * CustomScorer scorer = CustomScorer.get("my-scorer");
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * CustomScorer scorer = client.scorers().customScorer().get("my-scorer");
+ * }</pre>
*/ +@Deprecated public class CustomScorer extends APIScorer { public CustomScorer() { super(APIScorerType.CUSTOM); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java index 7ebd225..e07d588 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java @@ -9,6 +9,22 @@ import com.judgmentlabs.judgeval.exceptions.JudgmentAPIError; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * PromptScorer scorer = PromptScorer.get("my-scorer");
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * PromptScorer scorer = client.scorers().promptScorer().get("my-scorer");
+ * }</pre>
+ */ +@Deprecated public class PromptScorer extends BasePromptScorer { public PromptScorer(String name, String prompt, double threshold, Map options) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java index fe813bb..5e83766 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java @@ -9,6 +9,22 @@ import com.judgmentlabs.judgeval.exceptions.JudgmentAPIError; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * TracePromptScorer scorer = TracePromptScorer.get("my-scorer");
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * PromptScorer scorer = client.scorers().tracePromptScorer().get("my-scorer");
+ * }</pre>
+ */ +@Deprecated public class TracePromptScorer extends BasePromptScorer { public TracePromptScorer(String name, String prompt, double threshold, Map options) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java index db8defe..ebf98ff 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java @@ -16,11 +16,30 @@ /** * Main tracer for Judgment Labs distributed tracing and evaluation. * + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * + *

+ * Migration example: + * + *

<pre>{@code
+ * // Old way:
+ * Tracer tracer = Tracer.createDefault("my-project");
+ * tracer.initialize();
+ * 
+ * // New way:
+ * JudgmentClient client = JudgmentClient.builder().build();
+ * Tracer tracer = client.tracer().create()
+ *     .projectName("my-project")
+ *     .build();
+ * tracer.initialize();
+ * }</pre>
+ * * @see TracerConfiguration * @see SpanExporter * @see com.judgmentlabs.judgeval.scorers.BaseScorer * @see com.judgmentlabs.judgeval.data.Example */ +@Deprecated public final class Tracer extends BaseTracer { private Tracer(TracerConfiguration configuration, ISerializer serializer, boolean shouldInitialize) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java new file mode 100644 index 0000000..e9cdf23 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java @@ -0,0 +1,64 @@ +package com.judgmentlabs.judgeval.v1; + +import java.util.Objects; + +import com.judgmentlabs.judgeval.Env; +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.v1.evaluation.EvaluationFactory; +import com.judgmentlabs.judgeval.v1.scorers.ScorersFactory; +import com.judgmentlabs.judgeval.v1.tracer.TracerFactory; + +public final class JudgmentClient { + private final String apiKey; + private final String organizationId; + private final String apiUrl; + private final JudgmentSyncClient internalClient; + + private JudgmentClient(Builder builder) { + this.apiKey = Objects.requireNonNull(builder.apiKey, "apiKey required"); + this.organizationId = Objects.requireNonNull(builder.organizationId, "organizationId required"); + this.apiUrl = builder.apiUrl != null ? 
builder.apiUrl : Env.JUDGMENT_API_URL; + this.internalClient = new JudgmentSyncClient(apiUrl, apiKey, organizationId); + } + + public TracerFactory tracer() { + return new TracerFactory(internalClient, apiKey, organizationId, apiUrl); + } + + public ScorersFactory scorers() { + return new ScorersFactory(internalClient, apiKey, organizationId); + } + + public EvaluationFactory evaluation() { + return new EvaluationFactory(internalClient, apiKey, organizationId); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private String apiKey = Env.JUDGMENT_API_KEY; + private String organizationId = Env.JUDGMENT_ORG_ID; + private String apiUrl = Env.JUDGMENT_API_URL; + + public Builder apiKey(String apiKey) { + this.apiKey = apiKey; + return this; + } + + public Builder organizationId(String organizationId) { + this.organizationId = organizationId; + return this; + } + + public Builder apiUrl(String apiUrl) { + this.apiUrl = apiUrl; + return this; + } + + public JudgmentClient build() { + return new JudgmentClient(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java new file mode 100644 index 0000000..309f3ab --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java @@ -0,0 +1,30 @@ +package com.judgmentlabs.judgeval.v1.evaluation; + +import java.util.Objects; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +public final class Evaluation { + private final JudgmentSyncClient client; + + private Evaluation(Builder builder) { + this.client = Objects.requireNonNull(builder.client, "client required"); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private JudgmentSyncClient client; + + Builder client(JudgmentSyncClient client) { + this.client 
= client; + return this; + } + + public Evaluation build() { + return new Evaluation(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java new file mode 100644 index 0000000..43a2995 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java @@ -0,0 +1,19 @@ +package com.judgmentlabs.judgeval.v1.evaluation; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +public final class EvaluationFactory { + private final JudgmentSyncClient client; + private final String apiKey; + private final String organizationId; + + public EvaluationFactory(JudgmentSyncClient client, String apiKey, String organizationId) { + this.client = client; + this.apiKey = apiKey; + this.organizationId = organizationId; + } + + public Evaluation.Builder create() { + return Evaluation.builder().client(client); + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/package-info.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/package-info.java new file mode 100644 index 0000000..a896949 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/package-info.java @@ -0,0 +1,4 @@ +/** + * Judgeval SDK v1 API. 
+ */ +package com.judgmentlabs.judgeval.v1; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java new file mode 100644 index 0000000..66eaa94 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java @@ -0,0 +1,34 @@ +package com.judgmentlabs.judgeval.v1.scorers; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.BuiltInScorersFactory; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorerFactory; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorerFactory; + +public final class ScorersFactory { + private final JudgmentSyncClient client; + private final String apiKey; + private final String organizationId; + + public ScorersFactory(JudgmentSyncClient client, String apiKey, String organizationId) { + this.client = client; + this.apiKey = apiKey; + this.organizationId = organizationId; + } + + public PromptScorerFactory promptScorer() { + return new PromptScorerFactory(client, apiKey, organizationId, false); + } + + public PromptScorerFactory tracePromptScorer() { + return new PromptScorerFactory(client, apiKey, organizationId, true); + } + + public CustomScorerFactory customScorer() { + return new CustomScorerFactory(); + } + + public BuiltInScorersFactory builtIn() { + return new BuiltInScorersFactory(); + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java new file mode 100644 index 0000000..fc504a9 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java @@ -0,0 +1,60 @@ +package com.judgmentlabs.judgeval.v1.scorers.built_in; + +import 
java.util.Arrays; + +import com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +public final class AnswerCorrectnessScorer extends APIScorer { + private AnswerCorrectnessScorer(Builder builder) { + super(APIScorerType.ANSWER_CORRECTNESS); + setRequiredParams(Arrays.asList("input", "actual_output", "expected_output")); + if (builder.threshold >= 0) { + setThreshold(builder.threshold); + } + if (builder.name != null) { + setName(builder.name); + } + if (builder.strictMode != null) { + setStrictMode(builder.strictMode); + } + if (builder.model != null) { + setModel(builder.model); + } + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private double threshold = -1; + private String name; + private Boolean strictMode; + private String model; + + public Builder threshold(double threshold) { + this.threshold = threshold; + return this; + } + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder strictMode(boolean strictMode) { + this.strictMode = strictMode; + return this; + } + + public Builder model(String model) { + this.model = model; + return this; + } + + public AnswerCorrectnessScorer build() { + return new AnswerCorrectnessScorer(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java new file mode 100644 index 0000000..a2b3a0a --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java @@ -0,0 +1,60 @@ +package com.judgmentlabs.judgeval.v1.scorers.built_in; + +import java.util.Arrays; + +import com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +public final class AnswerRelevancyScorer extends APIScorer { + private 
AnswerRelevancyScorer(Builder builder) { + super(APIScorerType.ANSWER_RELEVANCY); + setRequiredParams(Arrays.asList("input", "actual_output")); + if (builder.threshold >= 0) { + setThreshold(builder.threshold); + } + if (builder.name != null) { + setName(builder.name); + } + if (builder.strictMode != null) { + setStrictMode(builder.strictMode); + } + if (builder.model != null) { + setModel(builder.model); + } + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private double threshold = -1; + private String name; + private Boolean strictMode; + private String model; + + public Builder threshold(double threshold) { + this.threshold = threshold; + return this; + } + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder strictMode(boolean strictMode) { + this.strictMode = strictMode; + return this; + } + + public Builder model(String model) { + this.model = model; + return this; + } + + public AnswerRelevancyScorer build() { + return new AnswerRelevancyScorer(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java new file mode 100644 index 0000000..c8356f3 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java @@ -0,0 +1,26 @@ +package com.judgmentlabs.judgeval.v1.scorers.built_in; + +public final class BuiltInScorersFactory { + public BuiltInScorersFactory() { + } + + public AnswerCorrectnessScorer.Builder answerCorrectness() { + return AnswerCorrectnessScorer.builder(); + } + + public AnswerRelevancyScorer.Builder answerRelevancy() { + return AnswerRelevancyScorer.builder(); + } + + public FaithfulnessScorer.Builder faithfulness() { + return FaithfulnessScorer.builder(); + } + + public InstructionAdherenceScorer.Builder 
instructionAdherence() { + return InstructionAdherenceScorer.builder(); + } + + public DerailmentScorer.Builder derailment() { + return DerailmentScorer.builder(); + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java new file mode 100644 index 0000000..fbed3a8 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java @@ -0,0 +1,60 @@ +package com.judgmentlabs.judgeval.v1.scorers.built_in; + +import java.util.Arrays; + +import com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +public final class DerailmentScorer extends APIScorer { + private DerailmentScorer(Builder builder) { + super(APIScorerType.DERAILMENT); + setRequiredParams(Arrays.asList("input", "actual_output")); + if (builder.threshold >= 0) { + setThreshold(builder.threshold); + } + if (builder.name != null) { + setName(builder.name); + } + if (builder.strictMode != null) { + setStrictMode(builder.strictMode); + } + if (builder.model != null) { + setModel(builder.model); + } + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private double threshold = -1; + private String name; + private Boolean strictMode; + private String model; + + public Builder threshold(double threshold) { + this.threshold = threshold; + return this; + } + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder strictMode(boolean strictMode) { + this.strictMode = strictMode; + return this; + } + + public Builder model(String model) { + this.model = model; + return this; + } + + public DerailmentScorer build() { + return new DerailmentScorer(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java new file mode 100644 index 0000000..7109368 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java @@ -0,0 +1,60 @@ +package com.judgmentlabs.judgeval.v1.scorers.built_in; + +import java.util.Arrays; + +import com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +public final class FaithfulnessScorer extends APIScorer { + private FaithfulnessScorer(Builder builder) { + super(APIScorerType.FAITHFULNESS); + setRequiredParams(Arrays.asList("context", "actual_output")); + if (builder.threshold >= 0) { + setThreshold(builder.threshold); + } + if (builder.name != null) { + setName(builder.name); + } + if (builder.strictMode != null) { + setStrictMode(builder.strictMode); + } + if (builder.model != null) { + setModel(builder.model); + } + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private double threshold = -1; + private String name; + private Boolean strictMode; + private String model; + + public Builder threshold(double threshold) { + this.threshold = threshold; + return this; + } + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder strictMode(boolean strictMode) { + this.strictMode = strictMode; + return this; + } + + public Builder model(String model) { + this.model = model; + return this; + } + + public FaithfulnessScorer build() { + return new FaithfulnessScorer(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java new file mode 100644 index 0000000..fe64b8c --- /dev/null +++ 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java @@ -0,0 +1,60 @@ +package com.judgmentlabs.judgeval.v1.scorers.built_in; + +import java.util.Arrays; + +import com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +public final class InstructionAdherenceScorer extends APIScorer { + private InstructionAdherenceScorer(Builder builder) { + super(APIScorerType.INSTRUCTION_ADHERENCE); + setRequiredParams(Arrays.asList("input", "actual_output")); + if (builder.threshold >= 0) { + setThreshold(builder.threshold); + } + if (builder.name != null) { + setName(builder.name); + } + if (builder.strictMode != null) { + setStrictMode(builder.strictMode); + } + if (builder.model != null) { + setModel(builder.model); + } + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private double threshold = -1; + private String name; + private Boolean strictMode; + private String model; + + public Builder threshold(double threshold) { + this.threshold = threshold; + return this; + } + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder strictMode(boolean strictMode) { + this.strictMode = strictMode; + return this; + } + + public Builder model(String model) { + this.model = model; + return this; + } + + public InstructionAdherenceScorer build() { + return new InstructionAdherenceScorer(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java new file mode 100644 index 0000000..7ad6dd3 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java @@ -0,0 +1,48 @@ +package com.judgmentlabs.judgeval.v1.scorers.custom_scorer; + +import 
com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +public final class CustomScorer extends APIScorer { + private CustomScorer(Builder builder) { + super(APIScorerType.CUSTOM); + setName(builder.name); + setClassName(builder.className); + setServerHosted(builder.serverHosted); + } + + @Override + public ScorerConfig getScorerConfig() { + throw new UnsupportedOperationException("CustomScorer does not use ScorerConfig"); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private String name; + private String className; + private boolean serverHosted; + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder className(String className) { + this.className = className; + return this; + } + + public Builder serverHosted(boolean serverHosted) { + this.serverHosted = serverHosted; + return this; + } + + public CustomScorer build() { + return new CustomScorer(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java new file mode 100644 index 0000000..3734de2 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java @@ -0,0 +1,22 @@ +package com.judgmentlabs.judgeval.v1.scorers.custom_scorer; + +public final class CustomScorerFactory { + public CustomScorerFactory() { + } + + public CustomScorer get(String name) { + return CustomScorer.builder() + .name(name) + .className(name) + .serverHosted(true) + .build(); + } + + public CustomScorer get(String name, String className) { + return CustomScorer.builder() + .name(name) + .className(className) + .serverHosted(true) + .build(); + } +} diff --git 
a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java new file mode 100644 index 0000000..671214d --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java @@ -0,0 +1,119 @@ +package com.judgmentlabs.judgeval.v1.scorers.prompt_scorer; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import com.judgmentlabs.judgeval.data.APIScorerType; +import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +import com.judgmentlabs.judgeval.scorers.APIScorer; + +public final class PromptScorer extends APIScorer { + private final String prompt; + private final Map options; + private final String judgmentApiKey; + private final String organizationId; + private final boolean isTrace; + + private PromptScorer(Builder builder) { + super(builder.isTrace ? APIScorerType.TRACE_PROMPT_SCORER : APIScorerType.PROMPT_SCORER); + this.prompt = Objects.requireNonNull(builder.prompt, "prompt required"); + this.options = builder.options; + this.judgmentApiKey = builder.apiKey; + this.organizationId = builder.organizationId; + this.isTrace = builder.isTrace; + setName(Objects.requireNonNull(builder.name, "name required")); + setThreshold(builder.threshold); + } + + public String getPrompt() { + return prompt; + } + + public Map getOptions() { + return options != null ? 
new HashMap<>(options) : null; + } + + public String getScorerName() { + return getName(); + } + + @Override + public ScorerConfig getScorerConfig() { + ScorerConfig cfg = new ScorerConfig(); + cfg.setScoreType(getScoreType()); + cfg.setThreshold(getThreshold()); + cfg.setName(getName()); + cfg.setStrictMode(getStrictMode()); + cfg.setRequiredParams(getRequiredParams()); + Map kwargs = new HashMap<>(); + kwargs.put("prompt", prompt); + if (options != null) { + kwargs.put("options", options); + } + if (getAdditionalProperties() != null) { + kwargs.putAll(getAdditionalProperties()); + } + cfg.setKwargs(kwargs); + return cfg; + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private String name; + private String prompt; + private double threshold = 0.5; + private Map options; + private String apiKey; + private String organizationId; + private boolean isTrace; + + public Builder name(String name) { + this.name = name; + return this; + } + + public Builder prompt(String prompt) { + this.prompt = prompt; + return this; + } + + public Builder threshold(double threshold) { + this.threshold = threshold; + return this; + } + + public Builder options(Map options) { + this.options = options; + return this; + } + + Builder apiKey(String apiKey) { + this.apiKey = apiKey; + return this; + } + + Builder organizationId(String organizationId) { + this.organizationId = organizationId; + return this; + } + + Builder isTrace(boolean isTrace) { + this.isTrace = isTrace; + return this; + } + + public PromptScorer build() { + return new PromptScorer(this); + } + } + + @Override + public String toString() { + return "PromptScorer(name=" + getName() + ", prompt=" + prompt + ", threshold=" + getThreshold() + + ", options=" + options + ", isTrace=" + isTrace + ")"; + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java new file mode 100644 index 0000000..2f74c4d --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java @@ -0,0 +1,125 @@ +package com.judgmentlabs.judgeval.v1.scorers.prompt_scorer; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; + +import com.judgmentlabs.judgeval.exceptions.JudgmentAPIError; +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersRequest; +import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersResponse; + +public final class PromptScorerFactory { + private final JudgmentSyncClient client; + private final String apiKey; + private final String organizationId; + private final boolean isTrace; + private static final Map cache = new ConcurrentHashMap<>(); + + public PromptScorerFactory(JudgmentSyncClient client, String apiKey, String organizationId, boolean isTrace) { + this.client = client; + this.apiKey = apiKey; + this.organizationId = organizationId; + this.isTrace = isTrace; + } + + public PromptScorer get(String name) { + CacheKey key = new CacheKey(name, apiKey, organizationId); + com.judgmentlabs.judgeval.internal.api.models.PromptScorer cached = cache.get(key); + if (cached != null) { + return createFromModel(cached, name); + } + + try { + FetchPromptScorersRequest request = new FetchPromptScorersRequest(); + request.setNames(java.util.Collections.singletonList(name)); + + FetchPromptScorersResponse response = client.fetchScorers(request); + + com.judgmentlabs.judgeval.internal.api.models.PromptScorer scorer = Optional.ofNullable(response) + .map(FetchPromptScorersResponse::getScorers) + .filter(scorers -> scorers != null && !scorers.isEmpty()) + .map(scorers -> 
scorers.get(0)) + .orElseThrow( + () -> new JudgmentAPIError(404, "Failed to fetch prompt scorer '" + name + "': not found")); + + if (Boolean.TRUE.equals(scorer.getIsTrace()) != isTrace) { + String expectedType = isTrace ? "TracePromptScorer" : "PromptScorer"; + String actualType = Boolean.TRUE.equals(scorer.getIsTrace()) ? "TracePromptScorer" : "PromptScorer"; + throw new JudgmentAPIError(400, + "Scorer with name " + name + " is a " + actualType + ", not a " + expectedType); + } + + cache.put(key, scorer); + return createFromModel(scorer, name); + } catch (JudgmentAPIError e) { + throw e; + } catch (Exception e) { + throw new JudgmentAPIError(500, "Failed to fetch prompt scorer '" + name + "': " + e.getMessage()); + } + } + + private PromptScorer createFromModel(com.judgmentlabs.judgeval.internal.api.models.PromptScorer model, + String name) { + Map options = null; + if (model.getOptions() != null) { + if (model.getOptions() instanceof Map) { + @SuppressWarnings("unchecked") + Map rawOptions = (Map) model.getOptions(); + options = new HashMap<>(); + for (Map.Entry entry : rawOptions.entrySet()) { + if (entry.getValue() instanceof Number) { + options.put(entry.getKey(), ((Number) entry.getValue()).doubleValue()); + } + } + } + } + + return PromptScorer.builder() + .name(name) + .prompt(model.getPrompt()) + .threshold(Optional.ofNullable(model.getThreshold()).orElse(0.5)) + .options(options) + .apiKey(apiKey) + .organizationId(organizationId) + .isTrace(isTrace) + .build(); + } + + public PromptScorer.Builder create() { + return PromptScorer.builder() + .apiKey(apiKey) + .organizationId(organizationId) + .isTrace(isTrace); + } + + private static final class CacheKey { + private final String name; + private final String apiKey; + private final String organizationId; + + CacheKey(String name, String apiKey, String organizationId) { + this.name = name; + this.apiKey = apiKey; + this.organizationId = organizationId; + } + + @Override + public boolean equals(Object obj) 
{ + if (this == obj) + return true; + if (obj == null || getClass() != obj.getClass()) + return false; + CacheKey that = (CacheKey) obj; + return Objects.equals(name, that.name) && Objects.equals(apiKey, that.apiKey) + && Objects.equals(organizationId, that.organizationId); + } + + @Override + public int hashCode() { + return Objects.hash(name, apiKey, organizationId); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java new file mode 100644 index 0000000..360fc00 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -0,0 +1,352 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import java.lang.reflect.Type; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.judgmentlabs.judgeval.Env; +import com.judgmentlabs.judgeval.data.EvaluationRunBuilder; +import com.judgmentlabs.judgeval.data.Example; +import com.judgmentlabs.judgeval.data.TraceEvaluationRunBuilder; +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.ExampleEvaluationRun; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; +import com.judgmentlabs.judgeval.internal.api.models.TraceEvaluationRun; +import com.judgmentlabs.judgeval.scorers.BaseScorer; +import com.judgmentlabs.judgeval.tracer.ISerializer; +import com.judgmentlabs.judgeval.tracer.JudgevalTraceKeys; +import com.judgmentlabs.judgeval.tracer.TracerConfiguration; +import com.judgmentlabs.judgeval.tracer.exporters.JudgmentSpanExporter; +import com.judgmentlabs.judgeval.tracer.exporters.NoOpSpanExporter; +import com.judgmentlabs.judgeval.utils.Logger; + +import io.opentelemetry.api.GlobalOpenTelemetry; 
+import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanContext; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.context.Scope; +import io.opentelemetry.sdk.trace.export.SpanExporter; + +public abstract class BaseTracer { + public static final String TRACER_NAME = "judgeval"; + + protected final TracerConfiguration configuration; + protected final JudgmentSyncClient apiClient; + protected final ISerializer serializer; + protected final ObjectMapper jacksonMapper; + protected final Optional projectId; + + protected BaseTracer(TracerConfiguration configuration, JudgmentSyncClient apiClient, ISerializer serializer) { + this.configuration = Objects.requireNonNull(configuration, "configuration required"); + this.apiClient = Objects.requireNonNull(apiClient, "apiClient required"); + this.serializer = Objects.requireNonNull(serializer, "serializer required"); + this.jacksonMapper = new ObjectMapper(); + this.projectId = resolveProjectId(configuration.projectName()); + + this.projectId.ifPresentOrElse(id -> { + }, () -> Logger.error("Failed to resolve project " + configuration.projectName() + + ", please create it first at https://app.judgmentlabs.ai/org/" + configuration.organizationId() + + "/projects. 
Skipping Judgment export.")); + } + + public abstract void initialize(); + + public abstract boolean forceFlush(int timeoutMillis); + + public abstract void shutdown(int timeoutMillis); + + public SpanExporter getSpanExporter() { + return projectId.map(this::createJudgmentSpanExporter) + .orElseGet(() -> { + Logger.error("Project not resolved; cannot create exporter, returning NoOpSpanExporter"); + return new NoOpSpanExporter(); + }); + } + + public void setSpanKind(String kind) { + Optional.ofNullable(kind) + .ifPresent(k -> withCurrentSpan( + span -> span.setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_SPAN_KIND, k))); + } + + private static void withCurrentSpan(java.util.function.Consumer action) { + Optional.ofNullable(Span.current()) + .ifPresent(action); + } + + private static boolean isValidKey(String key) { + return key != null && !key.isEmpty(); + } + + public void setAttribute(String key, Object value) { + if (!isValidKey(key)) { + return; + } + if (value != null) { + setAttribute(key, value, value.getClass()); + } + } + + public void setAttribute(String key, Object value, Type type) { + if (!isValidKey(key)) { + return; + } + if (value != null) { + withCurrentSpan(span -> span.setAttribute(key, serializer.serialize(value, type))); + } + } + + public void setAttribute(String key, String value) { + if (!isValidKey(key)) { + return; + } + withCurrentSpan(span -> span.setAttribute(key, value)); + } + + public void setAttribute(String key, long value) { + if (!isValidKey(key)) { + return; + } + withCurrentSpan(span -> span.setAttribute(key, value)); + } + + public void setAttribute(String key, double value) { + if (!isValidKey(key)) { + return; + } + withCurrentSpan(span -> span.setAttribute(key, value)); + } + + public void setAttribute(String key, boolean value) { + if (!isValidKey(key)) { + return; + } + withCurrentSpan(span -> span.setAttribute(key, value)); + } + + private Optional getSampledSpanContext() { + return 
Optional.ofNullable(Span.current()) + .filter(span -> span.getSpanContext() + .isSampled()) + .map(Span::getSpanContext); + } + + private Optional getSampledSpan() { + return Optional.ofNullable(Span.current()) + .filter(span -> span.getSpanContext() + .isSampled()); + } + + private boolean isEvaluationEnabled() { + return configuration.enableEvaluation(); + } + + private void logEvaluationInfo(String method, String traceId, String spanId, String scorerName) { + Logger.info(method + ": project=" + configuration.projectName() + ", traceId=" + traceId + ", spanId=" + + spanId + ", scorer=" + scorerName); + } + + private void safeExecute(String operation, Runnable action) { + try { + action.run(); + } catch (Exception e) { + Logger.error("Failed to " + operation + ": " + e.getMessage()); + } + } + + public void asyncEvaluate(BaseScorer scorer, Example example, String model) { + safeExecute("evaluate scorer", () -> { + if (!isEvaluationEnabled()) { + return; + } + + getSampledSpanContext().ifPresent(spanContext -> { + String traceId = spanContext.getTraceId(); + String spanId = spanContext.getSpanId(); + + logEvaluationInfo("asyncEvaluate", traceId, spanId, scorer.getName()); + + ExampleEvaluationRun evaluationRun = createEvaluationRun(scorer, example, model, traceId, spanId); + enqueueEvaluation(evaluationRun); + }); + }); + } + + public void asyncEvaluate(BaseScorer scorer, Example example) { + asyncEvaluate(scorer, example, null); + } + + public void asyncTraceEvaluate(BaseScorer scorer, String model) { + safeExecute("evaluate trace scorer", () -> { + if (!isEvaluationEnabled()) { + return; + } + + getSampledSpan().ifPresent(currentSpan -> { + SpanContext spanContext = currentSpan.getSpanContext(); + String traceId = spanContext.getTraceId(); + String spanId = spanContext.getSpanId(); + + logEvaluationInfo("asyncTraceEvaluate", traceId, spanId, scorer.getName()); + + TraceEvaluationRun evaluationRun = createTraceEvaluationRun(scorer, model, traceId, spanId); + try { 
+ String traceEvalJson = jacksonMapper.writeValueAsString(evaluationRun); + currentSpan.setAttribute(JudgevalTraceKeys.AttributeKeys.PENDING_TRACE_EVAL, traceEvalJson); + } catch (Exception e) { + Logger.error("Failed to serialize trace evaluation: " + e.getMessage()); + } + }); + }); + } + + public void asyncTraceEvaluate(BaseScorer scorer) { + asyncTraceEvaluate(scorer, null); + } + + public void setAttributes(Map attributes) { + Optional.ofNullable(attributes) + .ifPresent(attrs -> attrs.forEach(this::setAttribute)); + } + + public void setLLMSpan() { + setSpanKind("llm"); + } + + public void setToolSpan() { + setSpanKind("tool"); + } + + public void setGeneralSpan() { + setSpanKind("span"); + } + + public void setInput(Object input) { + setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_INPUT, input); + } + + public void setOutput(Object output) { + setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_OUTPUT, output); + } + + public void setInput(Object input, Type type) { + setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_INPUT, input, type); + } + + public void setOutput(Object output, Type type) { + setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_OUTPUT, output, type); + } + + public void span(String spanName, Runnable runnable) { + Span span = getTracer().spanBuilder(spanName) + .startSpan(); + try (Scope scope = span.makeCurrent()) { + runnable.run(); + } catch (Exception e) { + span.setStatus(StatusCode.ERROR).recordException(e); + throw e; + } finally { + span.end(); + } + } + + public T span(String spanName, java.util.concurrent.Callable callable) throws Exception { + Span span = getTracer().spanBuilder(spanName) + .startSpan(); + try (Scope scope = span.makeCurrent()) { + return callable.call(); + } catch (Exception e) { + span.setStatus(StatusCode.ERROR).recordException(e); + throw e; + } finally { + span.end(); + } + } + + public io.opentelemetry.api.trace.Tracer getTracer() { + return GlobalOpenTelemetry.get() + 
.getTracer(TRACER_NAME); + } + + public TracerConfiguration getConfiguration() { + return configuration; + } + + public Optional getProjectId() { + return projectId; + } + + public static Span span(String spanName) { + return GlobalOpenTelemetry.get() + .getTracer(TRACER_NAME) + .spanBuilder(spanName) + .startSpan(); + } + + private Optional resolveProjectId(String name) { + try { + ResolveProjectNameRequest request = new ResolveProjectNameRequest(); + request.setProjectName(name); + ResolveProjectNameResponse response = apiClient.projectsResolve(request); + return Optional.ofNullable(response.getProjectId()) + .map(Object::toString); + } catch (Exception e) { + return Optional.empty(); + } + } + + private String buildEndpoint(String baseUrl) { + return baseUrl.endsWith("/") ? baseUrl + "otel/v1/traces" : baseUrl + "/otel/v1/traces"; + } + + private JudgmentSpanExporter createJudgmentSpanExporter(String projectId) { + return JudgmentSpanExporter.builder() + .endpoint(buildEndpoint(configuration.apiUrl())) + .apiKey(configuration.apiKey()) + .organizationId(configuration.organizationId()) + .projectId(projectId) + .build(); + } + + private String generateRunId(String prefix, String spanId) { + return prefix + Optional.ofNullable(spanId) + .orElseGet(() -> String.valueOf(System.currentTimeMillis())); + } + + private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example example, String model, String traceId, + String spanId) { + String runId = generateRunId("async_evaluate_", spanId); + return new EvaluationRunBuilder() + .projectName(configuration.projectName()) + .evalName(runId) + .model(model != null ? 
model : Env.JUDGMENT_DEFAULT_GPT_MODEL) + .example(example) + .trace(traceId, spanId) + .addScorer(scorer) + .build(); + } + + private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String model, String traceId, + String spanId) { + String evalName = generateRunId("async_trace_evaluate_", spanId); + return new TraceEvaluationRunBuilder() + .projectName(configuration.projectName()) + .evalName(evalName) + .model(model != null ? model : Env.JUDGMENT_DEFAULT_GPT_MODEL) + .trace(traceId, spanId) + .addScorer(scorer) + .build(); + } + + private void enqueueEvaluation(ExampleEvaluationRun evaluationRun) { + try { + apiClient.addToRunEvalQueue(evaluationRun); + } catch (Exception e) { + Logger.error("Failed to enqueue evaluation run: " + e.getMessage()); + } + } +} + diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java new file mode 100644 index 0000000..d09c9f9 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java @@ -0,0 +1,171 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import java.lang.reflect.Type; +import java.util.Objects; +import java.util.Optional; + +import com.google.gson.Gson; +import com.judgmentlabs.judgeval.Env; +import com.judgmentlabs.judgeval.Version; +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.tracer.ISerializer; +import com.judgmentlabs.judgeval.tracer.TracerConfiguration; +import com.judgmentlabs.judgeval.utils.Logger; + +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.resources.Resource; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.export.BatchSpanProcessor; +import 
io.opentelemetry.sdk.trace.export.SpanExporter; + +public final class Tracer extends BaseTracer { + private SdkTracerProvider tracerProvider; + + private Tracer(Builder builder) { + super( + buildConfiguration(builder), + new JudgmentSyncClient( + builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL, + Objects.requireNonNull(builder.apiKey, "apiKey required"), + Objects.requireNonNull(builder.organizationId, "organizationId required")), + builder.serializer != null ? builder.serializer : new GsonSerializer()); + + if (builder.initialize) { + initialize(); + } + } + + private static TracerConfiguration buildConfiguration(Builder builder) { + return TracerConfiguration.builder() + .projectName(Objects.requireNonNull(builder.projectName, "projectName required")) + .apiKey(Objects.requireNonNull(builder.apiKey, "apiKey required")) + .organizationId(Objects.requireNonNull(builder.organizationId, "organizationId required")) + .apiUrl(builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL) + .enableEvaluation(builder.enableEvaluation) + .build(); + } + + @Override + public void initialize() { + SpanExporter spanExporter = getSpanExporter(); + + var resource = Resource.getDefault() + .merge(Resource.create(Attributes.builder() + .put("service.name", configuration.projectName()) + .put("telemetry.sdk.name", TRACER_NAME) + .put("telemetry.sdk.version", Version.getVersion()) + .build())); + + this.tracerProvider = SdkTracerProvider.builder() + .setResource(resource) + .addSpanProcessor(BatchSpanProcessor.builder(spanExporter) + .build()) + .build(); + + OpenTelemetry openTelemetry = OpenTelemetrySdk.builder() + .setTracerProvider(this.tracerProvider) + .build(); + + GlobalOpenTelemetry.set(openTelemetry); + } + + @Override + public boolean forceFlush(int timeoutMillis) { + if (tracerProvider == null) { + Logger.error("Cannot forceFlush: tracer not initialized"); + return false; + } + return tracerProvider.forceFlush() + .join(timeoutMillis, 
java.util.concurrent.TimeUnit.MILLISECONDS) + .isSuccess(); + } + + @Override + public void shutdown(int timeoutMillis) { + if (tracerProvider == null) { + Logger.error("Cannot shutdown: tracer not initialized"); + return; + } + tracerProvider.shutdown() + .join(timeoutMillis, java.util.concurrent.TimeUnit.MILLISECONDS); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private String projectName; + private String apiKey; + private String organizationId; + private String apiUrl; + private boolean enableEvaluation = true; + private ISerializer serializer; + private boolean initialize = false; + + public Builder projectName(String projectName) { + this.projectName = projectName; + return this; + } + + public Builder apiKey(String apiKey) { + this.apiKey = apiKey; + return this; + } + + public Builder organizationId(String organizationId) { + this.organizationId = organizationId; + return this; + } + + public Builder apiUrl(String apiUrl) { + this.apiUrl = apiUrl; + return this; + } + + public Builder enableEvaluation(boolean enableEvaluation) { + this.enableEvaluation = enableEvaluation; + return this; + } + + public Builder serializer(ISerializer serializer) { + this.serializer = serializer; + return this; + } + + public Builder initialize(boolean initialize) { + this.initialize = initialize; + return this; + } + + public Tracer build() { + return new Tracer(this); + } + } + + private static class GsonSerializer implements ISerializer { + private final Gson gson = new Gson(); + + @Override + public String serialize(Object obj) { + return Optional.ofNullable(obj) + .map(o -> serialize(o, o.getClass())) + .orElse(null); + } + + @Override + public String serialize(Object obj, Type type) { + try { + return gson.toJson(obj, type); + } catch (Exception e) { + Logger.error("Failed to serialize object: " + e.getMessage()); + return Optional.ofNullable(obj) + .map(Object::toString) + .orElse(null); + } + } + } +} 
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java new file mode 100644 index 0000000..14dbf29 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java @@ -0,0 +1,24 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +public final class TracerFactory { + private final JudgmentSyncClient client; + private final String apiKey; + private final String organizationId; + private final String apiUrl; + + public TracerFactory(JudgmentSyncClient client, String apiKey, String organizationId, String apiUrl) { + this.client = client; + this.apiKey = apiKey; + this.organizationId = organizationId; + this.apiUrl = apiUrl; + } + + public Tracer.Builder create() { + return Tracer.builder() + .apiKey(apiKey) + .organizationId(organizationId) + .apiUrl(apiUrl); + } +} From 53453a99bdbba247845339a3c7b1462bf9390b1b Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 5 Nov 2025 16:09:11 -0800 Subject: [PATCH 02/18] guide --- MIGRATION_V1.md | 507 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 507 insertions(+) create mode 100644 MIGRATION_V1.md diff --git a/MIGRATION_V1.md b/MIGRATION_V1.md new file mode 100644 index 0000000..22c9055 --- /dev/null +++ b/MIGRATION_V1.md @@ -0,0 +1,507 @@ +# Migration Guide: v0 to v1 + +This guide shows how to migrate from the deprecated v0 API to the new v1 API in `judgeval-java`. 
+ +## Client Initialization + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; + +JudgmentClient client = JudgmentClient.builder() + .apiKey("your-api-key") // or use JUDGMENT_API_KEY env var + .organizationId("your-org-id") // or use JUDGMENT_ORG_ID env var + .apiUrl("https://api.judgmentlabs.ai") // optional, defaults to production + .build(); +``` + +The client automatically creates an internal `JudgmentSyncClient` that is passed to all child objects. + +## 1. Tracer Migration + +### Basic Tracer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.tracer.Tracer; + +Tracer tracer = Tracer.createDefault("my-project"); +tracer.initialize(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.tracer.Tracer; + +JudgmentClient client = JudgmentClient.builder().build(); + +Tracer tracer = client.tracer() + .create() + .projectName("my-project") + .build(); + +tracer.initialize(); +``` + +### Tracer with Custom Configuration + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.tracer.Tracer; +import com.judgmentlabs.judgeval.tracer.TracerConfiguration; + +TracerConfiguration config = TracerConfiguration.builder() + .projectName("my-project") + .apiKey("key") + .organizationId("org") + .enableEvaluation(true) + .build(); + +Tracer tracer = Tracer.createWithConfiguration(config); +tracer.initialize(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.tracer.Tracer; + +JudgmentClient client = JudgmentClient.builder() + .apiKey("key") + .organizationId("org") + .build(); + +Tracer tracer = client.tracer() + .create() + .projectName("my-project") + .enableEvaluation(true) + .build(); + +tracer.initialize(); +``` + +### Using Tracer Methods + +All `BaseTracer` methods remain unchanged: + +```java +tracer.setAttribute("key", "value"); +tracer.setInput(inputData); 
+tracer.setOutput(outputData); +tracer.setLLMSpan(); +tracer.asyncEvaluate(scorer, example); +tracer.asyncTraceEvaluate(scorer); + +tracer.span("operation", () -> { + // your code +}); + +tracer.forceFlush(5000); +tracer.shutdown(5000); +``` + +## 2. PromptScorer Migration + +### Fetching Existing Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.PromptScorer; + +PromptScorer scorer = PromptScorer.get("my-scorer"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +PromptScorer scorer = client.scorers() + .promptScorer() + .get("my-scorer"); +``` + +### Creating New Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.PromptScorer; +import java.util.Map; + +PromptScorer scorer = new PromptScorer( + "accuracy-checker", + "Does the output accurately answer the question?", + 0.7, + Map.of("yes", 1.0, "no", 0.0) +); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; +import java.util.Map; + +JudgmentClient client = JudgmentClient.builder().build(); + +PromptScorer scorer = client.scorers() + .promptScorer() + .create() + .name("accuracy-checker") + .prompt("Does the output accurately answer the question?") + .threshold(0.7) + .options(Map.of("yes", 1.0, "no", 0.0)) + .build(); +``` + +## 3. 
TracePromptScorer Migration + +### Fetching Existing Trace Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.TracePromptScorer; + +TracePromptScorer scorer = TracePromptScorer.get("my-scorer"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.TracePromptScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +TracePromptScorer scorer = client.scorers() + .promptScorer() + .getTrace("my-scorer"); +``` + +### Creating New Trace Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.TracePromptScorer; +import java.util.Map; + +TracePromptScorer scorer = new TracePromptScorer( + "response-quality", + "Does this trace show a high-quality response flow?", + 0.75, + null +); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.TracePromptScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +TracePromptScorer scorer = client.scorers() + .promptScorer() + .createTrace() + .name("response-quality") + .prompt("Does this trace show a high-quality response flow?") + .threshold(0.75) + .build(); +``` + +## 4. 
CustomScorer Migration + +### Basic Custom Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; + +CustomScorer scorer = CustomScorer.get("my-custom-scorer"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +CustomScorer scorer = client.scorers() + .customScorer() + .get("my-custom-scorer"); +``` + +### Custom Scorer with Class Name + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; + +CustomScorer scorer = CustomScorer.get("my-scorer", "MyCustomScorerClass"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +CustomScorer scorer = client.scorers() + .customScorer() + .get("my-scorer", "MyCustomScorerClass"); +``` + +## 5. 
Built-in Scorers Migration + +### AnswerCorrectnessScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.AnswerCorrectnessScorer; + +AnswerCorrectnessScorer scorer = AnswerCorrectnessScorer.create(); + +AnswerCorrectnessScorer scorerWithThreshold = AnswerCorrectnessScorer.create(0.8); + +AnswerCorrectnessScorer customScorer = AnswerCorrectnessScorer.builder() + .threshold(0.7) + .name("custom-correctness") + .build(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerCorrectnessScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +AnswerCorrectnessScorer scorer = client.scorers() + .builtIn() + .answerCorrectness() + .build(); + +AnswerCorrectnessScorer scorerWithThreshold = client.scorers() + .builtIn() + .answerCorrectness() + .threshold(0.8) + .build(); + +AnswerCorrectnessScorer customScorer = client.scorers() + .builtIn() + .answerCorrectness() + .threshold(0.7) + .name("custom-correctness") + .build(); +``` + +### AnswerRelevancyScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.AnswerRelevancyScorer; + +AnswerRelevancyScorer scorer = AnswerRelevancyScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerRelevancyScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +AnswerRelevancyScorer scorer = client.scorers() + .builtIn() + .answerRelevancy() + .build(); +``` + +### FaithfulnessScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.FaithfulnessScorer; + +FaithfulnessScorer scorer = FaithfulnessScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.FaithfulnessScorer; + +JudgmentClient client = 
JudgmentClient.builder().build(); + +FaithfulnessScorer scorer = client.scorers() + .builtIn() + .faithfulness() + .build(); +``` + +### InstructionAdherenceScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.InstructionAdherenceScorer; + +InstructionAdherenceScorer scorer = InstructionAdherenceScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.InstructionAdherenceScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +InstructionAdherenceScorer scorer = client.scorers() + .builtIn() + .instructionAdherence() + .build(); +``` + +### DerailmentScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.DerailmentScorer; + +DerailmentScorer scorer = DerailmentScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.DerailmentScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +DerailmentScorer scorer = client.scorers() + .builtIn() + .derailment() + .build(); +``` + +## Complete Example: Before and After + +### Before (v0) + +```java +import com.judgmentlabs.judgeval.tracer.Tracer; +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.PromptScorer; +import com.judgmentlabs.judgeval.scorers.api_scorers.AnswerCorrectnessScorer; +import com.judgmentlabs.judgeval.data.Example; + +public class OldExample { + public static void main(String[] args) { + Tracer tracer = Tracer.createDefault("my-project"); + tracer.initialize(); + + PromptScorer promptScorer = PromptScorer.get("accuracy-checker"); + + AnswerCorrectnessScorer builtInScorer = AnswerCorrectnessScorer.create(0.8); + + Example example = Example.builder() + .property("input", "What is 2+2?") + .property("actual_output", "4") + .property("expected_output", "4") + .build(); + + 
tracer.span("evaluate", () -> { + tracer.setInput("What is 2+2?"); + tracer.setOutput("4"); + tracer.asyncEvaluate(promptScorer, example); + tracer.asyncEvaluate(builtInScorer, example); + }); + + tracer.forceFlush(5000); + tracer.shutdown(5000); + } +} +``` + +### After (v1) + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.tracer.Tracer; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; +import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerCorrectnessScorer; +import com.judgmentlabs.judgeval.data.Example; + +public class NewExample { + public static void main(String[] args) { + JudgmentClient client = JudgmentClient.builder() + .apiKey(System.getenv("JUDGMENT_API_KEY")) + .organizationId(System.getenv("JUDGMENT_ORG_ID")) + .build(); + + Tracer tracer = client.tracer() + .create() + .projectName("my-project") + .enableEvaluation(true) + .build(); + + tracer.initialize(); + + PromptScorer promptScorer = client.scorers() + .promptScorer() + .get("accuracy-checker"); + + AnswerCorrectnessScorer builtInScorer = client.scorers() + .builtIn() + .answerCorrectness() + .threshold(0.8) + .build(); + + Example example = Example.builder() + .property("input", "What is 2+2?") + .property("actual_output", "4") + .property("expected_output", "4") + .build(); + + tracer.span("evaluate", () -> { + tracer.setInput("What is 2+2?"); + tracer.setOutput("4"); + tracer.asyncEvaluate(promptScorer, example); + tracer.asyncEvaluate(builtInScorer, example); + }); + + tracer.forceFlush(5000); + tracer.shutdown(5000); + } +} +``` + From f362421f35f2b84574f8d63ce3cae586d16b7759 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 5 Nov 2025 17:01:42 -0800 Subject: [PATCH 03/18] updates --- .vscode/settings.json | 2 + .../examples/v1_quick_start/V1QuickStart.java | 2 +- .../judgeval/JudgmentAttributeKeys.java | 51 +++++ .../judgeval/data/APIScorerType.java | 5 + .../judgeval/data/EvaluationRunBuilder.java 
| 5 + .../judgmentlabs/judgeval/data/Example.java | 4 + .../judgeval/data/ScorerData.java | 4 + .../judgeval/data/ScoringResult.java | 5 + .../data/TraceEvaluationRunBuilder.java | 5 + .../judgeval/scorers/APIScorer.java | 5 + .../judgeval/scorers/BaseScorer.java | 4 + .../judgeval/tracer/ISerializer.java | 5 + .../judgeval/tracer/TracerConfiguration.java | 3 + .../judgeval/v1/data/APIScorerType.java | 31 +++ .../judgeval/v1/data/Example.java | 42 ++++ .../judgeval/v1/data/ScorerData.java | 77 ++++++++ .../judgeval/v1/data/ScoringResult.java | 55 ++++++ .../judgeval/v1/scorers/APIScorer.java | 135 +++++++++++++ .../judgeval/v1/scorers/BaseScorer.java | 23 +++ .../built_in/AnswerCorrectnessScorer.java | 4 +- .../built_in/AnswerRelevancyScorer.java | 4 +- .../v1/scorers/built_in/DerailmentScorer.java | 4 +- .../scorers/built_in/FaithfulnessScorer.java | 4 +- .../built_in/InstructionAdherenceScorer.java | 4 +- .../scorers/custom_scorer/CustomScorer.java | 4 +- .../scorers/prompt_scorer/PromptScorer.java | 4 +- .../judgeval/v1/tracer/BaseTracer.java | 137 +++++++++----- .../judgeval/v1/tracer/ISerializer.java | 11 ++ .../judgeval/v1/tracer/Tracer.java | 32 ++-- .../exporters/JudgmentSpanExporter.java | 179 ++++++++++++++++++ .../v1/tracer/exporters/NoOpSpanExporter.java | 45 +++++ package.json | 33 ++++ scripts/run-example.sh | 17 ++ 33 files changed, 859 insertions(+), 86 deletions(-) create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/JudgmentAttributeKeys.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java 
create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/JudgmentSpanExporter.java create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/NoOpSpanExporter.java create mode 100644 package.json create mode 100755 scripts/run-example.sh diff --git a/.vscode/settings.json b/.vscode/settings.json index 49b061d..6c6c247 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,6 @@ { "java.configuration.updateBuildConfiguration": "automatic", + "java.import.maven.enabled": true, + "java.compile.nullAnalysis.mode": "automatic", "java.format.settings.url": ".vscode/java-formatter.xml" } diff --git a/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java b/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java index c7c96e5..b7d937b 100644 --- a/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java +++ b/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java @@ -1,6 +1,6 @@ package examples.v1_quick_start; -import com.judgmentlabs.judgeval.data.Example; +import com.judgmentlabs.judgeval.v1.data.Example; import com.judgmentlabs.judgeval.v1.JudgmentClient; import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerCorrectnessScorer; import com.judgmentlabs.judgeval.v1.scorers.built_in.FaithfulnessScorer; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/JudgmentAttributeKeys.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/JudgmentAttributeKeys.java new file mode 100644 index 0000000..004b8d5 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/JudgmentAttributeKeys.java @@ -0,0 +1,51 @@ +package com.judgmentlabs.judgeval; + +public final class JudgmentAttributeKeys { + public static final class 
AttributeKeys { + public static final String JUDGMENT_SPAN_KIND = "judgment.span_kind"; + public static final String JUDGMENT_INPUT = "judgment.input"; + public static final String JUDGMENT_OUTPUT = "judgment.output"; + public static final String JUDGMENT_OFFLINE_MODE = "judgment.offline_mode"; + public static final String JUDGMENT_UPDATE_ID = "judgment.update_id"; + public static final String JUDGMENT_CUSTOMER_ID = "judgment.customer_id"; + public static final String JUDGMENT_AGENT_ID = "judgment.agent_id"; + public static final String JUDGMENT_PARENT_AGENT_ID = "judgment.parent_agent_id"; + public static final String JUDGMENT_AGENT_CLASS_NAME = "judgment.agent_class_name"; + public static final String JUDGMENT_AGENT_INSTANCE_NAME = "judgment.agent_instance_name"; + public static final String JUDGMENT_IS_AGENT_ENTRY_POINT = "judgment.is_agent_entry_point"; + public static final String JUDGMENT_CUMULATIVE_LLM_COST = "judgment.cumulative_llm_cost"; + public static final String JUDGMENT_STATE_BEFORE = "judgment.state_before"; + public static final String JUDGMENT_STATE_AFTER = "judgment.state_after"; + public static final String JUDGMENT_PENDING_TRACE_EVAL = "judgment.pending_trace_eval"; + + public static final String GEN_AI_PROMPT = "gen_ai.prompt"; + public static final String GEN_AI_COMPLETION = "gen_ai.completion"; + public static final String GEN_AI_REQUEST_MODEL = "gen_ai.request.model"; + public static final String GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"; + public static final String GEN_AI_SYSTEM = "gen_ai.system"; + public static final String GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"; + public static final String GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"; + public static final String GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = "gen_ai.usage.cache_creation_input_tokens"; + public static final String GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"; + public static final String 
GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"; + public static final String GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"; + public static final String GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"; + + private AttributeKeys() { + } + } + + public static final class ResourceKeys { + public static final String SERVICE_NAME = "service.name"; + public static final String TELEMETRY_SDK_LANGUAGE = "telemetry.sdk.language"; + public static final String TELEMETRY_SDK_NAME = "telemetry.sdk.name"; + public static final String TELEMETRY_SDK_VERSION = "telemetry.sdk.version"; + public static final String JUDGMENT_PROJECT_ID = "judgment.project_id"; + + private ResourceKeys() { + } + } + + private JudgmentAttributeKeys() { + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java index 49e9af2..e3765f7 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java @@ -1,5 +1,10 @@ package com.judgmentlabs.judgeval.data; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.APIScorerType} + * instead. 
+ */ +@Deprecated public enum APIScorerType { PROMPT_SCORER("Prompt Scorer"), TRACE_PROMPT_SCORER("Trace Prompt Scorer"), diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java index 5e0bb09..f6cc75e 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java @@ -13,6 +13,11 @@ import com.judgmentlabs.judgeval.scorers.BaseScorer; import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; +/** + * @deprecated Replaced by + * com.judgmentlabs.judgeval.v1.data.EvaluationRunBuilder + */ +@Deprecated public class EvaluationRunBuilder { private String projectName; private String evalName; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java index a09ec10..ff0f53c 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java @@ -3,6 +3,10 @@ import java.time.Instant; import java.util.UUID; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.Example} instead. + */ +@Deprecated public class Example extends com.judgmentlabs.judgeval.internal.api.models.Example { public Example() { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java index fb84d4e..aa94afb 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java @@ -2,6 +2,10 @@ import java.util.Map; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.ScorerData} instead. 
+ */ +@Deprecated public class ScorerData extends com.judgmentlabs.judgeval.internal.api.models.ScorerData { public static Builder builder() { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java index 0f262c7..60ae78e 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java @@ -2,6 +2,11 @@ import java.util.List; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.ScoringResult} + * instead. + */ +@Deprecated public class ScoringResult extends com.judgmentlabs.judgeval.internal.api.models.ScoringResult { public static Builder builder() { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java index 4e3dd33..7c114eb 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java @@ -13,6 +13,11 @@ import com.judgmentlabs.judgeval.scorers.BaseScorer; import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; +/** + * @deprecated Replaced by + * com.judgmentlabs.judgeval.v1.data.TraceEvaluationRunBuilder + */ +@Deprecated public class TraceEvaluationRunBuilder { private String projectName; private String evalName; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java index 565b48b..1c6e739 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java @@ -10,6 +10,11 @@ import 
com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.scorers.APIScorer} + * instead. + */ +@Deprecated public class APIScorer extends com.judgmentlabs.judgeval.internal.api.models.BaseScorer implements BaseScorer { private APIScorerType scoreType; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java index 3a5b231..0eb8c46 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java @@ -5,7 +5,11 @@ /** * Minimal interface for scorers used by BaseTracer. Only requires the essential * methods needed for evaluation. + * + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.scorers.BaseScorer} + * instead. */ +@Deprecated public interface BaseScorer { /** * Gets the name of the scorer. diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java index 2f5e0f2..737d80e 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java @@ -2,6 +2,11 @@ import java.lang.reflect.Type; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.tracer.ISerializer} + * instead. 
+ */ +@Deprecated public interface ISerializer { String serialize(Object obj); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java index bd296ea..93270a7 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java @@ -23,7 +23,10 @@ * } * * @see Tracer + * @deprecated Replaced by + * com.judgmentlabs.judgeval.v1.tracer.TracerConfiguration */ +@Deprecated public final class TracerConfiguration { private final String projectName; private final String apiKey; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java new file mode 100644 index 0000000..05d48e8 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java @@ -0,0 +1,31 @@ +package com.judgmentlabs.judgeval.v1.data; + +public enum APIScorerType { + PROMPT_SCORER("Prompt Scorer"), + TRACE_PROMPT_SCORER("Trace Prompt Scorer"), + FAITHFULNESS("Faithfulness"), + ANSWER_RELEVANCY("Answer Relevancy"), + ANSWER_CORRECTNESS("Answer Correctness"), + INSTRUCTION_ADHERENCE("Instruction Adherence"), + EXECUTION_ORDER("Execution Order"), + DERAILMENT("Derailment"), + TOOL_ORDER("Tool Order"), + CLASSIFIER("Classifier"), + TOOL_DEPENDENCY("Tool Dependency"), + CUSTOM("Custom"); + + private final String value; + + APIScorerType(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + @Override + public String toString() { + return value; + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java new file mode 100644 index 0000000..5cc01d2 --- /dev/null 
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java @@ -0,0 +1,42 @@ +package com.judgmentlabs.judgeval.v1.data; + +import java.time.Instant; +import java.util.UUID; + +public class Example extends com.judgmentlabs.judgeval.internal.api.models.Example { + + public Example() { + super(); + setExampleId(UUID.randomUUID() + .toString()); + setCreatedAt(Instant.now() + .toString()); + setName(null); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private final Example example; + + private Builder() { + this.example = new Example(); + } + + public Builder property(String key, Object value) { + example.setAdditionalProperty(key, value); + return this; + } + + public Builder name(String name) { + example.setName(name); + return this; + } + + public Example build() { + return example; + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java new file mode 100644 index 0000000..a23e396 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java @@ -0,0 +1,77 @@ +package com.judgmentlabs.judgeval.v1.data; + +import java.util.Map; + +public class ScorerData extends com.judgmentlabs.judgeval.internal.api.models.ScorerData { + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private final ScorerData scorerData; + + private Builder() { + this.scorerData = new ScorerData(); + } + + public Builder name(String name) { + scorerData.setName(name); + return this; + } + + public Builder score(Double score) { + scorerData.setScore(score); + return this; + } + + public Builder success(Boolean success) { + scorerData.setSuccess(success); + return this; + } + + public Builder reason(String reason) { + scorerData.setReason(reason); + return this; + } + + public Builder 
threshold(Double threshold) { + scorerData.setThreshold(threshold); + return this; + } + + public Builder strictMode(Boolean strictMode) { + scorerData.setStrictMode(strictMode); + return this; + } + + public Builder evaluationModel(String evaluationModel) { + scorerData.setEvaluationModel(evaluationModel); + return this; + } + + public Builder error(String error) { + scorerData.setError(error); + return this; + } + + public Builder additionalMetadata(Map additionalMetadata) { + scorerData.setAdditionalMetadata(additionalMetadata); + return this; + } + + public Builder metadata(String key, Object value) { + if (scorerData.getAdditionalMetadata() == null) { + scorerData.setAdditionalMetadata(new java.util.HashMap<>()); + } + @SuppressWarnings("unchecked") + Map metadata = (Map) scorerData.getAdditionalMetadata(); + metadata.put(key, value); + return this; + } + + public ScorerData build() { + return scorerData; + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java new file mode 100644 index 0000000..0671f4d --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java @@ -0,0 +1,55 @@ +package com.judgmentlabs.judgeval.v1.data; + +import java.util.List; + +public class ScoringResult extends com.judgmentlabs.judgeval.internal.api.models.ScoringResult { + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private final ScoringResult result; + + private Builder() { + this.result = new ScoringResult(); + } + + public Builder success(Boolean success) { + result.setSuccess(success); + return this; + } + + public Builder scorersData(List scorersData) { + @SuppressWarnings("unchecked") + List internalList = (List) (List) scorersData; + result.setScorersData(internalList); + return this; + } + + public Builder scorerData(ScorerData scorerData) { + if 
(result.getScorersData() == null) { + result.setScorersData(new java.util.ArrayList<>()); + } + result.getScorersData() + .add(scorerData); + return this; + } + + public Builder dataObject(Example dataObject) { + // Store Example in additional properties since setDataObject + // expects TraceSpan + // This indicates a potential API design issue - ScoringResult may + // be + // trace-specific + if (dataObject != null) { + result.setAdditionalProperty("example", dataObject); + } + return this; + } + + public ScoringResult build() { + return result; + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java new file mode 100644 index 0000000..e4ab3b9 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java @@ -0,0 +1,135 @@ +package com.judgmentlabs.judgeval.v1.scorers; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; + +public class APIScorer extends com.judgmentlabs.judgeval.internal.api.models.BaseScorer implements BaseScorer { + private APIScorerType scoreType; + + @JsonIgnore + private List requiredParams; + + public APIScorer(APIScorerType scoreType) { + super(); + this.scoreType = scoreType; + setName(scoreType.toString()); + setScoreType(scoreType.toString()); + this.requiredParams = new java.util.ArrayList<>(); + if (Boolean.TRUE.equals(getStrictMode())) { + setThreshold(1.0); + } + } + + public void setThreshold(double threshold) { + if (threshold < 0 || threshold > 1) { + throw new IllegalArgumentException("Threshold must be between 0 and 1, got: " + threshold); + } + super.setThreshold(threshold); + } + + 
@JsonProperty("score_type") + public String getScoreType() { + return scoreType.toString(); + } + + public List<String> getRequiredParams() { + return requiredParams; + } + + public void setRequiredParams(List<String> requiredParams) { + this.requiredParams = requiredParams; + } + + @Override + public Double getThreshold() { + return Optional.ofNullable(super.getThreshold()) + .orElse(0.5); + } + + @Override + public String getName() { + return Optional.ofNullable(super.getName()) + .map(Object::toString) + .orElse(null); + } + + @Override + public Boolean getStrictMode() { + return Optional.ofNullable(super.getStrictMode()) + .orElse(false); + } + + @Override + @JsonIgnore + public ScorerConfig getScorerConfig() { + ScorerConfig cfg = new ScorerConfig(); + cfg.setScoreType(getScoreType()); + cfg.setThreshold(getThreshold()); + cfg.setName(getName()); + cfg.setStrictMode(getStrictMode()); + cfg.setRequiredParams(getRequiredParams()); + Map<String, Object> kwargs = new HashMap<>(); + if (getAdditionalProperties() != null) + kwargs.putAll(getAdditionalProperties()); + cfg.setKwargs(kwargs); + return cfg; + } + + public static <T extends APIScorer> Builder<T> builder(Class<T> scorerClass) { + return new Builder<>(scorerClass); + } + + public static final class Builder<T extends APIScorer> { + private final T scorer; + + private Builder(Class<T> scorerClass) { + try { + this.scorer = scorerClass.getDeclaredConstructor() + .newInstance(); + } catch (Exception e) { + throw new RuntimeException("Failed to create scorer instance", e); + } + } + + public Builder<T> threshold(double threshold) { + scorer.setThreshold(threshold); + return this; + } + + public Builder<T> name(String name) { + scorer.setName(name); + return this; + } + + public Builder<T> strictMode(boolean strictMode) { + scorer.setStrictMode(strictMode); + return this; + } + + public Builder<T> requiredParams(List<String> requiredParams) { + scorer.setRequiredParams(requiredParams); + return this; + } + + public Builder<T> model(String model) { + scorer.setModel(model); + return this; + } + + public Builder<T>
additionalProperty(String key, Object value) { + scorer.setAdditionalProperty(key, value); + return this; + } + + public T build() { + return scorer; + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java new file mode 100644 index 0000000..760b6c6 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java @@ -0,0 +1,23 @@ +package com.judgmentlabs.judgeval.v1.scorers; + +import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; + +/** + * Minimal interface for scorers used by BaseTracer. Only requires the essential + * methods needed for evaluation. + */ +public interface BaseScorer { + /** + * Gets the name of the scorer. + * + * @return the scorer name + */ + String getName(); + + /** + * Gets the scorer configuration for evaluation runs. + * + * @return the scorer configuration + */ + ScorerConfig getScorerConfig(); +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java index fc504a9..d3a395a 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java @@ -2,8 +2,8 @@ import java.util.Arrays; -import com.judgmentlabs.judgeval.data.APIScorerType; -import com.judgmentlabs.judgeval.scorers.APIScorer; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; +import com.judgmentlabs.judgeval.v1.scorers.APIScorer; public final class AnswerCorrectnessScorer extends APIScorer { private AnswerCorrectnessScorer(Builder builder) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java index a2b3a0a..2c04f81 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java @@ -2,8 +2,8 @@ import java.util.Arrays; -import com.judgmentlabs.judgeval.data.APIScorerType; -import com.judgmentlabs.judgeval.scorers.APIScorer; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; +import com.judgmentlabs.judgeval.v1.scorers.APIScorer; public final class AnswerRelevancyScorer extends APIScorer { private AnswerRelevancyScorer(Builder builder) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java index fbed3a8..c6f1d74 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java @@ -2,8 +2,8 @@ import java.util.Arrays; -import com.judgmentlabs.judgeval.data.APIScorerType; -import com.judgmentlabs.judgeval.scorers.APIScorer; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; +import com.judgmentlabs.judgeval.v1.scorers.APIScorer; public final class DerailmentScorer extends APIScorer { private DerailmentScorer(Builder builder) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java index 7109368..62ccc58 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java @@ -2,8 +2,8 @@ import 
java.util.Arrays; -import com.judgmentlabs.judgeval.data.APIScorerType; -import com.judgmentlabs.judgeval.scorers.APIScorer; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; +import com.judgmentlabs.judgeval.v1.scorers.APIScorer; public final class FaithfulnessScorer extends APIScorer { private FaithfulnessScorer(Builder builder) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java index fe64b8c..a34f196 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java @@ -2,8 +2,8 @@ import java.util.Arrays; -import com.judgmentlabs.judgeval.data.APIScorerType; -import com.judgmentlabs.judgeval.scorers.APIScorer; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; +import com.judgmentlabs.judgeval.v1.scorers.APIScorer; public final class InstructionAdherenceScorer extends APIScorer { private InstructionAdherenceScorer(Builder builder) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java index 7ad6dd3..fc6c7a4 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java @@ -1,8 +1,8 @@ package com.judgmentlabs.judgeval.v1.scorers.custom_scorer; -import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; -import com.judgmentlabs.judgeval.scorers.APIScorer; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; +import 
com.judgmentlabs.judgeval.v1.scorers.APIScorer; public final class CustomScorer extends APIScorer { private CustomScorer(Builder builder) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java index 671214d..ac3b345 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java @@ -4,9 +4,9 @@ import java.util.Map; import java.util.Objects; -import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; -import com.judgmentlabs.judgeval.scorers.APIScorer; +import com.judgmentlabs.judgeval.v1.data.APIScorerType; +import com.judgmentlabs.judgeval.v1.scorers.APIScorer; public final class PromptScorer extends APIScorer { private final String prompt; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java index 360fc00..c241d8b 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -1,27 +1,25 @@ package com.judgmentlabs.judgeval.v1.tracer; import java.lang.reflect.Type; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import com.fasterxml.jackson.databind.ObjectMapper; import com.judgmentlabs.judgeval.Env; -import com.judgmentlabs.judgeval.data.EvaluationRunBuilder; -import com.judgmentlabs.judgeval.data.Example; -import com.judgmentlabs.judgeval.data.TraceEvaluationRunBuilder; +import com.judgmentlabs.judgeval.JudgmentAttributeKeys; import 
com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; import com.judgmentlabs.judgeval.internal.api.models.ExampleEvaluationRun; import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; import com.judgmentlabs.judgeval.internal.api.models.TraceEvaluationRun; -import com.judgmentlabs.judgeval.scorers.BaseScorer; -import com.judgmentlabs.judgeval.tracer.ISerializer; -import com.judgmentlabs.judgeval.tracer.JudgevalTraceKeys; -import com.judgmentlabs.judgeval.tracer.TracerConfiguration; -import com.judgmentlabs.judgeval.tracer.exporters.JudgmentSpanExporter; -import com.judgmentlabs.judgeval.tracer.exporters.NoOpSpanExporter; import com.judgmentlabs.judgeval.utils.Logger; +import com.judgmentlabs.judgeval.v1.data.Example; +import com.judgmentlabs.judgeval.v1.scorers.BaseScorer; +import com.judgmentlabs.judgeval.v1.tracer.exporters.JudgmentSpanExporter; +import com.judgmentlabs.judgeval.v1.tracer.exporters.NoOpSpanExporter; import io.opentelemetry.api.GlobalOpenTelemetry; import io.opentelemetry.api.trace.Span; @@ -31,24 +29,33 @@ import io.opentelemetry.sdk.trace.export.SpanExporter; public abstract class BaseTracer { - public static final String TRACER_NAME = "judgeval"; - - protected final TracerConfiguration configuration; - protected final JudgmentSyncClient apiClient; - protected final ISerializer serializer; - protected final ObjectMapper jacksonMapper; - protected final Optional projectId; - - protected BaseTracer(TracerConfiguration configuration, JudgmentSyncClient apiClient, ISerializer serializer) { - this.configuration = Objects.requireNonNull(configuration, "configuration required"); + public static final String TRACER_NAME = "judgeval"; + + protected final String projectName; + protected final String apiKey; + protected final String organizationId; + protected final String apiUrl; + protected final boolean enableEvaluation; + protected final 
JudgmentSyncClient apiClient; + protected final ISerializer serializer; + protected final ObjectMapper jacksonMapper; + protected final Optional projectId; + + protected BaseTracer(String projectName, String apiKey, String organizationId, String apiUrl, + boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) { + this.projectName = Objects.requireNonNull(projectName, "projectName required"); + this.apiKey = Objects.requireNonNull(apiKey, "apiKey required"); + this.organizationId = Objects.requireNonNull(organizationId, "organizationId required"); + this.apiUrl = Objects.requireNonNull(apiUrl, "apiUrl required"); + this.enableEvaluation = enableEvaluation; this.apiClient = Objects.requireNonNull(apiClient, "apiClient required"); this.serializer = Objects.requireNonNull(serializer, "serializer required"); this.jacksonMapper = new ObjectMapper(); - this.projectId = resolveProjectId(configuration.projectName()); + this.projectId = resolveProjectId(projectName); this.projectId.ifPresentOrElse(id -> { - }, () -> Logger.error("Failed to resolve project " + configuration.projectName() - + ", please create it first at https://app.judgmentlabs.ai/org/" + configuration.organizationId() + }, () -> Logger.error("Failed to resolve project " + projectName + + ", please create it first at https://app.judgmentlabs.ai/org/" + organizationId + "/projects. 
Skipping Judgment export.")); } @@ -69,7 +76,7 @@ public SpanExporter getSpanExporter() { public void setSpanKind(String kind) { Optional.ofNullable(kind) .ifPresent(k -> withCurrentSpan( - span -> span.setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_SPAN_KIND, k))); + span -> span.setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_SPAN_KIND, k))); } private static void withCurrentSpan(java.util.function.Consumer action) { @@ -141,11 +148,11 @@ private Optional getSampledSpan() { } private boolean isEvaluationEnabled() { - return configuration.enableEvaluation(); + return enableEvaluation; } private void logEvaluationInfo(String method, String traceId, String spanId, String scorerName) { - Logger.info(method + ": project=" + configuration.projectName() + ", traceId=" + traceId + ", spanId=" + Logger.info(method + ": project=" + projectName + ", traceId=" + traceId + ", spanId=" + spanId + ", scorer=" + scorerName); } @@ -195,7 +202,8 @@ public void asyncTraceEvaluate(BaseScorer scorer, String model) { TraceEvaluationRun evaluationRun = createTraceEvaluationRun(scorer, model, traceId, spanId); try { String traceEvalJson = jacksonMapper.writeValueAsString(evaluationRun); - currentSpan.setAttribute(JudgevalTraceKeys.AttributeKeys.PENDING_TRACE_EVAL, traceEvalJson); + currentSpan.setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_PENDING_TRACE_EVAL, + traceEvalJson); } catch (Exception e) { Logger.error("Failed to serialize trace evaluation: " + e.getMessage()); } @@ -225,19 +233,19 @@ public void setGeneralSpan() { } public void setInput(Object input) { - setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_INPUT, input); + setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_INPUT, input); } public void setOutput(Object output) { - setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_OUTPUT, output); + setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_OUTPUT, output); } public void setInput(Object input, Type type) { - 
setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_INPUT, input, type); + setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_INPUT, input, type); } public void setOutput(Object output, Type type) { - setAttribute(JudgevalTraceKeys.AttributeKeys.JUDGMENT_OUTPUT, output, type); + setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_OUTPUT, output, type); } public void span(String spanName, Runnable runnable) { @@ -271,8 +279,24 @@ public io.opentelemetry.api.trace.Tracer getTracer() { .getTracer(TRACER_NAME); } - public TracerConfiguration getConfiguration() { - return configuration; + public String getProjectName() { + return projectName; + } + + public String getApiKey() { + return apiKey; + } + + public String getOrganizationId() { + return organizationId; + } + + public String getApiUrl() { + return apiUrl; + } + + public boolean isEnableEvaluation() { + return enableEvaluation; } public Optional getProjectId() { @@ -304,9 +328,9 @@ private String buildEndpoint(String baseUrl) { private JudgmentSpanExporter createJudgmentSpanExporter(String projectId) { return JudgmentSpanExporter.builder() - .endpoint(buildEndpoint(configuration.apiUrl())) - .apiKey(configuration.apiKey()) - .organizationId(configuration.organizationId()) + .endpoint(buildEndpoint(apiUrl)) + .apiKey(apiKey) + .organizationId(organizationId) .projectId(projectId) .build(); } @@ -319,26 +343,38 @@ private String generateRunId(String prefix, String spanId) { private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example example, String model, String traceId, String spanId) { String runId = generateRunId("async_evaluate_", spanId); - return new EvaluationRunBuilder() - .projectName(configuration.projectName()) - .evalName(runId) - .model(model != null ? model : Env.JUDGMENT_DEFAULT_GPT_MODEL) - .example(example) - .trace(traceId, spanId) - .addScorer(scorer) - .build(); + String modelName = model != null ? 
model : Env.JUDGMENT_DEFAULT_GPT_MODEL; + + ExampleEvaluationRun evaluationRun = new ExampleEvaluationRun(); + evaluationRun.setProjectName(projectName); + evaluationRun.setEvalName(runId); + evaluationRun.setModel(modelName); + evaluationRun.setTraceId(traceId); + evaluationRun.setTraceSpanId(spanId); + + List examples = new ArrayList<>(); + examples.add(example); + evaluationRun.setExamples(examples); + + evaluationRun.setCustomScorers(List.of()); + evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); + + return evaluationRun; } private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String model, String traceId, String spanId) { String evalName = generateRunId("async_trace_evaluate_", spanId); - return new TraceEvaluationRunBuilder() - .projectName(configuration.projectName()) - .evalName(evalName) - .model(model != null ? model : Env.JUDGMENT_DEFAULT_GPT_MODEL) - .trace(traceId, spanId) - .addScorer(scorer) - .build(); + String modelName = model != null ? 
model : Env.JUDGMENT_DEFAULT_GPT_MODEL; + + TraceEvaluationRun evaluationRun = new TraceEvaluationRun(); + evaluationRun.setProjectName(projectName); + evaluationRun.setEvalName(evalName); + evaluationRun.setModel(modelName); + evaluationRun.setTraceAndSpanIds(List.of(List.of(traceId, spanId))); + evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); + + return evaluationRun; } private void enqueueEvaluation(ExampleEvaluationRun evaluationRun) { @@ -349,4 +385,3 @@ private void enqueueEvaluation(ExampleEvaluationRun evaluationRun) { } } } - diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java new file mode 100644 index 0000000..3f6ff48 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java @@ -0,0 +1,11 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import java.lang.reflect.Type; + +public interface ISerializer { + String serialize(Object obj); + + default String serialize(Object obj, Type type) { + return serialize(obj); + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java index d09c9f9..684e579 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java @@ -8,8 +8,6 @@ import com.judgmentlabs.judgeval.Env; import com.judgmentlabs.judgeval.Version; import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; -import com.judgmentlabs.judgeval.tracer.ISerializer; -import com.judgmentlabs.judgeval.tracer.TracerConfiguration; import com.judgmentlabs.judgeval.utils.Logger; import io.opentelemetry.api.GlobalOpenTelemetry; @@ -26,7 +24,11 @@ public final class Tracer extends BaseTracer { private Tracer(Builder builder) { super( - 
buildConfiguration(builder), + Objects.requireNonNull(builder.projectName, "projectName required"), + Objects.requireNonNull(builder.apiKey, "apiKey required"), + Objects.requireNonNull(builder.organizationId, "organizationId required"), + builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL, + builder.enableEvaluation, new JudgmentSyncClient( builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL, Objects.requireNonNull(builder.apiKey, "apiKey required"), @@ -38,23 +40,13 @@ private Tracer(Builder builder) { } } - private static TracerConfiguration buildConfiguration(Builder builder) { - return TracerConfiguration.builder() - .projectName(Objects.requireNonNull(builder.projectName, "projectName required")) - .apiKey(Objects.requireNonNull(builder.apiKey, "apiKey required")) - .organizationId(Objects.requireNonNull(builder.organizationId, "organizationId required")) - .apiUrl(builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL) - .enableEvaluation(builder.enableEvaluation) - .build(); - } - @Override public void initialize() { SpanExporter spanExporter = getSpanExporter(); var resource = Resource.getDefault() .merge(Resource.create(Attributes.builder() - .put("service.name", configuration.projectName()) + .put("service.name", projectName) .put("telemetry.sdk.name", TRACER_NAME) .put("telemetry.sdk.version", Version.getVersion()) .build())); @@ -98,13 +90,13 @@ public static Builder builder() { } public static final class Builder { - private String projectName; - private String apiKey; - private String organizationId; - private String apiUrl; - private boolean enableEvaluation = true; + private String projectName; + private String apiKey; + private String organizationId; + private String apiUrl; + private boolean enableEvaluation = true; private ISerializer serializer; - private boolean initialize = false; + private boolean initialize = false; public Builder projectName(String projectName) { this.projectName = projectName; diff --git 
a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/JudgmentSpanExporter.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/JudgmentSpanExporter.java new file mode 100644 index 0000000..d31674d --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/JudgmentSpanExporter.java @@ -0,0 +1,179 @@ +package com.judgmentlabs.judgeval.v1.tracer.exporters; + +import java.util.Collection; +import java.util.Optional; + +import com.judgmentlabs.judgeval.utils.Logger; + +import io.opentelemetry.exporter.otlp.http.trace.OtlpHttpSpanExporter; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SpanExporter; + +/** + * SpanExporter implementation that sends spans to Judgment Labs with project + * identification. + *

+ * This exporter wraps the OTLP HTTP exporter and adds Judgment Labs specific + * headers and project identification to all exported spans. + */ +public class JudgmentSpanExporter implements SpanExporter { + private final SpanExporter delegate; + + /** + * Creates a new JudgmentSpanExporter with the specified configuration. + * + * @param endpoint + * the OTLP endpoint URL + * @param apiKey + * the API key for authentication + * @param organizationId + * the organization ID + * @param projectId + * the project ID (must not be null or empty) + * @throws IllegalArgumentException + * if projectId is null or empty + */ + protected JudgmentSpanExporter(String endpoint, String apiKey, String organizationId, String projectId) { + if (projectId == null || projectId.isEmpty()) { + throw new IllegalArgumentException("projectId is required for JudgmentSpanExporter"); + } + this.delegate = OtlpHttpSpanExporter.builder() + .setEndpoint(endpoint) + .addHeader("Authorization", "Bearer " + apiKey) + .addHeader("X-Organization-Id", organizationId) + .addHeader("X-Project-Id", projectId) + .build(); + } + + /** + * Creates a new builder for constructing JudgmentSpanExporter instances. + * + * @return a new Builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Exports the collection of spans to the Judgment Labs backend. + * + * @param spans + * the collection of spans to export + * @return a CompletableResultCode representing the export operation status + */ + @Override + public CompletableResultCode export(Collection<SpanData> spans) { + Logger.info("Exporting " + spans.size() + " spans"); + return delegate.export(spans); + } + + /** + * Flushes any pending span exports. + * + * @return a CompletableResultCode representing the flush operation status + */ + @Override + public CompletableResultCode flush() { + return delegate.flush(); + } + + /** + * Shuts down this exporter and releases any resources. 
+ * + * @return a CompletableResultCode representing the shutdown operation status + */ + @Override + public CompletableResultCode shutdown() { + return delegate.shutdown(); + } + + /** + * Builder for creating JudgmentSpanExporter instances. + */ + public static final class Builder { + private String endpoint; + private String apiKey; + private String organizationId; + private String projectId; + + private Builder() { + } + + /** + * Sets the OTLP endpoint URL. + * + * @param endpoint + * the endpoint URL + * @return this builder for method chaining + */ + public Builder endpoint(String endpoint) { + this.endpoint = endpoint; + return this; + } + + /** + * Sets the API key for authentication. + * + * @param apiKey + * the API key + * @return this builder for method chaining + */ + public Builder apiKey(String apiKey) { + this.apiKey = apiKey; + return this; + } + + /** + * Sets the organization ID. + * + * @param organizationId + * the organization ID + * @return this builder for method chaining + */ + public Builder organizationId(String organizationId) { + this.organizationId = organizationId; + return this; + } + + /** + * Sets the project ID. + * + * @param projectId + * the project ID + * @return this builder for method chaining + */ + public Builder projectId(String projectId) { + this.projectId = projectId; + return this; + } + + /** + * Builds a new JudgmentSpanExporter instance with the configured settings. 
+ * + * @return a new JudgmentSpanExporter instance + * @throws IllegalArgumentException + * if any required field is null or empty + */ + public JudgmentSpanExporter build() { + String validEndpoint = Optional.ofNullable(endpoint) + .map(String::trim) + .filter(e -> !e.isEmpty()) + .orElseThrow(() -> new IllegalArgumentException("Endpoint is required")); + String validApiKey = Optional.ofNullable(apiKey) + .map(String::trim) + .filter(key -> !key.isEmpty()) + .orElseThrow(() -> new IllegalArgumentException("API key is required")); + String validOrganizationId = Optional.ofNullable(organizationId) + .map(String::trim) + .filter(id -> !id.isEmpty()) + .orElseThrow(() -> new IllegalArgumentException("Organization ID is required")); + String validProjectId = Optional.ofNullable(projectId) + .map(String::trim) + .filter(id -> !id.isEmpty()) + .orElseThrow(() -> new IllegalArgumentException("Project ID is required")); + + return new JudgmentSpanExporter(validEndpoint, validApiKey, validOrganizationId, validProjectId); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/NoOpSpanExporter.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/NoOpSpanExporter.java new file mode 100644 index 0000000..6bb5244 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/NoOpSpanExporter.java @@ -0,0 +1,45 @@ +package com.judgmentlabs.judgeval.v1.tracer.exporters; + +import java.util.Collection; + +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SpanExporter; + +/** + * A no-op implementation of SpanExporter that discards all spans. Used as a + * fallback when project resolution fails or when spans should not be exported. + */ +public class NoOpSpanExporter implements SpanExporter { + /** + * Discards the collection of spans without exporting. 
+ * + * @param spans + * the collection of spans (ignored) + * @return a successful CompletableResultCode + */ + @Override + public CompletableResultCode export(Collection spans) { + return CompletableResultCode.ofSuccess(); + } + + /** + * Performs a no-op flush operation. + * + * @return a successful CompletableResultCode + */ + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + /** + * Performs a no-op shutdown operation. + * + * @return a successful CompletableResultCode + */ + @Override + public CompletableResultCode shutdown() { + return CompletableResultCode.ofSuccess(); + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..fc25fe5 --- /dev/null +++ b/package.json @@ -0,0 +1,33 @@ +{ + "name": "judgeval-java", + "version": "0.0.0", + "private": true, + "scripts": { + "format": "npm run format:core && npm run format:openai", + "format:core": "mvn -B -f judgeval-java/pom.xml spotless:apply", + "format:openai": "mvn -B -f instrumentation/judgeval-instrumentation-openai/pom.xml spotless:apply", + "format:check": "npm run format:check:core && npm run format:check:openai", + "format:check:core": "cd judgeval-java && mvn -B spotless:check", + "format:check:openai": "cd instrumentation/judgeval-instrumentation-openai && mvn -B spotless:check", + "check": "cd judgeval-java && mvn -B compile checkstyle:check spotless:check", + "lint": "cd judgeval-java && mvn -B checkstyle:check", + "test": "cd judgeval-java && mvn test", + "clean": "npm run clean:core && npm run clean:openai", + "clean:core": "cd judgeval-java && mvn clean", + "clean:openai": "cd instrumentation/judgeval-instrumentation-openai && mvn clean", + "build": "cd judgeval-java && mvn -B clean compile", + "install:all": "npm run install:core && npm run install:openai", + "install:core": "cd judgeval-java && mvn -B -Dgpg.skip=true clean install", + "install:openai": "cd instrumentation/judgeval-instrumentation-openai && mvn -B 
-Dgpg.skip=true clean install", + "generate:client": "npm run generate:client:raw && npm run format", + "generate:client:raw": "./scripts/generate-client.sh", + "ci": "cd judgeval-java && mvn -B clean compile test checkstyle:check spotless:check", + "status": "npm run status:core && npm run status:openai", + "status:core": "cd judgeval-java && echo \"[status] judgeval-java\" && G=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.groupId) && A=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.artifactId) && V=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version) && echo \"GAV: $G:$A:$V\" && (ls -1 target/*.jar 2>/dev/null || echo \"No jar built\")", + "status:openai": "cd instrumentation/judgeval-instrumentation-openai && echo \"[status] instrumentation/judgeval-instrumentation-openai\" && G=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.groupId) && A=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.artifactId) && V=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version) && echo \"GAV: $G:$A:$V\" && (ls -1 target/*.jar 2>/dev/null || echo \"No jar built\")", + "example": "dotenv -e .env -- ./scripts/run-example.sh" + }, + "devDependencies": { + "dotenv-cli": "^7.4.2" + } +} diff --git a/scripts/run-example.sh b/scripts/run-example.sh new file mode 100755 index 0000000..2f4d17f --- /dev/null +++ b/scripts/run-example.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ -z "$1" ]; then + echo "Usage: npm run run [MAIN=ClassName]" + exit 1 +fi + +EXAMPLE=$1 +MAIN_CLASS=${MAIN:-$(ls examples/src/main/java/examples/${EXAMPLE}/*.java 2>/dev/null | head -n1 | xargs -n1 basename 2>/dev/null | sed 's/.java$//')} + +if [ -z "$MAIN_CLASS" ]; then + echo "Error: Could not find example in examples/src/main/java/examples/${EXAMPLE}/" + exit 1 +fi + +mvn -q -f examples/pom.xml -DskipTests -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=examples.${EXAMPLE}.${MAIN_CLASS} clean compile exec:java + From 
a406e5bd03edba469703f9d4165ad41354982457 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 5 Nov 2025 17:26:56 -0800 Subject: [PATCH 04/18] chore: tests --- .cursor/rules | 227 ------------------ .../workflows/test-instrumentation-openai.yml | 4 + .github/workflows/test-judgeval-java.yml | 4 + .../examples/v1_quick_start/V1QuickStart.java | 134 ----------- judgeval-java/pom.xml | 24 +- .../internal/api/JudgmentAsyncClient.java | 12 + .../internal/api/JudgmentSyncClient.java | 12 + .../judgeval/v1/tracer/Tracer.java | 38 +-- .../judgeval/v1/tracer/TracerFactory.java | 4 +- .../judgeval/v1/JudgmentClientTest.java | 79 ++++++ .../judgeval/v1/data/ExampleTest.java | 51 ++++ .../v1/evaluation/EvaluationFactoryTest.java | 35 +++ .../v1/scorers/ScorersFactoryTest.java | 49 ++++ .../built_in/BuiltInScorersFactoryTest.java | 45 ++++ .../judgeval/v1/tracer/BaseTracerTest.java | 199 +++++++++++++++ .../judgeval/v1/tracer/TracerFactoryTest.java | 28 +++ .../judgeval/v1/tracer/TracerTest.java | 50 ++++ scripts/generate_client.py | 12 + 18 files changed, 627 insertions(+), 380 deletions(-) delete mode 100644 .cursor/rules delete mode 100644 examples/src/main/java/examples/v1_quick_start/V1QuickStart.java create mode 100644 judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgmentClientTest.java create mode 100644 judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/data/ExampleTest.java create mode 100644 judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java create mode 100644 judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java create mode 100644 judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java create mode 100644 judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java create mode 100644 judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java create mode 100644 
judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java diff --git a/.cursor/rules b/.cursor/rules deleted file mode 100644 index 8422f18..0000000 --- a/.cursor/rules +++ /dev/null @@ -1,227 +0,0 @@ -# Judgeval Java - Internal API Patterns - -## Context - -The `com.judgmentlabs.judgeval.internal` package contains internal API code that is auto-generated from OpenAPI specifications using `scripts/generate_client.py`, but is explicitly modifiable for internal improvements. This code is NOT part of the public API and can be refactored, cleaned up, and optimized as needed. - -## Internal API Models (`internal/api/models/`) - -### Structure Pattern -- **Package**: `com.judgmentlabs.judgeval.internal.api.models` -- **Naming**: PascalCase class names matching OpenAPI schema names (e.g., `EvalResultsFetch`, `ResolveProjectNameResponse`) -- **Purpose**: Jackson-annotated POJOs for serializing/deserializing JSON requests and responses - -### Field Patterns -- **Field Declaration**: - - Private fields with camelCase naming (e.g., `experimentRunId`, `projectName`) - - Jackson `@JsonProperty` annotation mapping snake_case JSON keys to camelCase fields - - Use wrapper types (String, Integer, Boolean, List, etc.) - never primitives for nullable fields - - All fields are nullable by default (no `@NotNull` annotations) - -### Additional Properties Pattern -All model classes MUST include: -```java -private Map<String, Object> additionalProperties = new HashMap<>(); - -@JsonAnyGetter -public Map<String, Object> getAdditionalProperties() { - return additionalProperties; -} - -@JsonAnySetter -public void setAdditionalProperty(String name, Object value) { - additionalProperties.put(name, value); -} -``` -This allows unknown JSON fields to be preserved during deserialization. 
- -### Getter/Setter Pattern -- **Getters**: Public methods following JavaBean convention `getFieldName()` returning field type -- **Setters**: Public methods following JavaBean convention `setFieldName(Type fieldName)` returning void -- **Order**: Fields declared first, then additionalProperties, then getters, then setters - -### Equals/HashCode Pattern -- **Equals**: Must include all fields plus `additionalProperties` -- **Implementation**: Use `Objects.equals()` for all field comparisons -- **HashCode**: Use `Objects.hash()` for all fields, with `Objects.hashCode(additionalProperties)` for the Map -- **Null Safety**: Both methods handle null fields safely via `Objects.equals()` - -### Imports -Standard imports: -- `java.util.HashMap` -- `java.util.List` (if using collections) -- `java.util.Map` -- `java.util.Objects` -- `com.fasterxml.jackson.annotation.JsonAnyGetter` -- `com.fasterxml.jackson.annotation.JsonAnySetter` -- `com.fasterxml.jackson.annotation.JsonProperty` - -## Internal API Clients (`internal/api/`) - -### Client Classes -- **JudgmentSyncClient**: Synchronous HTTP client for blocking API calls -- **JudgmentAsyncClient**: Asynchronous HTTP client for non-blocking API calls -- Both share identical structure and helper methods, differing only in execution model - -### Constructor Pattern -```java -public JudgmentSyncClient(String baseUrl, String apiKey, String organizationId) { - this.baseUrl = baseUrl; - this.apiKey = apiKey; - this.organizationId = organizationId; - this.client = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); - this.mapper = new ObjectMapper(); -} -``` - -### Helper Methods Pattern - -#### buildUrl(String path, Map queryParams) -- Builds full URL from baseUrl + path -- Appends query string from map if non-empty -- Query string format: `key1=value1&key2=value2` -- Returns complete URL string - -#### buildUrl(String path) -- Convenience overload calling `buildUrl(path, new HashMap<>())` - -#### buildHeaders() -- 
Validates `apiKey` and `organizationId` are not null (throws `IllegalArgumentException`) -- Returns String array with: - - `"Content-Type"`, `"application/json"` - - `"Authorization"`, `"Bearer " + apiKey` - - `"X-Organization-Id"`, `organizationId` - -#### handleResponse(HttpResponse<String> response) -- **Sync**: `throws IOException` -- **Async**: No throws clause (unchecked exceptions) -- Checks status code >= 400, throws `RuntimeException` with status and body -- Attempts to deserialize response body using `mapper.readValue(response.body(), new TypeReference<T>() {})` -- Catches parsing exceptions and wraps in `RuntimeException("Failed to parse response", e)` -- **Issue**: TypeReference generic type erasure makes this unreliable - should use specific class when known - -### Method Naming Pattern -- Method names derived from API path: `/fetch_experiment_run/` → `fetchExperimentRun` -- Path segments separated by underscores/forward slashes become camelCase -- HTTP method determines method signature (GET = no body, POST = has payload) - -### Sync Client Method Pattern -```java -public ReturnType methodName(RequestType payload) throws IOException, InterruptedException { - String url = buildUrl("/api/path/"); - String jsonPayload = mapper.writeValueAsString(payload); - HttpRequest request = HttpRequest.newBuilder() - .POST(HttpRequest.BodyPublishers.ofString(jsonPayload)) - .uri(URI.create(url)) - .headers(buildHeaders()) - .build(); - HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString()); - return handleResponse(response); // or mapper.readValue(response.body(), SpecificType.class); -} -``` - -### Async Client Method Pattern -```java -public CompletableFuture<ReturnType> methodName(RequestType payload) { - String url = buildUrl("/api/path/"); - String jsonPayload; - try { - jsonPayload = mapper.writeValueAsString(payload); - } catch (Exception e) { - throw new RuntimeException("Failed to serialize payload", e); - } - HttpRequest request = 
HttpRequest.newBuilder() - .POST(HttpRequest.BodyPublishers.ofString(jsonPayload)) - .uri(URI.create(url)) - .headers(buildHeaders()) - .build(); - return client.sendAsync(request, HttpResponse.BodyHandlers.ofString()) - .thenApply(this::handleResponse); -} -``` - -### Return Type Patterns -- **Consistency Issue**: Some methods return `Object`, others return specific response types -- **Preferred**: Return specific response model types when known (e.g., `ScorerExistsResponse`, `ResolveProjectNameResponse`) -- **Fallback**: Use `Object` only when response schema is truly unknown/variable -- **Async**: Return type wrapped in `CompletableFuture` - -### Error Handling Standards - -#### Sync Client -- `IOException` and `InterruptedException` propagate (method signature) -- `RuntimeException` for HTTP errors (4xx/5xx) - thrown by `handleResponse` -- `RuntimeException` for deserialization errors - thrown by `handleResponse` or `mapper.readValue` - -#### Async Client -- All exceptions caught internally and wrapped in `RuntimeException` -- Serialization errors caught immediately and rethrown before async operation -- HTTP/deserialization errors thrown from `handleResponse` in CompletableFuture chain -- No checked exceptions in method signatures - -### Query Parameters Pattern -For GET requests or POST requests with query params: -```java -Map queryParams = new HashMap<>(); -queryParams.put("param_name", paramValue); -String url = buildUrl("/api/path/", queryParams); -``` - -## Code Quality Expectations - -### Documentation -- Add JavaDoc to all public methods in client classes -- Document parameters, return types, and thrown exceptions -- Explain complex business logic or non-obvious patterns - -### Null Safety -- Use `Optional` internally for nullable values when appropriate -- Use `@NotNull` annotations for parameters that must not be null (only if needed for external safety) -- Validate constructor parameters that must not be null - -### Error Messages -- Error messages 
should be descriptive and include context -- HTTP errors should include status code and response body -- Serialization errors should indicate the operation that failed - -### Code Organization -- Group related methods together -- Keep helper methods private -- Maintain consistent method ordering (constructors, helpers, public API methods) -- Extract common patterns into reusable helpers when duplicated across sync/async clients - -### Type Safety -- Prefer specific types over `Object` for return values -- Use proper generic types for collections -- Avoid raw types and unchecked casts - -### Consistency -- Sync and async clients should have identical API (only differing by CompletableFuture wrapper) -- Method names should be consistent across both clients -- Helper methods should behave identically in both clients - -## Refactoring Opportunities - -### Known Issues -1. **TypeReference generic erasure**: `handleResponse` uses `TypeReference<T>()` which doesn't preserve type information. Consider passing `Class<T>` or using Jackson's `TypeFactory`. - -2. **Return type inconsistency**: Some sync methods return `Object`, others return specific types. Standardize based on OpenAPI response schemas. - -3. **Code duplication**: Sync and async clients share ~90% of code. Consider extracting common base class or using composition. - -4. **Error handling**: Async client wraps serialization errors, sync client doesn't. Consider consistent error handling strategy. - -5. **Missing validation**: No validation of request payloads before serialization. Consider adding validation for required fields. - -6. **Missing documentation**: No JavaDoc comments on any client methods. Should document all public methods. 
- -### Recommended Improvements -- Extract common client logic to base class `BaseJudgmentClient` -- Standardize return types based on OpenAPI response schemas -- Add request/response validation -- Improve error handling with custom exception types -- Add comprehensive JavaDoc documentation -- Consider using builder pattern for request construction -- Add retry logic for transient failures -- Add request/response logging - diff --git a/.github/workflows/test-instrumentation-openai.yml b/.github/workflows/test-instrumentation-openai.yml index 08252f8..9ce5ed5 100644 --- a/.github/workflows/test-instrumentation-openai.yml +++ b/.github/workflows/test-instrumentation-openai.yml @@ -27,6 +27,10 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 + - name: Run Tests + run: | + mvn -B -pl instrumentation/judgeval-instrumentation-openai -am test + - name: Test Install run: | mvn -B -Dgpg.skip=true -pl instrumentation/judgeval-instrumentation-openai -am clean install diff --git a/.github/workflows/test-judgeval-java.yml b/.github/workflows/test-judgeval-java.yml index 76fb936..a7066ed 100644 --- a/.github/workflows/test-judgeval-java.yml +++ b/.github/workflows/test-judgeval-java.yml @@ -27,6 +27,10 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 + - name: Run Tests + run: | + mvn -B -pl judgeval-java -am test + - name: Test Install run: | mvn -B -Dgpg.skip=true -pl judgeval-java -am clean install diff --git a/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java b/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java deleted file mode 100644 index b7d937b..0000000 --- a/examples/src/main/java/examples/v1_quick_start/V1QuickStart.java +++ /dev/null @@ -1,134 +0,0 @@ -package examples.v1_quick_start; - -import com.judgmentlabs.judgeval.v1.data.Example; -import com.judgmentlabs.judgeval.v1.JudgmentClient; -import 
com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerCorrectnessScorer; -import com.judgmentlabs.judgeval.v1.scorers.built_in.FaithfulnessScorer; -import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorer; -import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; -import com.judgmentlabs.judgeval.v1.tracer.Tracer; - -public class V1QuickStart { - public static void main(String[] args) { - System.out.println("=== Judgeval SDK V1 Quick Start ===\n"); - - System.out.println("1. Initialize JudgmentClient"); - JudgmentClient client = JudgmentClient.builder() - .apiKey(System.getenv("JUDGMENT_API_KEY")) - .organizationId(System.getenv("JUDGMENT_ORG_ID")) - .build(); - System.out.println(" Client initialized\n"); - - System.out.println("2. Create and initialize Tracer"); - Tracer tracer = client.tracer().create() - .projectName("quickstart-project") - .enableEvaluation(true) - .build(); - tracer.initialize(); - System.out.println(" Tracer initialized for project: quickstart-project\n"); - - System.out.println("3. Use Tracer for distributed tracing"); - tracer.span("example-operation", () -> { - tracer.setLLMSpan(); - tracer.setInput("What is the capital of France?"); - - String llmOutput = "The capital of France is Paris."; - - tracer.setOutput(llmOutput); - System.out.println(" Traced operation with input/output"); - - System.out.println(); - - System.out.println("4. Access PromptScorer (fetch existing)"); - try { - PromptScorer existingScorer = client.scorers() - .promptScorer() - .get("example-scorer"); - System.out.println(" Retrieved PromptScorer: " + existingScorer.getName()); - } catch (Exception e) { - System.out.println(" Note: Scorer 'example-scorer' not found (expected for first run)"); - } - System.out.println(); - - System.out.println("5. 
Create new PromptScorer"); - PromptScorer newScorer = client.scorers() - .promptScorer() - .create() - .name("kindness-scorer") - .prompt("Did the assistant respond kindly and respectfully?") - .threshold(0.7) - .build(); - System.out.println(" Created PromptScorer: " + newScorer.getName()); - System.out.println(" Threshold: " + newScorer.getThreshold()); - System.out.println(); - - System.out.println("6. Use TracePromptScorer"); - try { - PromptScorer traceScorer = client.scorers() - .tracePromptScorer() - .create() - .name("trace-quality-scorer") - .prompt("Does the entire trace show high quality reasoning?") - .threshold(0.8) - .build(); - System.out.println(" Created TracePromptScorer: " + traceScorer.getName()); - } catch (Exception e) { - System.out.println(" TracePromptScorer creation demo"); - } - System.out.println(); - - System.out.println("7. Use CustomScorer"); - CustomScorer customScorer = client.scorers() - .customScorer() - .get("my-custom-scorer", "MyCustomScorerClass"); - System.out.println(" Created CustomScorer: " + customScorer.getName()); - System.out.println(); - - System.out.println("8. Use Built-in Scorers"); - AnswerCorrectnessScorer correctnessScorer = client.scorers() - .builtIn() - .answerCorrectness() - .threshold(0.8) - .build(); - System.out.println(" Created AnswerCorrectnessScorer with threshold: " - + correctnessScorer.getThreshold()); - - FaithfulnessScorer faithfulnessScorer = client.scorers() - .builtIn() - .faithfulness() - .build(); - System.out.println(" Created FaithfulnessScorer with default threshold: " - + faithfulnessScorer.getThreshold()); - System.out.println(); - - System.out.println("9. Run Evaluation"); - - System.out.println("10. 
Complete workflow example"); - tracer.span("complete-llm-call", () -> { - tracer.setLLMSpan(); - tracer.setInput("Explain quantum computing in simple terms"); - - String response = "Quantum computing uses quantum mechanics to process information..."; - - tracer.setOutput(response); - - Example evaluationExample = Example.builder() - .property("input", "Explain quantum computing in simple terms") - .property("actual_output", response) - .property("expected_output", "A clear, simple explanation") - .build(); - tracer.asyncEvaluate(client.scorers().builtIn().answerCorrectness().build(), evaluationExample); - - System.out.println(" Traced LLM call with evaluation example ready"); - }); - System.out.println(); - }); - - try { - Thread.sleep(10000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - - } -} diff --git a/judgeval-java/pom.xml b/judgeval-java/pom.xml index 2173dab..e7a99d0 100644 --- a/judgeval-java/pom.xml +++ b/judgeval-java/pom.xml @@ -54,13 +54,25 @@ org.mockito mockito-core - 5.8.0 + 5.15.2 test org.mockito mockito-junit-jupiter - 5.8.0 + 5.15.2 + test + + + net.bytebuddy + byte-buddy + 1.17.5 + test + + + net.bytebuddy + byte-buddy-agent + 1.17.5 test @@ -145,6 +157,14 @@ 21 + + org.apache.maven.plugins + maven-surefire-plugin + 3.2.5 + + -XX:+EnableDynamicAgentLoading + + org.apache.maven.plugins maven-jar-plugin diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java index 0ad218d..c6b5e3b 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java @@ -30,6 +30,18 @@ public JudgmentAsyncClient(String baseUrl, String apiKey, String organizationId) this.mapper = new ObjectMapper(); } + public String getApiUrl() { + return baseUrl; + } + + public String 
getApiKey() { + return apiKey; + } + + public String getOrganizationId() { + return organizationId; + } + private String buildUrl(String path, Map queryParams) { StringBuilder url = new StringBuilder(baseUrl).append(path); if (!queryParams.isEmpty()) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java index d0f1f5d..2e6851d 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java @@ -30,6 +30,18 @@ public JudgmentSyncClient(String baseUrl, String apiKey, String organizationId) this.mapper = new ObjectMapper(); } + public String getApiUrl() { + return baseUrl; + } + + public String getApiKey() { + return apiKey; + } + + public String getOrganizationId() { + return organizationId; + } + private String buildUrl(String path, Map queryParams) { StringBuilder url = new StringBuilder(baseUrl).append(path); if (!queryParams.isEmpty()) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java index 684e579..a7cc7bc 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java @@ -25,14 +25,18 @@ public final class Tracer extends BaseTracer { private Tracer(Builder builder) { super( Objects.requireNonNull(builder.projectName, "projectName required"), - Objects.requireNonNull(builder.apiKey, "apiKey required"), - Objects.requireNonNull(builder.organizationId, "organizationId required"), - builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL, + builder.client != null ? 
builder.client.getApiKey() + : Objects.requireNonNull(builder.apiKey, "apiKey required"), + builder.client != null ? builder.client.getOrganizationId() + : Objects.requireNonNull(builder.organizationId, "organizationId required"), + builder.client != null ? builder.client.getApiUrl() + : (builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL), builder.enableEvaluation, - new JudgmentSyncClient( - builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL, - Objects.requireNonNull(builder.apiKey, "apiKey required"), - Objects.requireNonNull(builder.organizationId, "organizationId required")), + builder.client != null ? builder.client + : new JudgmentSyncClient( + builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL, + Objects.requireNonNull(builder.apiKey, "apiKey required"), + Objects.requireNonNull(builder.organizationId, "organizationId required")), builder.serializer != null ? builder.serializer : new GsonSerializer()); if (builder.initialize) { @@ -90,13 +94,19 @@ public static Builder builder() { } public static final class Builder { - private String projectName; - private String apiKey; - private String organizationId; - private String apiUrl; - private boolean enableEvaluation = true; - private ISerializer serializer; - private boolean initialize = false; + private JudgmentSyncClient client; + private String projectName; + private String apiKey; + private String organizationId; + private String apiUrl; + private boolean enableEvaluation = true; + private ISerializer serializer; + private boolean initialize = false; + + public Builder client(JudgmentSyncClient client) { + this.client = client; + return this; + } public Builder projectName(String projectName) { this.projectName = projectName; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java index 14dbf29..ea67b10 100644 --- 
a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java @@ -17,8 +17,6 @@ public TracerFactory(JudgmentSyncClient client, String apiKey, String organizati public Tracer.Builder create() { return Tracer.builder() - .apiKey(apiKey) - .organizationId(organizationId) - .apiUrl(apiUrl); + .client(client); } } diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgmentClientTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgmentClientTest.java new file mode 100644 index 0000000..e078306 --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgmentClientTest.java @@ -0,0 +1,79 @@ +package com.judgmentlabs.judgeval.v1; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.Test; + +import com.judgmentlabs.judgeval.v1.evaluation.EvaluationFactory; +import com.judgmentlabs.judgeval.v1.scorers.ScorersFactory; +import com.judgmentlabs.judgeval.v1.tracer.TracerFactory; + +class JudgmentClientTest { + private static final String TEST_API_URL = "https://api.test.com"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + + @Test + void builder_withAllParameters_buildsSuccessfully() { + JudgmentClient client = JudgmentClient.builder() + .apiKey(TEST_API_KEY) + .organizationId(TEST_ORG_ID) + .apiUrl(TEST_API_URL) + .build(); + + assertNotNull(client); + } + + @Test + void builder_withNullApiKey_throwsException() { + assertThrows(NullPointerException.class, () -> { + JudgmentClient.builder() + .apiKey(null) + .organizationId(TEST_ORG_ID) + .build(); + }); + } + + @Test + void builder_withNullOrganizationId_throwsException() { + assertThrows(NullPointerException.class, () -> { + JudgmentClient.builder() + .apiKey(TEST_API_KEY) + .organizationId(null) + .build(); + }); + } + + @Test + void 
tracer_returnsTracerFactory() { + JudgmentClient client = JudgmentClient.builder() + .apiKey(TEST_API_KEY) + .organizationId(TEST_ORG_ID) + .build(); + + TracerFactory factory = client.tracer(); + assertNotNull(factory); + } + + @Test + void scorers_returnsScorersFactory() { + JudgmentClient client = JudgmentClient.builder() + .apiKey(TEST_API_KEY) + .organizationId(TEST_ORG_ID) + .build(); + + ScorersFactory factory = client.scorers(); + assertNotNull(factory); + } + + @Test + void evaluation_returnsEvaluationFactory() { + JudgmentClient client = JudgmentClient.builder() + .apiKey(TEST_API_KEY) + .organizationId(TEST_ORG_ID) + .build(); + + EvaluationFactory factory = client.evaluation(); + assertNotNull(factory); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/data/ExampleTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/data/ExampleTest.java new file mode 100644 index 0000000..204fe53 --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/data/ExampleTest.java @@ -0,0 +1,51 @@ +package com.judgmentlabs.judgeval.v1.data; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.Test; + +class ExampleTest { + + @Test + void constructor_generatesExampleId() { + Example example = new Example(); + assertNotNull(example.getExampleId()); + } + + @Test + void constructor_setsCreatedAt() { + Example example = new Example(); + assertNotNull(example.getCreatedAt()); + } + + @Test + void constructor_setsNameToNull() { + Example example = new Example(); + assertNull(example.getName()); + } + + @Test + void builder_createsExample() { + Example example = Example.builder().build(); + assertNotNull(example); + } + + @Test + void builder_withProperty_setsProperty() { + Example example = Example.builder() + .property("key", "value") + .build(); + + assertNotNull(example); + assertEquals("value", example.getAdditionalProperties().get("key")); + } + + @Test + void 
builder_withName_setsName() { + Example example = Example.builder() + .name("test-example") + .build(); + + assertEquals("test-example", example.getName()); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java new file mode 100644 index 0000000..2c7b6aa --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java @@ -0,0 +1,35 @@ +package com.judgmentlabs.judgeval.v1.evaluation; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +class EvaluationFactoryTest { + private static final String TEST_API_URL = "https://api.test.com"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + + private EvaluationFactory factory; + + @BeforeEach + void setUp() { + JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); + factory = new EvaluationFactory(client, TEST_API_KEY, TEST_ORG_ID); + } + + @Test + void create_returnsConfiguredBuilder() { + Evaluation.Builder builder = factory.create(); + assertNotNull(builder); + } + + @Test + void create_builderBuildsEvaluation() { + Evaluation.Builder builder = factory.create(); + Evaluation evaluation = builder.build(); + assertNotNull(evaluation); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java new file mode 100644 index 0000000..0041bd1 --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java @@ -0,0 +1,49 @@ +package com.judgmentlabs.judgeval.v1.scorers; + +import static 
org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.BuiltInScorersFactory; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorerFactory; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorerFactory; + +class ScorersFactoryTest { + private static final String TEST_API_URL = "https://api.test.com"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + + private ScorersFactory factory; + + @BeforeEach + void setUp() { + JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); + factory = new ScorersFactory(client, TEST_API_KEY, TEST_ORG_ID); + } + + @Test + void promptScorer_returnsFactory() { + PromptScorerFactory promptScorerFactory = factory.promptScorer(); + assertNotNull(promptScorerFactory); + } + + @Test + void tracePromptScorer_returnsFactory() { + PromptScorerFactory tracePromptScorerFactory = factory.tracePromptScorer(); + assertNotNull(tracePromptScorerFactory); + } + + @Test + void customScorer_returnsFactory() { + CustomScorerFactory customScorerFactory = factory.customScorer(); + assertNotNull(customScorerFactory); + } + + @Test + void builtIn_returnsFactory() { + BuiltInScorersFactory builtInFactory = factory.builtIn(); + assertNotNull(builtInFactory); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java new file mode 100644 index 0000000..779376f --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java @@ -0,0 +1,45 @@ +package com.judgmentlabs.judgeval.v1.scorers.built_in; + +import static 
org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class BuiltInScorersFactoryTest { + private BuiltInScorersFactory factory; + + @BeforeEach + void setUp() { + factory = new BuiltInScorersFactory(); + } + + @Test + void answerCorrectness_returnsBuilder() { + AnswerCorrectnessScorer.Builder builder = factory.answerCorrectness(); + assertNotNull(builder); + } + + @Test + void answerRelevancy_returnsBuilder() { + AnswerRelevancyScorer.Builder builder = factory.answerRelevancy(); + assertNotNull(builder); + } + + @Test + void faithfulness_returnsBuilder() { + FaithfulnessScorer.Builder builder = factory.faithfulness(); + assertNotNull(builder); + } + + @Test + void instructionAdherence_returnsBuilder() { + InstructionAdherenceScorer.Builder builder = factory.instructionAdherence(); + assertNotNull(builder); + } + + @Test + void derailment_returnsBuilder() { + DerailmentScorer.Builder builder = factory.derailment(); + assertNotNull(builder); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java new file mode 100644 index 0000000..203b9d0 --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java @@ -0,0 +1,199 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; +import static org.mockito.Mockito.lenient; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; +import 
com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; +import com.judgmentlabs.judgeval.v1.tracer.exporters.JudgmentSpanExporter; +import com.judgmentlabs.judgeval.v1.tracer.exporters.NoOpSpanExporter; + +import io.opentelemetry.sdk.trace.export.SpanExporter; + +@ExtendWith(MockitoExtension.class) +class BaseTracerTest { + private static final String TEST_PROJECT_NAME = "test-project"; + private static final String TEST_PROJECT_ID = "test-project-id-123"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + private static final String TEST_API_URL = "https://api.test.com"; + @Mock + private JudgmentSyncClient mockClient; + + @Mock + private ISerializer mockSerializer; + + private TestableBaseTracer tracer; + + @BeforeEach + void setUp() throws Exception { + ResolveProjectNameResponse response = new ResolveProjectNameResponse(); + response.setProjectId(TEST_PROJECT_ID); + + lenient().when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenReturn(response); + + lenient().when(mockSerializer.serialize(any())).thenReturn("serialized"); + lenient().when(mockSerializer.serialize(any(), any())).thenReturn("serialized"); + + tracer = new TestableBaseTracer( + TEST_PROJECT_NAME, + TEST_API_KEY, + TEST_ORG_ID, + TEST_API_URL, + true, + mockClient, + mockSerializer); + } + + @Test + void constructor_withValidParameters_resolvesProject() { + assertNotNull(tracer); + assertEquals(TEST_PROJECT_NAME, tracer.getProjectName()); + assertEquals(TEST_API_KEY, tracer.getApiKey()); + assertEquals(TEST_ORG_ID, tracer.getOrganizationId()); + assertEquals(TEST_API_URL, tracer.getApiUrl()); + assertTrue(tracer.isEnableEvaluation()); + assertTrue(tracer.getProjectId().isPresent()); + assertEquals(TEST_PROJECT_ID, tracer.getProjectId().get()); + } + + @Test + void constructor_withNullProjectName_throwsException() { + assertThrows(NullPointerException.class, () -> { + new TestableBaseTracer( 
+ null, + TEST_API_KEY, + TEST_ORG_ID, + TEST_API_URL, + true, + mockClient, + mockSerializer); + }); + } + + @Test + void constructor_withNullApiKey_throwsException() { + assertThrows(NullPointerException.class, () -> { + new TestableBaseTracer( + TEST_PROJECT_NAME, + null, + TEST_ORG_ID, + TEST_API_URL, + true, + mockClient, + mockSerializer); + }); + } + + @Test + void constructor_withNullOrganizationId_throwsException() { + assertThrows(NullPointerException.class, () -> { + new TestableBaseTracer( + TEST_PROJECT_NAME, + TEST_API_KEY, + null, + TEST_API_URL, + true, + mockClient, + mockSerializer); + }); + } + + @Test + void constructor_withNullApiUrl_throwsException() { + assertThrows(NullPointerException.class, () -> { + new TestableBaseTracer( + TEST_PROJECT_NAME, + TEST_API_KEY, + TEST_ORG_ID, + null, + true, + mockClient, + mockSerializer); + }); + } + + @Test + void constructor_withFailedProjectResolution_hasEmptyProjectId() throws Exception { + when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenThrow(new RuntimeException("Project not found")); + + TestableBaseTracer failedTracer = new TestableBaseTracer( + TEST_PROJECT_NAME, + TEST_API_KEY, + TEST_ORG_ID, + TEST_API_URL, + true, + mockClient, + mockSerializer); + + assertFalse(failedTracer.getProjectId().isPresent()); + } + + @Test + void getSpanExporter_withValidProjectId_returnsJudgmentSpanExporter() { + SpanExporter exporter = tracer.getSpanExporter(); + assertNotNull(exporter); + assertTrue(exporter instanceof JudgmentSpanExporter); + } + + @Test + void getSpanExporter_withoutProjectId_returnsNoOpSpanExporter() throws Exception { + when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenReturn(null); + + TestableBaseTracer failedTracer = new TestableBaseTracer( + TEST_PROJECT_NAME, + TEST_API_KEY, + TEST_ORG_ID, + TEST_API_URL, + true, + mockClient, + mockSerializer); + + SpanExporter exporter = failedTracer.getSpanExporter(); + assertNotNull(exporter); 
+ assertTrue(exporter instanceof NoOpSpanExporter); + } + + @Test + void getTracer_returnsTracer() { + io.opentelemetry.api.trace.Tracer otelTracer = tracer.getTracer(); + assertNotNull(otelTracer); + } + + @Test + void setAttributes_withNull_doesNotThrow() { + tracer.setAttributes(null); + } + + private static class TestableBaseTracer extends BaseTracer { + protected TestableBaseTracer(String projectName, String apiKey, String organizationId, String apiUrl, + boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) { + super(projectName, apiKey, organizationId, apiUrl, enableEvaluation, apiClient, serializer); + } + + @Override + public void initialize() { + } + + @Override + public boolean forceFlush(int timeoutMillis) { + return false; + } + + @Override + public void shutdown(int timeoutMillis) { + } + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java new file mode 100644 index 0000000..30a0515 --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java @@ -0,0 +1,28 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +class TracerFactoryTest { + private static final String TEST_API_URL = "https://api.test.com"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + + private TracerFactory factory; + + @BeforeEach + void setUp() { + JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); + factory = new TracerFactory(client, TEST_API_KEY, TEST_ORG_ID, TEST_API_URL); + } + + @Test + void create_returnsConfiguredBuilder() { + Tracer.Builder builder = 
factory.create(); + assertNotNull(builder); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java new file mode 100644 index 0000000..647bff3 --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java @@ -0,0 +1,50 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.Test; + +class TracerTest { + private static final String TEST_PROJECT_NAME = "test-project"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + + @Test + void builder_returnsBuilder() { + Tracer.Builder builder = Tracer.builder(); + assertNotNull(builder); + } + + @Test + void builder_withNullProjectName_throwsException() { + assertThrows(NullPointerException.class, () -> { + Tracer.builder() + .projectName(null) + .apiKey(TEST_API_KEY) + .organizationId(TEST_ORG_ID) + .build(); + }); + } + + @Test + void builder_withNullApiKey_throwsException() { + assertThrows(NullPointerException.class, () -> { + Tracer.builder() + .projectName(TEST_PROJECT_NAME) + .apiKey(null) + .organizationId(TEST_ORG_ID) + .build(); + }); + } + + @Test + void builder_withNullOrganizationId_throwsException() { + assertThrows(NullPointerException.class, () -> { + Tracer.builder() + .projectName(TEST_PROJECT_NAME) + .apiKey(TEST_API_KEY) + .organizationId(null) + .build(); + }); + } +} diff --git a/scripts/generate_client.py b/scripts/generate_client.py index 2a3cb68..682de7a 100755 --- a/scripts/generate_client.py +++ b/scripts/generate_client.py @@ -463,6 +463,18 @@ def generate_client_class( " this.mapper = new ObjectMapper();", " }", "", + " public String getApiUrl() {", + " return baseUrl;", + " }", + "", + " public String getApiKey() {", + " return apiKey;", + " }", + "", + " public String getOrganizationId() {", + 
" return organizationId;", + " }", + "", " private String buildUrl(String path, Map queryParams) {", " StringBuilder url = new StringBuilder(baseUrl).append(path);", " if (!queryParams.isEmpty()) {", From 08beb2c94d93b5f5ff2e50d30db6e53499cd79c8 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 5 Nov 2025 17:29:02 -0800 Subject: [PATCH 05/18] fix: tests --- .github/workflows/test-instrumentation-openai.yml | 4 ++-- .github/workflows/test-judgeval-java.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-instrumentation-openai.yml b/.github/workflows/test-instrumentation-openai.yml index 9ce5ed5..cffa159 100644 --- a/.github/workflows/test-instrumentation-openai.yml +++ b/.github/workflows/test-instrumentation-openai.yml @@ -29,11 +29,11 @@ jobs: - name: Run Tests run: | - mvn -B -pl instrumentation/judgeval-instrumentation-openai -am test + cd instrumentation/judgeval-instrumentation-openai && mvn -B test - name: Test Install run: | - mvn -B -Dgpg.skip=true -pl instrumentation/judgeval-instrumentation-openai -am clean install + cd instrumentation/judgeval-instrumentation-openai && mvn -B -Dgpg.skip=true clean install - name: Verify Build Artifacts run: | diff --git a/.github/workflows/test-judgeval-java.yml b/.github/workflows/test-judgeval-java.yml index a7066ed..6bc4123 100644 --- a/.github/workflows/test-judgeval-java.yml +++ b/.github/workflows/test-judgeval-java.yml @@ -29,11 +29,11 @@ jobs: - name: Run Tests run: | - mvn -B -pl judgeval-java -am test + cd judgeval-java && mvn -B test - name: Test Install run: | - mvn -B -Dgpg.skip=true -pl judgeval-java -am clean install + cd judgeval-java && mvn -B -Dgpg.skip=true clean install - name: Verify Build Artifacts run: | From af8beceaa7fe330d000fbe60f61c336a0c463beb Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 5 Nov 2025 21:17:50 -0800 Subject: [PATCH 06/18] cleanup --- .../judgeval/v1/JudgmentClient.java | 59 +++- 
.../judgeval/v1/data/APIScorerType.java | 6 +- .../judgeval/v1/data/Example.java | 32 +++ .../judgeval/v1/data/ScorerData.java | 88 ++++++ .../judgeval/v1/data/ScoringResult.java | 49 +++- .../judgeval/v1/evaluation/Evaluation.java | 17 ++ .../v1/evaluation/EvaluationFactory.java | 14 +- .../judgeval/v1/scorers/APIScorer.java | 18 ++ .../judgeval/v1/scorers/BaseScorer.java | 7 +- .../judgeval/v1/scorers/ScorersFactory.java | 33 ++- .../built_in/AnswerCorrectnessScorer.java | 12 + .../built_in/AnswerRelevancyScorer.java | 11 + .../built_in/BuiltInScorersFactory.java | 28 ++ .../v1/scorers/built_in/DerailmentScorer.java | 12 + .../scorers/built_in/FaithfulnessScorer.java | 11 + .../built_in/InstructionAdherenceScorer.java | 11 + .../scorers/custom_scorer/CustomScorer.java | 29 +- .../custom_scorer/CustomScorerFactory.java | 21 +- .../scorers/prompt_scorer/PromptScorer.java | 47 ++-- .../prompt_scorer/PromptScorerFactory.java | 30 ++- .../judgeval/v1/tracer/BaseTracer.java | 252 +++++++++++++++++- .../judgeval/v1/tracer/ISerializer.java | 19 ++ .../judgeval/v1/tracer/Tracer.java | 88 ++++++ .../judgeval/v1/tracer/TracerFactory.java | 16 +- .../v1/evaluation/EvaluationFactoryTest.java | 2 +- .../v1/scorers/ScorersFactoryTest.java | 2 +- .../judgeval/v1/tracer/TracerFactoryTest.java | 2 +- 27 files changed, 825 insertions(+), 91 deletions(-) diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java index e9cdf23..dbb2689 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java @@ -8,6 +8,10 @@ import com.judgmentlabs.judgeval.v1.scorers.ScorersFactory; import com.judgmentlabs.judgeval.v1.tracer.TracerFactory; +/** + * Main entry point for the Judgment SDK. Provides access to tracer, scorer, and + * evaluation factories. 
+ */ public final class JudgmentClient { private final String apiKey; private final String organizationId; @@ -21,42 +25,91 @@ private JudgmentClient(Builder builder) { this.internalClient = new JudgmentSyncClient(apiUrl, apiKey, organizationId); } + /** + * Returns a factory for creating tracers. + * + * @return the tracer factory + */ public TracerFactory tracer() { - return new TracerFactory(internalClient, apiKey, organizationId, apiUrl); + return new TracerFactory(internalClient); } + /** + * Returns a factory for creating scorers. + * + * @return the scorer factory + */ public ScorersFactory scorers() { - return new ScorersFactory(internalClient, apiKey, organizationId); + return new ScorersFactory(internalClient); } + /** + * Returns a factory for creating evaluations. + * + * @return the evaluation factory + */ public EvaluationFactory evaluation() { - return new EvaluationFactory(internalClient, apiKey, organizationId); + return new EvaluationFactory(internalClient); } + /** + * Creates a new builder for configuring a JudgmentClient. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating JudgmentClient instances. + */ public static final class Builder { private String apiKey = Env.JUDGMENT_API_KEY; private String organizationId = Env.JUDGMENT_ORG_ID; private String apiUrl = Env.JUDGMENT_API_URL; + /** + * Sets the API key for authentication. + * + * @param apiKey + * the API key + * @return this builder + */ public Builder apiKey(String apiKey) { this.apiKey = apiKey; return this; } + /** + * Sets the organization ID. + * + * @param organizationId + * the organization ID + * @return this builder + */ public Builder organizationId(String organizationId) { this.organizationId = organizationId; return this; } + /** + * Sets the API URL. 
+ * + * @param apiUrl + * the API URL + * @return this builder + */ public Builder apiUrl(String apiUrl) { this.apiUrl = apiUrl; return this; } + /** + * Builds and returns a new JudgmentClient instance. + * + * @return the configured JudgmentClient + */ public JudgmentClient build() { return new JudgmentClient(this); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java index 05d48e8..ad13bc4 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java @@ -1,5 +1,8 @@ package com.judgmentlabs.judgeval.v1.data; +/** + * Available types of API-based scorers. + */ public enum APIScorerType { PROMPT_SCORER("Prompt Scorer"), TRACE_PROMPT_SCORER("Trace Prompt Scorer"), @@ -9,9 +12,6 @@ public enum APIScorerType { INSTRUCTION_ADHERENCE("Instruction Adherence"), EXECUTION_ORDER("Execution Order"), DERAILMENT("Derailment"), - TOOL_ORDER("Tool Order"), - CLASSIFIER("Classifier"), - TOOL_DEPENDENCY("Tool Dependency"), CUSTOM("Custom"); private final String value; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java index 5cc01d2..6a5e66a 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java @@ -3,6 +3,9 @@ import java.time.Instant; import java.util.UUID; +/** + * Represents an evaluation example with arbitrary properties. + */ public class Example extends com.judgmentlabs.judgeval.internal.api.models.Example { public Example() { @@ -14,10 +17,18 @@ public Example() { setName(null); } + /** + * Creates a new builder for configuring an Example. 
+ * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating Example instances. + */ public static final class Builder { private final Example example; @@ -25,16 +36,37 @@ private Builder() { this.example = new Example(); } + /** + * Sets a custom property on the example. + * + * @param key + * the property key + * @param value + * the property value + * @return this builder + */ public Builder property(String key, Object value) { example.setAdditionalProperty(key, value); return this; } + /** + * Sets the name of the example. + * + * @param name + * the example name + * @return this builder + */ public Builder name(String name) { example.setName(name); return this; } + /** + * Builds and returns the configured Example. + * + * @return the configured Example + */ public Example build() { return example; } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java index a23e396..3347aad 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java @@ -2,12 +2,23 @@ import java.util.Map; +/** + * Represents the result of a single scorer evaluation. + */ public class ScorerData extends com.judgmentlabs.judgeval.internal.api.models.ScorerData { + /** + * Creates a new builder for configuring ScorerData. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating ScorerData instances. + */ public static final class Builder { private final ScorerData scorerData; @@ -15,51 +26,123 @@ private Builder() { this.scorerData = new ScorerData(); } + /** + * Sets the scorer name. 
+ * + * @param name + * the scorer name + * @return this builder + */ public Builder name(String name) { scorerData.setName(name); return this; } + /** + * Sets the evaluation score. + * + * @param score + * the score value + * @return this builder + */ public Builder score(Double score) { scorerData.setScore(score); return this; } + /** + * Sets whether the evaluation succeeded. + * + * @param success + * true if evaluation succeeded + * @return this builder + */ public Builder success(Boolean success) { scorerData.setSuccess(success); return this; } + /** + * Sets the reason for the evaluation result. + * + * @param reason + * the evaluation reason + * @return this builder + */ public Builder reason(String reason) { scorerData.setReason(reason); return this; } + /** + * Sets the evaluation threshold. + * + * @param threshold + * the threshold value + * @return this builder + */ public Builder threshold(Double threshold) { scorerData.setThreshold(threshold); return this; } + /** + * Sets strict mode for evaluation. + * + * @param strictMode + * true for strict mode + * @return this builder + */ public Builder strictMode(Boolean strictMode) { scorerData.setStrictMode(strictMode); return this; } + /** + * Sets the model used for evaluation. + * + * @param evaluationModel + * the model name + * @return this builder + */ public Builder evaluationModel(String evaluationModel) { scorerData.setEvaluationModel(evaluationModel); return this; } + /** + * Sets an error message if evaluation failed. + * + * @param error + * the error message + * @return this builder + */ public Builder error(String error) { scorerData.setError(error); return this; } + /** + * Sets additional metadata for the evaluation. + * + * @param additionalMetadata + * the metadata map + * @return this builder + */ public Builder additionalMetadata(Map additionalMetadata) { scorerData.setAdditionalMetadata(additionalMetadata); return this; } + /** + * Adds a single metadata entry. 
+ * + * @param key + * the metadata key + * @param value + * the metadata value + * @return this builder + */ public Builder metadata(String key, Object value) { if (scorerData.getAdditionalMetadata() == null) { scorerData.setAdditionalMetadata(new java.util.HashMap<>()); @@ -70,6 +153,11 @@ public Builder metadata(String key, Object value) { return this; } + /** + * Builds and returns the configured ScorerData. + * + * @return the configured ScorerData + */ public ScorerData build() { return scorerData; } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java index 0671f4d..0811f62 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java @@ -2,12 +2,23 @@ import java.util.List; +/** + * Represents a collection of scorer evaluation results. + */ public class ScoringResult extends com.judgmentlabs.judgeval.internal.api.models.ScoringResult { + /** + * Creates a new builder for configuring a ScoringResult. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating ScoringResult instances. + */ public static final class Builder { private final ScoringResult result; @@ -15,11 +26,25 @@ private Builder() { this.result = new ScoringResult(); } + /** + * Sets whether the overall evaluation succeeded. + * + * @param success + * true if evaluation succeeded + * @return this builder + */ public Builder success(Boolean success) { result.setSuccess(success); return this; } + /** + * Sets the list of scorer results. 
+ * + * @param scorersData + * the list of scorer data + * @return this builder + */ public Builder scorersData(List scorersData) { @SuppressWarnings("unchecked") List internalList = (List) (List) scorersData; @@ -27,6 +52,13 @@ public Builder scorersData(List scorersData) { return this; } + /** + * Adds a single scorer result. + * + * @param scorerData + * the scorer data to add + * @return this builder + */ public Builder scorerData(ScorerData scorerData) { if (result.getScorersData() == null) { result.setScorersData(new java.util.ArrayList<>()); @@ -36,18 +68,25 @@ public Builder scorerData(ScorerData scorerData) { return this; } + /** + * Sets the data object for the evaluation. + * + * @param dataObject + * the example data + * @return this builder + */ public Builder dataObject(Example dataObject) { - // Store Example in additional properties since setDataObject - // expects TraceSpan - // This indicates a potential API design issue - ScoringResult may - // be - // trace-specific if (dataObject != null) { result.setAdditionalProperty("example", dataObject); } return this; } + /** + * Builds and returns the configured ScoringResult. + * + * @return the configured ScoringResult + */ public ScoringResult build() { return result; } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java index 309f3ab..c8efdc4 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java @@ -4,17 +4,29 @@ import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +/** + * Evaluation configuration for running evaluations against traces and spans. 
+ */ public final class Evaluation { + @SuppressWarnings("unused") // TODO: will add run_evaluation here private final JudgmentSyncClient client; private Evaluation(Builder builder) { this.client = Objects.requireNonNull(builder.client, "client required"); } + /** + * Creates a new builder for configuring an Evaluation. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating Evaluation instances. + */ public static final class Builder { private JudgmentSyncClient client; @@ -23,6 +35,11 @@ Builder client(JudgmentSyncClient client) { return this; } + /** + * Builds and returns a new Evaluation instance. + * + * @return the configured Evaluation + */ public Evaluation build() { return new Evaluation(this); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java index 43a2995..372929b 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java @@ -2,17 +2,21 @@ import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +/** + * Factory for creating evaluation builders. + */ public final class EvaluationFactory { private final JudgmentSyncClient client; - private final String apiKey; - private final String organizationId; - public EvaluationFactory(JudgmentSyncClient client, String apiKey, String organizationId) { + public EvaluationFactory(JudgmentSyncClient client) { this.client = client; - this.apiKey = apiKey; - this.organizationId = organizationId; } + /** + * Creates a new evaluation builder configured with this factory's client. 
+ * + * @return a new evaluation builder + */ public Evaluation.Builder create() { return Evaluation.builder().client(client); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java index e4ab3b9..e3c0d96 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java @@ -10,6 +10,9 @@ import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; import com.judgmentlabs.judgeval.v1.data.APIScorerType; +/** + * Base class for API-based scorers that evaluate using the Judgment backend. + */ public class APIScorer extends com.judgmentlabs.judgeval.internal.api.models.BaseScorer implements BaseScorer { private APIScorerType scoreType; @@ -82,10 +85,25 @@ public ScorerConfig getScorerConfig() { return cfg; } + /** + * Creates a new builder for an APIScorer subclass. + * + * @param + * the scorer type + * @param scorerClass + * the scorer class + * @return a new builder instance + */ public static Builder builder(Class scorerClass) { return new Builder<>(scorerClass); } + /** + * Builder for configuring and creating APIScorer instances. + * + * @param + * the scorer type + */ public static final class Builder { private final T scorer; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java index 760b6c6..37e35cd 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java @@ -3,19 +3,18 @@ import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; /** - * Minimal interface for scorers used by BaseTracer. Only requires the essential - * methods needed for evaluation. 
+ * Base interface for all scorers in the Judgment evaluation system. */ public interface BaseScorer { /** - * Gets the name of the scorer. + * Returns the name of this scorer. * * @return the scorer name */ String getName(); /** - * Gets the scorer configuration for evaluation runs. + * Returns the configuration for this scorer. * * @return the scorer configuration */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java index 66eaa94..edc26d5 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java @@ -5,29 +5,48 @@ import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorerFactory; import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorerFactory; +/** + * Factory for creating scorer builders and accessing scorer types. + */ public final class ScorersFactory { private final JudgmentSyncClient client; - private final String apiKey; - private final String organizationId; - public ScorersFactory(JudgmentSyncClient client, String apiKey, String organizationId) { + public ScorersFactory(JudgmentSyncClient client) { this.client = client; - this.apiKey = apiKey; - this.organizationId = organizationId; } + /** + * Returns a factory for creating prompt-based scorers. + * + * @return the prompt scorer factory + */ public PromptScorerFactory promptScorer() { - return new PromptScorerFactory(client, apiKey, organizationId, false); + return new PromptScorerFactory(client, false); } + /** + * Returns a factory for creating trace-level prompt scorers. 
+ * + * @return the trace prompt scorer factory + */ public PromptScorerFactory tracePromptScorer() { - return new PromptScorerFactory(client, apiKey, organizationId, true); + return new PromptScorerFactory(client, true); } + /** + * Returns a factory for creating custom scorers. + * + * @return the custom scorer factory + */ public CustomScorerFactory customScorer() { return new CustomScorerFactory(); } + /** + * Returns a factory for creating built-in scorers. + * + * @return the built-in scorers factory + */ public BuiltInScorersFactory builtIn() { return new BuiltInScorersFactory(); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java index d3a395a..2fddcd1 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java @@ -5,6 +5,10 @@ import com.judgmentlabs.judgeval.v1.data.APIScorerType; import com.judgmentlabs.judgeval.v1.scorers.APIScorer; +/** + * Scorer that evaluates the correctness of an answer against an expected + * output. + */ public final class AnswerCorrectnessScorer extends APIScorer { private AnswerCorrectnessScorer(Builder builder) { super(APIScorerType.ANSWER_CORRECTNESS); @@ -23,10 +27,18 @@ private AnswerCorrectnessScorer(Builder builder) { } } + /** + * Creates a new builder for configuring an AnswerCorrectnessScorer. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating AnswerCorrectnessScorer instances. 
+ */ public static final class Builder { private double threshold = -1; private String name; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java index 2c04f81..b2d9c04 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java @@ -5,6 +5,9 @@ import com.judgmentlabs.judgeval.v1.data.APIScorerType; import com.judgmentlabs.judgeval.v1.scorers.APIScorer; +/** + * Scorer that evaluates the relevancy of an answer to the input question. + */ public final class AnswerRelevancyScorer extends APIScorer { private AnswerRelevancyScorer(Builder builder) { super(APIScorerType.ANSWER_RELEVANCY); @@ -23,10 +26,18 @@ private AnswerRelevancyScorer(Builder builder) { } } + /** + * Creates a new builder for configuring an AnswerRelevancyScorer. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating AnswerRelevancyScorer instances. + */ public static final class Builder { private double threshold = -1; private String name; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java index c8356f3..ba3fe0d 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java @@ -1,25 +1,53 @@ package com.judgmentlabs.judgeval.v1.scorers.built_in; +/** + * Factory for creating built-in scorer builders. 
+ */ public final class BuiltInScorersFactory { public BuiltInScorersFactory() { } + /** + * Creates a builder for an answer correctness scorer. + * + * @return the scorer builder + */ public AnswerCorrectnessScorer.Builder answerCorrectness() { return AnswerCorrectnessScorer.builder(); } + /** + * Creates a builder for an answer relevancy scorer. + * + * @return the scorer builder + */ public AnswerRelevancyScorer.Builder answerRelevancy() { return AnswerRelevancyScorer.builder(); } + /** + * Creates a builder for a faithfulness scorer. + * + * @return the scorer builder + */ public FaithfulnessScorer.Builder faithfulness() { return FaithfulnessScorer.builder(); } + /** + * Creates a builder for an instruction adherence scorer. + * + * @return the scorer builder + */ public InstructionAdherenceScorer.Builder instructionAdherence() { return InstructionAdherenceScorer.builder(); } + /** + * Creates a builder for a derailment scorer. + * + * @return the scorer builder + */ public DerailmentScorer.Builder derailment() { return DerailmentScorer.builder(); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java index c6f1d74..8a3ed8d 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java @@ -5,6 +5,10 @@ import com.judgmentlabs.judgeval.v1.data.APIScorerType; import com.judgmentlabs.judgeval.v1.scorers.APIScorer; +/** + * Scorer that detects whether a conversation has derailed from its intended + * topic. 
+ */ public final class DerailmentScorer extends APIScorer { private DerailmentScorer(Builder builder) { super(APIScorerType.DERAILMENT); @@ -23,10 +27,18 @@ private DerailmentScorer(Builder builder) { } } + /** + * Creates a new builder for configuring a DerailmentScorer. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating DerailmentScorer instances. + */ public static final class Builder { private double threshold = -1; private String name; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java index 62ccc58..291fe4a 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java @@ -5,6 +5,9 @@ import com.judgmentlabs.judgeval.v1.data.APIScorerType; import com.judgmentlabs.judgeval.v1.scorers.APIScorer; +/** + * Scorer that evaluates whether an answer is faithful to the provided context. + */ public final class FaithfulnessScorer extends APIScorer { private FaithfulnessScorer(Builder builder) { super(APIScorerType.FAITHFULNESS); @@ -23,10 +26,18 @@ private FaithfulnessScorer(Builder builder) { } } + /** + * Creates a new builder for configuring a FaithfulnessScorer. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating FaithfulnessScorer instances. 
+ */ public static final class Builder { private double threshold = -1; private String name; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java index a34f196..ef92bc0 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java @@ -5,6 +5,9 @@ import com.judgmentlabs.judgeval.v1.data.APIScorerType; import com.judgmentlabs.judgeval.v1.scorers.APIScorer; +/** + * Scorer that evaluates whether an answer adheres to the given instructions. + */ public final class InstructionAdherenceScorer extends APIScorer { private InstructionAdherenceScorer(Builder builder) { super(APIScorerType.INSTRUCTION_ADHERENCE); @@ -23,10 +26,18 @@ private InstructionAdherenceScorer(Builder builder) { } } + /** + * Creates a new builder for configuring an InstructionAdherenceScorer. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating InstructionAdherenceScorer instances. 
+ */ public static final class Builder { private double threshold = -1; private String name; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java index fc6c7a4..da8ea85 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java @@ -4,12 +4,21 @@ import com.judgmentlabs.judgeval.v1.data.APIScorerType; import com.judgmentlabs.judgeval.v1.scorers.APIScorer; +/** + * Scorer that uses custom user-defined evaluation logic. Scorers are hosted on + * Judgment Servers + * and can be uploaded using the judgeval CLI. + * + * @see Judgment + * Docs: Upload Scorers + */ public final class CustomScorer extends APIScorer { private CustomScorer(Builder builder) { super(APIScorerType.CUSTOM); setName(builder.name); setClassName(builder.className); - setServerHosted(builder.serverHosted); + // Java SDK only supports server-hosted scorers + setServerHosted(true); } @Override @@ -17,14 +26,21 @@ public ScorerConfig getScorerConfig() { throw new UnsupportedOperationException("CustomScorer does not use ScorerConfig"); } + /** + * Creates a new builder for configuring a CustomScorer. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating CustomScorer instances. 
+ */ public static final class Builder { - private String name; - private String className; - private boolean serverHosted; + private String name; + private String className; public Builder name(String name) { this.name = name; @@ -36,11 +52,6 @@ public Builder className(String className) { return this; } - public Builder serverHosted(boolean serverHosted) { - this.serverHosted = serverHosted; - return this; - } - public CustomScorer build() { return new CustomScorer(this); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java index 3734de2..260af30 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java @@ -1,22 +1,39 @@ package com.judgmentlabs.judgeval.v1.scorers.custom_scorer; +/** + * Factory for creating custom scorer instances. + */ public final class CustomScorerFactory { public CustomScorerFactory() { } + /** + * Creates a custom scorer with the specified name. + * + * @param name + * the scorer name + * @return the configured custom scorer + */ public CustomScorer get(String name) { return CustomScorer.builder() .name(name) .className(name) - .serverHosted(true) .build(); } + /** + * Creates a custom scorer with the specified name and class name. 
+ * + * @param name + * the scorer name + * @param className + * the class name + * @return the configured custom scorer + */ public CustomScorer get(String name, String className) { return CustomScorer.builder() .name(name) .className(className) - .serverHosted(true) .build(); } } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java index ac3b345..90dcff6 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java @@ -8,19 +8,25 @@ import com.judgmentlabs.judgeval.v1.data.APIScorerType; import com.judgmentlabs.judgeval.v1.scorers.APIScorer; +/** + * Scorer that evaluates traces using Judgment-hosted prompt scorers. + * + * Prompt scorers are hosted on Judgment Servers and can be configured using the + * Scorer Playground. + * + * @see Judgment + * Docs: Prompt Scorers + */ public final class PromptScorer extends APIScorer { - private final String prompt; + private final String prompt; private final Map options; - private final String judgmentApiKey; - private final String organizationId; - private final boolean isTrace; + private final boolean isTrace; private PromptScorer(Builder builder) { super(builder.isTrace ? 
APIScorerType.TRACE_PROMPT_SCORER : APIScorerType.PROMPT_SCORER); this.prompt = Objects.requireNonNull(builder.prompt, "prompt required"); this.options = builder.options; - this.judgmentApiKey = builder.apiKey; - this.organizationId = builder.organizationId; this.isTrace = builder.isTrace; setName(Objects.requireNonNull(builder.name, "name required")); setThreshold(builder.threshold); @@ -46,6 +52,7 @@ public ScorerConfig getScorerConfig() { cfg.setName(getName()); cfg.setStrictMode(getStrictMode()); cfg.setRequiredParams(getRequiredParams()); + Map kwargs = new HashMap<>(); kwargs.put("prompt", prompt); if (options != null) { @@ -58,18 +65,24 @@ public ScorerConfig getScorerConfig() { return cfg; } + /** + * Creates a new builder for configuring a PromptScorer. + * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating PromptScorer instances. + */ public static final class Builder { - private String name; - private String prompt; - private double threshold = 0.5; + private String name; + private String prompt; + private double threshold = 0.5; private Map options; - private String apiKey; - private String organizationId; - private boolean isTrace; + private boolean isTrace; public Builder name(String name) { this.name = name; @@ -91,16 +104,6 @@ public Builder options(Map options) { return this; } - Builder apiKey(String apiKey) { - this.apiKey = apiKey; - return this; - } - - Builder organizationId(String organizationId) { - this.organizationId = organizationId; - return this; - } - Builder isTrace(boolean isTrace) { this.isTrace = isTrace; return this; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java index 2f74c4d..6adcf55 100644 --- 
a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java @@ -11,22 +11,31 @@ import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersRequest; import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersResponse; +/** + * Factory for retrieving and creating prompt-based scorers. + */ public final class PromptScorerFactory { private final JudgmentSyncClient client; - private final String apiKey; - private final String organizationId; private final boolean isTrace; private static final Map cache = new ConcurrentHashMap<>(); - public PromptScorerFactory(JudgmentSyncClient client, String apiKey, String organizationId, boolean isTrace) { + public PromptScorerFactory(JudgmentSyncClient client, boolean isTrace) { this.client = client; - this.apiKey = apiKey; - this.organizationId = organizationId; this.isTrace = isTrace; } + /** + * Retrieves a prompt scorer by name from the Judgment API. + * Results are cached to avoid repeated API calls. + * + * @param name + * the scorer name + * @return the configured prompt scorer + * @throws JudgmentAPIError + * if the scorer is not found or retrieval fails + */ public PromptScorer get(String name) { - CacheKey key = new CacheKey(name, apiKey, organizationId); + CacheKey key = new CacheKey(name, client.getApiKey(), client.getOrganizationId()); com.judgmentlabs.judgeval.internal.api.models.PromptScorer cached = cache.get(key); if (cached != null) { return createFromModel(cached, name); @@ -82,16 +91,17 @@ private PromptScorer createFromModel(com.judgmentlabs.judgeval.internal.api.mode .prompt(model.getPrompt()) .threshold(Optional.ofNullable(model.getThreshold()).orElse(0.5)) .options(options) - .apiKey(apiKey) - .organizationId(organizationId) .isTrace(isTrace) .build(); } + /** + * Creates a new prompt scorer builder. 
+ * + * @return a new scorer builder + */ public PromptScorer.Builder create() { return PromptScorer.builder() - .apiKey(apiKey) - .organizationId(organizationId) .isTrace(isTrace); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java index c241d8b..16878e6 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -28,18 +28,22 @@ import io.opentelemetry.context.Scope; import io.opentelemetry.sdk.trace.export.SpanExporter; +/** + * Base tracer providing span manipulation, attribute setting, and evaluation + * capabilities. + */ public abstract class BaseTracer { - public static final String TRACER_NAME = "judgeval"; + public static final String TRACER_NAME = "judgeval"; - protected final String projectName; - protected final String apiKey; - protected final String organizationId; - protected final String apiUrl; - protected final boolean enableEvaluation; + protected final String projectName; + protected final String apiKey; + protected final String organizationId; + protected final String apiUrl; + protected final boolean enableEvaluation; protected final JudgmentSyncClient apiClient; - protected final ISerializer serializer; - protected final ObjectMapper jacksonMapper; - protected final Optional projectId; + protected final ISerializer serializer; + protected final ObjectMapper jacksonMapper; + protected final Optional projectId; protected BaseTracer(String projectName, String apiKey, String organizationId, String apiUrl, boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) { @@ -59,12 +63,33 @@ protected BaseTracer(String projectName, String apiKey, String organizationId, S + "/projects. Skipping Judgment export.")); } + /** + * Initializes the tracer. 
+ */ public abstract void initialize(); + /** + * Forces pending spans to flush. + * + * @param timeoutMillis + * maximum time to wait in milliseconds + * @return true if flush succeeded within timeout + */ public abstract boolean forceFlush(int timeoutMillis); + /** + * Shuts down the tracer. + * + * @param timeoutMillis + * maximum time to wait for shutdown in milliseconds + */ public abstract void shutdown(int timeoutMillis); + /** + * Returns the span exporter for this tracer. + * + * @return the span exporter + */ public SpanExporter getSpanExporter() { return projectId.map(this::createJudgmentSpanExporter) .orElseGet(() -> { @@ -73,6 +98,12 @@ public SpanExporter getSpanExporter() { }); } + /** + * Sets the span kind attribute on the current span. + * + * @param kind + * the span kind + */ public void setSpanKind(String kind) { Optional.ofNullable(kind) .ifPresent(k -> withCurrentSpan( @@ -88,6 +119,14 @@ private static boolean isValidKey(String key) { return key != null && !key.isEmpty(); } + /** + * Sets an attribute on the current span by serializing the value. + * + * @param key + * the attribute key + * @param value + * the attribute value + */ public void setAttribute(String key, Object value) { if (!isValidKey(key)) { return; @@ -97,6 +136,17 @@ public void setAttribute(String key, Object value) { } } + /** + * Sets an attribute on the current span by serializing the value with the + * specified type. + * + * @param key + * the attribute key + * @param value + * the attribute value + * @param type + * the type to use for serialization + */ public void setAttribute(String key, Object value, Type type) { if (!isValidKey(key)) { return; @@ -106,6 +156,14 @@ public void setAttribute(String key, Object value, Type type) { } } + /** + * Sets a string attribute on the current span. 
+ * + * @param key + * the attribute key + * @param value + * the attribute value + */ public void setAttribute(String key, String value) { if (!isValidKey(key)) { return; @@ -113,6 +171,14 @@ public void setAttribute(String key, String value) { withCurrentSpan(span -> span.setAttribute(key, value)); } + /** + * Sets a long attribute on the current span. + * + * @param key + * the attribute key + * @param value + * the attribute value + */ public void setAttribute(String key, long value) { if (!isValidKey(key)) { return; @@ -120,6 +186,14 @@ public void setAttribute(String key, long value) { withCurrentSpan(span -> span.setAttribute(key, value)); } + /** + * Sets a double attribute on the current span. + * + * @param key + * the attribute key + * @param value + * the attribute value + */ public void setAttribute(String key, double value) { if (!isValidKey(key)) { return; @@ -127,6 +201,14 @@ public void setAttribute(String key, double value) { withCurrentSpan(span -> span.setAttribute(key, value)); } + /** + * Sets a boolean attribute on the current span. + * + * @param key + * the attribute key + * @param value + * the attribute value + */ public void setAttribute(String key, boolean value) { if (!isValidKey(key)) { return; @@ -164,6 +246,19 @@ private void safeExecute(String operation, Runnable action) { } } + /** + * Asynchronously evaluates the current span using the specified scorer and + * example. + * The evaluation is queued and processed asynchronously by the Judgment + * backend. 
+ * + * @param scorer + * the scorer to use for evaluation + * @param example + * the example data to evaluate against + * @param model + * the model to use for evaluation + */ public void asyncEvaluate(BaseScorer scorer, Example example, String model) { safeExecute("evaluate scorer", () -> { if (!isEvaluationEnabled()) { @@ -182,10 +277,29 @@ public void asyncEvaluate(BaseScorer scorer, Example example, String model) { }); } + /** + * Asynchronously evaluates the current span using the specified scorer and + * example. + * + * @param scorer + * the scorer to use for evaluation + * @param example + * the example data to evaluate against + */ public void asyncEvaluate(BaseScorer scorer, Example example) { asyncEvaluate(scorer, example, null); } + /** + * Asynchronously evaluates the current trace using the specified scorer. + * Attaches evaluation metadata to the current span for processing after trace + * completion. + * + * @param scorer + * the scorer to use for trace evaluation + * @param model + * the model to use for evaluation + */ public void asyncTraceEvaluate(BaseScorer scorer, String model) { safeExecute("evaluate trace scorer", () -> { if (!isEvaluationEnabled()) { @@ -211,43 +325,101 @@ public void asyncTraceEvaluate(BaseScorer scorer, String model) { }); } + /** + * Asynchronously evaluates the current trace using the specified scorer. + * + * @param scorer + * the scorer to use for trace evaluation + */ public void asyncTraceEvaluate(BaseScorer scorer) { asyncTraceEvaluate(scorer, null); } + /** + * Sets multiple attributes on the current span. + * + * @param attributes + * map of attribute keys to values + */ public void setAttributes(Map attributes) { Optional.ofNullable(attributes) .ifPresent(attrs -> attrs.forEach(this::setAttribute)); } + /** + * Marks the current span as an LLM span. + */ public void setLLMSpan() { setSpanKind("llm"); } + /** + * Marks the current span as a tool span. 
+ */ public void setToolSpan() { setSpanKind("tool"); } + /** + * Marks the current span as a general span. + */ public void setGeneralSpan() { setSpanKind("span"); } + /** + * Sets the input attribute on the current span. + * + * @param input + * the input value + */ public void setInput(Object input) { - setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_INPUT, input); + setInput(input, input.getClass()); } + /** + * Sets the output attribute on the current span. + * + * @param output + * the output value + */ public void setOutput(Object output) { - setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_OUTPUT, output); + setOutput(output, output.getClass()); } + /** + * Sets the input attribute on the current span using the specified type. + * + * @param input + * the input value + * @param type + * the type to use for serialization + */ public void setInput(Object input, Type type) { setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_INPUT, input, type); } + /** + * Sets the output attribute on the current span using the specified type. + * + * @param output + * the output value + * @param type + * the type to use for serialization + */ public void setOutput(Object output, Type type) { setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_OUTPUT, output, type); } + /** + * Executes a runnable within a new span, automatically handling span lifecycle + * and errors. + * + * @param spanName + * the name of the span + * @param runnable + * the code to execute within the span + */ public void span(String spanName, Runnable runnable) { Span span = getTracer().spanBuilder(spanName) .startSpan(); @@ -261,6 +433,20 @@ public void span(String spanName, Runnable runnable) { } } + /** + * Executes a callable within a new span, automatically handling span lifecycle + * and errors. 
+ * + * @param + * the return type + * @param spanName + * the name of the span + * @param callable + * the code to execute within the span + * @return the result of the callable + * @throws Exception + * if the callable throws an exception + */ public T span(String spanName, java.util.concurrent.Callable callable) throws Exception { Span span = getTracer().spanBuilder(spanName) .startSpan(); @@ -274,35 +460,77 @@ public T span(String spanName, java.util.concurrent.Callable callable) th } } + /** + * Returns the OpenTelemetry tracer instance. + * + * @return the OpenTelemetry tracer + */ public io.opentelemetry.api.trace.Tracer getTracer() { return GlobalOpenTelemetry.get() .getTracer(TRACER_NAME); } + /** + * Returns the project name. + * + * @return the project name + */ public String getProjectName() { return projectName; } + /** + * Returns the API key. + * + * @return the API key + */ public String getApiKey() { return apiKey; } + /** + * Returns the organization ID. + * + * @return the organization ID + */ public String getOrganizationId() { return organizationId; } + /** + * Returns the API URL. + * + * @return the API URL + */ public String getApiUrl() { return apiUrl; } + /** + * Returns whether evaluation is enabled. + * + * @return true if evaluation is enabled + */ public boolean isEnableEvaluation() { return enableEvaluation; } + /** + * Returns the resolved project ID if available. + * + * @return the project ID, or empty if not resolved + */ public Optional getProjectId() { return projectId; } + /** + * Creates and returns a new span with the specified name. 
+ * + * @param spanName + * the name of the span + * @return the created span + */ public static Span span(String spanName) { return GlobalOpenTelemetry.get() .getTracer(TRACER_NAME) @@ -373,6 +601,8 @@ private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String mo evaluationRun.setModel(modelName); evaluationRun.setTraceAndSpanIds(List.of(List.of(traceId, spanId))); evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); + evaluationRun.setCustomScorers(List.of()); + evaluationRun.setIsOffline(false); return evaluationRun; } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java index 3f6ff48..8d84fe8 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java @@ -2,9 +2,28 @@ import java.lang.reflect.Type; +/** + * Serializes objects to string representations. + */ public interface ISerializer { + /** + * Serializes an object to a string. + * + * @param obj + * the object to serialize + * @return the serialized string + */ String serialize(Object obj); + /** + * Serializes an object to a string using the specified type. 
+ * + * @param obj + * the object to serialize + * @param type + * the type to use for serialization + * @return the serialized string + */ default String serialize(Object obj, Type type) { return serialize(obj); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java index a7cc7bc..1b997ba 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java @@ -44,6 +44,12 @@ private Tracer(Builder builder) { } } + /** + * Initializes the tracer by setting up the OpenTelemetry SDK with a span + * exporter, + * configuring the tracer provider with batch span processing, and registering + * it globally. + */ @Override public void initialize() { SpanExporter spanExporter = getSpanExporter(); @@ -68,6 +74,13 @@ public void initialize() { GlobalOpenTelemetry.set(openTelemetry); } + /** + * Forces the tracer to flush any pending spans within the specified timeout. + * + * @param timeoutMillis + * the maximum time to wait in milliseconds + * @return true if the flush completed successfully within the timeout + */ @Override public boolean forceFlush(int timeoutMillis) { if (tracerProvider == null) { @@ -79,6 +92,12 @@ public boolean forceFlush(int timeoutMillis) { .isSuccess(); } + /** + * Shuts down the tracer, flushing any remaining spans and releasing resources. + * + * @param timeoutMillis + * the maximum time to wait for shutdown in milliseconds + */ @Override public void shutdown(int timeoutMillis) { if (tracerProvider == null) { @@ -89,10 +108,18 @@ public void shutdown(int timeoutMillis) { .join(timeoutMillis, java.util.concurrent.TimeUnit.MILLISECONDS); } + /** + * Creates a new builder for configuring a Tracer. 
+ * + * @return a new builder instance + */ public static Builder builder() { return new Builder(); } + /** + * Builder for configuring and creating Tracer instances. + */ public static final class Builder { private JudgmentSyncClient client; private String projectName; @@ -103,46 +130,107 @@ public static final class Builder { private ISerializer serializer; private boolean initialize = false; + /** + * Sets the Judgment API client. + * + * @param client + * the API client + * @return this builder + */ public Builder client(JudgmentSyncClient client) { this.client = client; return this; } + /** + * Sets the project name for this tracer. + * + * @param projectName + * the project name + * @return this builder + */ public Builder projectName(String projectName) { this.projectName = projectName; return this; } + /** + * Sets the API key for authentication. + * + * @param apiKey + * the API key + * @return this builder + */ public Builder apiKey(String apiKey) { this.apiKey = apiKey; return this; } + /** + * Sets the organization ID. + * + * @param organizationId + * the organization ID + * @return this builder + */ public Builder organizationId(String organizationId) { this.organizationId = organizationId; return this; } + /** + * Sets the API URL. + * + * @param apiUrl + * the API URL + * @return this builder + */ public Builder apiUrl(String apiUrl) { this.apiUrl = apiUrl; return this; } + /** + * Sets whether evaluation is enabled. + * + * @param enableEvaluation + * true to enable evaluation + * @return this builder + */ public Builder enableEvaluation(boolean enableEvaluation) { this.enableEvaluation = enableEvaluation; return this; } + /** + * Sets the custom serializer for span attributes. + * + * @param serializer + * the serializer + * @return this builder + */ public Builder serializer(ISerializer serializer) { this.serializer = serializer; return this; } + /** + * Sets whether to automatically initialize the tracer on build. 
+ * + * @param initialize + * true to initialize on build + * @return this builder + */ public Builder initialize(boolean initialize) { this.initialize = initialize; return this; } + /** + * Builds and returns a new Tracer instance. + * + * @return the configured Tracer + */ public Tracer build() { return new Tracer(this); } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java index ea67b10..cffa765 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java @@ -2,19 +2,21 @@ import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +/** + * Factory for creating tracer builders. + */ public final class TracerFactory { private final JudgmentSyncClient client; - private final String apiKey; - private final String organizationId; - private final String apiUrl; - public TracerFactory(JudgmentSyncClient client, String apiKey, String organizationId, String apiUrl) { + public TracerFactory(JudgmentSyncClient client) { this.client = client; - this.apiKey = apiKey; - this.organizationId = organizationId; - this.apiUrl = apiUrl; } + /** + * Creates a new tracer builder configured with this factory's client. 
+ * + * @return a new tracer builder + */ public Tracer.Builder create() { return Tracer.builder() .client(client); diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java index 2c7b6aa..bde3d5a 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java @@ -17,7 +17,7 @@ class EvaluationFactoryTest { @BeforeEach void setUp() { JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); - factory = new EvaluationFactory(client, TEST_API_KEY, TEST_ORG_ID); + factory = new EvaluationFactory(client); } @Test diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java index 0041bd1..625ce85 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java @@ -20,7 +20,7 @@ class ScorersFactoryTest { @BeforeEach void setUp() { JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); - factory = new ScorersFactory(client, TEST_API_KEY, TEST_ORG_ID); + factory = new ScorersFactory(client); } @Test diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java index 30a0515..f3637fe 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java @@ -17,7 +17,7 @@ class TracerFactoryTest { @BeforeEach 
void setUp() { JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); - factory = new TracerFactory(client, TEST_API_KEY, TEST_ORG_ID, TEST_API_URL); + factory = new TracerFactory(client); } @Test From 2337c488502fbd0eabc0493e3a00ce2fa8a94a38 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Wed, 5 Nov 2025 21:20:04 -0800 Subject: [PATCH 07/18] format --- .../custom_scorer/CustomScorerFactory.java | 6 +- .../scorers/prompt_scorer/PromptScorer.java | 14 +-- .../judgeval/v1/tracer/BaseTracer.java | 94 +++++++++---------- 3 files changed, 57 insertions(+), 57 deletions(-) diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java index 260af30..c575b3c 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java @@ -11,7 +11,7 @@ public CustomScorerFactory() { * Creates a custom scorer with the specified name. * * @param name - * the scorer name + * the scorer name * @return the configured custom scorer */ public CustomScorer get(String name) { @@ -25,9 +25,9 @@ public CustomScorer get(String name) { * Creates a custom scorer with the specified name and class name. 
* * @param name - * the scorer name + * the scorer name * @param className - * the class name + * the class name * @return the configured custom scorer */ public CustomScorer get(String name, String className) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java index 90dcff6..ea111e0 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java @@ -19,9 +19,9 @@ * Docs: Prompt Scorers */ public final class PromptScorer extends APIScorer { - private final String prompt; + private final String prompt; private final Map options; - private final boolean isTrace; + private final boolean isTrace; private PromptScorer(Builder builder) { super(builder.isTrace ? APIScorerType.TRACE_PROMPT_SCORER : APIScorerType.PROMPT_SCORER); @@ -52,7 +52,7 @@ public ScorerConfig getScorerConfig() { cfg.setName(getName()); cfg.setStrictMode(getStrictMode()); cfg.setRequiredParams(getRequiredParams()); - + Map kwargs = new HashMap<>(); kwargs.put("prompt", prompt); if (options != null) { @@ -78,11 +78,11 @@ public static Builder builder() { * Builder for configuring and creating PromptScorer instances. 
*/ public static final class Builder { - private String name; - private String prompt; - private double threshold = 0.5; + private String name; + private String prompt; + private double threshold = 0.5; private Map options; - private boolean isTrace; + private boolean isTrace; public Builder name(String name) { this.name = name; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java index 16878e6..bcb291c 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -33,17 +33,17 @@ * capabilities. */ public abstract class BaseTracer { - public static final String TRACER_NAME = "judgeval"; + public static final String TRACER_NAME = "judgeval"; - protected final String projectName; - protected final String apiKey; - protected final String organizationId; - protected final String apiUrl; - protected final boolean enableEvaluation; + protected final String projectName; + protected final String apiKey; + protected final String organizationId; + protected final String apiUrl; + protected final boolean enableEvaluation; protected final JudgmentSyncClient apiClient; - protected final ISerializer serializer; - protected final ObjectMapper jacksonMapper; - protected final Optional projectId; + protected final ISerializer serializer; + protected final ObjectMapper jacksonMapper; + protected final Optional projectId; protected BaseTracer(String projectName, String apiKey, String organizationId, String apiUrl, boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) { @@ -72,7 +72,7 @@ protected BaseTracer(String projectName, String apiKey, String organizationId, S * Forces pending spans to flush. 
* * @param timeoutMillis - * maximum time to wait in milliseconds + * maximum time to wait in milliseconds * @return true if flush succeeded within timeout */ public abstract boolean forceFlush(int timeoutMillis); @@ -81,7 +81,7 @@ protected BaseTracer(String projectName, String apiKey, String organizationId, S * Shuts down the tracer. * * @param timeoutMillis - * maximum time to wait for shutdown in milliseconds + * maximum time to wait for shutdown in milliseconds */ public abstract void shutdown(int timeoutMillis); @@ -102,7 +102,7 @@ public SpanExporter getSpanExporter() { * Sets the span kind attribute on the current span. * * @param kind - * the span kind + * the span kind */ public void setSpanKind(String kind) { Optional.ofNullable(kind) @@ -123,9 +123,9 @@ private static boolean isValidKey(String key) { * Sets an attribute on the current span by serializing the value. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, Object value) { if (!isValidKey(key)) { @@ -141,11 +141,11 @@ public void setAttribute(String key, Object value) { * specified type. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value * @param type - * the type to use for serialization + * the type to use for serialization */ public void setAttribute(String key, Object value, Type type) { if (!isValidKey(key)) { @@ -160,9 +160,9 @@ public void setAttribute(String key, Object value, Type type) { * Sets a string attribute on the current span. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, String value) { if (!isValidKey(key)) { @@ -175,9 +175,9 @@ public void setAttribute(String key, String value) { * Sets a long attribute on the current span. 
* * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, long value) { if (!isValidKey(key)) { @@ -190,9 +190,9 @@ public void setAttribute(String key, long value) { * Sets a double attribute on the current span. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, double value) { if (!isValidKey(key)) { @@ -205,9 +205,9 @@ public void setAttribute(String key, double value) { * Sets a boolean attribute on the current span. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, boolean value) { if (!isValidKey(key)) { @@ -253,11 +253,11 @@ private void safeExecute(String operation, Runnable action) { * backend. * * @param scorer - * the scorer to use for evaluation + * the scorer to use for evaluation * @param example - * the example data to evaluate against + * the example data to evaluate against * @param model - * the model to use for evaluation + * the model to use for evaluation */ public void asyncEvaluate(BaseScorer scorer, Example example, String model) { safeExecute("evaluate scorer", () -> { @@ -282,9 +282,9 @@ public void asyncEvaluate(BaseScorer scorer, Example example, String model) { * example. * * @param scorer - * the scorer to use for evaluation + * the scorer to use for evaluation * @param example - * the example data to evaluate against + * the example data to evaluate against */ public void asyncEvaluate(BaseScorer scorer, Example example) { asyncEvaluate(scorer, example, null); @@ -296,9 +296,9 @@ public void asyncEvaluate(BaseScorer scorer, Example example) { * completion. 
* * @param scorer - * the scorer to use for trace evaluation + * the scorer to use for trace evaluation * @param model - * the model to use for evaluation + * the model to use for evaluation */ public void asyncTraceEvaluate(BaseScorer scorer, String model) { safeExecute("evaluate trace scorer", () -> { @@ -329,7 +329,7 @@ public void asyncTraceEvaluate(BaseScorer scorer, String model) { * Asynchronously evaluates the current trace using the specified scorer. * * @param scorer - * the scorer to use for trace evaluation + * the scorer to use for trace evaluation */ public void asyncTraceEvaluate(BaseScorer scorer) { asyncTraceEvaluate(scorer, null); @@ -339,7 +339,7 @@ public void asyncTraceEvaluate(BaseScorer scorer) { * Sets multiple attributes on the current span. * * @param attributes - * map of attribute keys to values + * map of attribute keys to values */ public void setAttributes(Map attributes) { Optional.ofNullable(attributes) @@ -371,7 +371,7 @@ public void setGeneralSpan() { * Sets the input attribute on the current span. * * @param input - * the input value + * the input value */ public void setInput(Object input) { setInput(input, input.getClass()); @@ -381,7 +381,7 @@ public void setInput(Object input) { * Sets the output attribute on the current span. * * @param output - * the output value + * the output value */ public void setOutput(Object output) { setOutput(output, output.getClass()); @@ -391,9 +391,9 @@ public void setOutput(Object output) { * Sets the input attribute on the current span using the specified type. * * @param input - * the input value + * the input value * @param type - * the type to use for serialization + * the type to use for serialization */ public void setInput(Object input, Type type) { setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_INPUT, input, type); @@ -403,9 +403,9 @@ public void setInput(Object input, Type type) { * Sets the output attribute on the current span using the specified type. 
* * @param output - * the output value + * the output value * @param type - * the type to use for serialization + * the type to use for serialization */ public void setOutput(Object output, Type type) { setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_OUTPUT, output, type); @@ -416,9 +416,9 @@ public void setOutput(Object output, Type type) { * and errors. * * @param spanName - * the name of the span + * the name of the span * @param runnable - * the code to execute within the span + * the code to execute within the span */ public void span(String spanName, Runnable runnable) { Span span = getTracer().spanBuilder(spanName) @@ -438,14 +438,14 @@ public void span(String spanName, Runnable runnable) { * and errors. * * @param - * the return type + * the return type * @param spanName - * the name of the span + * the name of the span * @param callable - * the code to execute within the span + * the code to execute within the span * @return the result of the callable * @throws Exception - * if the callable throws an exception + * if the callable throws an exception */ public T span(String spanName, java.util.concurrent.Callable callable) throws Exception { Span span = getTracer().spanBuilder(spanName) @@ -528,7 +528,7 @@ public Optional getProjectId() { * Creates and returns a new span with the specified name. 
* * @param spanName - * the name of the span + * the name of the span * @return the created span */ public static Span span(String spanName) { From f5db6780b0ce85c5ed4b00f53be44c58f54207ec Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Thu, 6 Nov 2025 11:55:19 -0800 Subject: [PATCH 08/18] chore: remove api key --- .../judgeval/v1/tracer/BaseTracer.java | 50 ++------------- .../judgeval/v1/tracer/Tracer.java | 52 +-------------- .../judgeval/v1/tracer/BaseTracerTest.java | 63 +++++-------------- .../judgeval/v1/tracer/TracerTest.java | 48 ++++++++------ 4 files changed, 52 insertions(+), 161 deletions(-) diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java index bcb291c..e6bdc1e 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -1,7 +1,6 @@ package com.judgmentlabs.judgeval.v1.tracer; import java.lang.reflect.Type; -import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Objects; @@ -36,21 +35,15 @@ public abstract class BaseTracer { public static final String TRACER_NAME = "judgeval"; protected final String projectName; - protected final String apiKey; - protected final String organizationId; - protected final String apiUrl; protected final boolean enableEvaluation; protected final JudgmentSyncClient apiClient; protected final ISerializer serializer; protected final ObjectMapper jacksonMapper; protected final Optional projectId; - protected BaseTracer(String projectName, String apiKey, String organizationId, String apiUrl, + protected BaseTracer(String projectName, boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) { this.projectName = Objects.requireNonNull(projectName, "projectName required"); - this.apiKey = Objects.requireNonNull(apiKey, 
"apiKey required"); - this.organizationId = Objects.requireNonNull(organizationId, "organizationId required"); - this.apiUrl = Objects.requireNonNull(apiUrl, "apiUrl required"); this.enableEvaluation = enableEvaluation; this.apiClient = Objects.requireNonNull(apiClient, "apiClient required"); this.serializer = Objects.requireNonNull(serializer, "serializer required"); @@ -59,7 +52,7 @@ protected BaseTracer(String projectName, String apiKey, String organizationId, S this.projectId.ifPresentOrElse(id -> { }, () -> Logger.error("Failed to resolve project " + projectName - + ", please create it first at https://app.judgmentlabs.ai/org/" + organizationId + + ", please create it first at https://app.judgmentlabs.ai/org/" + this.apiClient.getOrganizationId() + "/projects. Skipping Judgment export.")); } @@ -479,33 +472,6 @@ public String getProjectName() { return projectName; } - /** - * Returns the API key. - * - * @return the API key - */ - public String getApiKey() { - return apiKey; - } - - /** - * Returns the organization ID. - * - * @return the organization ID - */ - public String getOrganizationId() { - return organizationId; - } - - /** - * Returns the API URL. - * - * @return the API URL - */ - public String getApiUrl() { - return apiUrl; - } - /** * Returns whether evaluation is enabled. 
* @@ -556,9 +522,9 @@ private String buildEndpoint(String baseUrl) { private JudgmentSpanExporter createJudgmentSpanExporter(String projectId) { return JudgmentSpanExporter.builder() - .endpoint(buildEndpoint(apiUrl)) - .apiKey(apiKey) - .organizationId(organizationId) + .endpoint(buildEndpoint(apiClient.getApiUrl())) + .apiKey(apiClient.getApiKey()) + .organizationId(apiClient.getOrganizationId()) .projectId(projectId) .build(); } @@ -579,11 +545,7 @@ private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example exam evaluationRun.setModel(modelName); evaluationRun.setTraceId(traceId); evaluationRun.setTraceSpanId(spanId); - - List examples = new ArrayList<>(); - examples.add(example); - evaluationRun.setExamples(examples); - + evaluationRun.setExamples(List.of(example)); evaluationRun.setCustomScorers(List.of()); evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java index 1b997ba..5d3ac69 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java @@ -5,7 +5,6 @@ import java.util.Optional; import com.google.gson.Gson; -import com.judgmentlabs.judgeval.Env; import com.judgmentlabs.judgeval.Version; import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; import com.judgmentlabs.judgeval.utils.Logger; @@ -25,18 +24,8 @@ public final class Tracer extends BaseTracer { private Tracer(Builder builder) { super( Objects.requireNonNull(builder.projectName, "projectName required"), - builder.client != null ? builder.client.getApiKey() - : Objects.requireNonNull(builder.apiKey, "apiKey required"), - builder.client != null ? builder.client.getOrganizationId() - : Objects.requireNonNull(builder.organizationId, "organizationId required"), - builder.client != null ? 
builder.client.getApiUrl() - : (builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL), builder.enableEvaluation, - builder.client != null ? builder.client - : new JudgmentSyncClient( - builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL, - Objects.requireNonNull(builder.apiKey, "apiKey required"), - Objects.requireNonNull(builder.organizationId, "organizationId required")), + Objects.requireNonNull(builder.client, "client required"), builder.serializer != null ? builder.serializer : new GsonSerializer()); if (builder.initialize) { @@ -123,9 +112,6 @@ public static Builder builder() { public static final class Builder { private JudgmentSyncClient client; private String projectName; - private String apiKey; - private String organizationId; - private String apiUrl; private boolean enableEvaluation = true; private ISerializer serializer; private boolean initialize = false; @@ -154,42 +140,6 @@ public Builder projectName(String projectName) { return this; } - /** - * Sets the API key for authentication. - * - * @param apiKey - * the API key - * @return this builder - */ - public Builder apiKey(String apiKey) { - this.apiKey = apiKey; - return this; - } - - /** - * Sets the organization ID. - * - * @param organizationId - * the organization ID - * @return this builder - */ - public Builder organizationId(String organizationId) { - this.organizationId = organizationId; - return this; - } - - /** - * Sets the API URL. - * - * @param apiUrl - * the API URL - * @return this builder - */ - public Builder apiUrl(String apiUrl) { - this.apiUrl = apiUrl; - return this; - } - /** * Sets whether evaluation is enabled. 
* diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java index 203b9d0..86e01fd 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java @@ -1,9 +1,13 @@ package com.judgmentlabs.judgeval.v1.tracer; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.ArgumentMatchers.*; -import static org.mockito.Mockito.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.when; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -23,9 +27,6 @@ class BaseTracerTest { private static final String TEST_PROJECT_NAME = "test-project"; private static final String TEST_PROJECT_ID = "test-project-id-123"; - private static final String TEST_API_KEY = "test-key"; - private static final String TEST_ORG_ID = "test-org"; - private static final String TEST_API_URL = "https://api.test.com"; @Mock private JudgmentSyncClient mockClient; @@ -41,15 +42,15 @@ void setUp() throws Exception { lenient().when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) .thenReturn(response); + lenient().when(mockClient.getApiUrl()).thenReturn("https://api.example.com"); + lenient().when(mockClient.getApiKey()).thenReturn("test-api-key"); + lenient().when(mockClient.getOrganizationId()).thenReturn("test-org-id"); lenient().when(mockSerializer.serialize(any())).thenReturn("serialized"); 
lenient().when(mockSerializer.serialize(any(), any())).thenReturn("serialized"); tracer = new TestableBaseTracer( TEST_PROJECT_NAME, - TEST_API_KEY, - TEST_ORG_ID, - TEST_API_URL, true, mockClient, mockSerializer); @@ -59,9 +60,6 @@ void setUp() throws Exception { void constructor_withValidParameters_resolvesProject() { assertNotNull(tracer); assertEquals(TEST_PROJECT_NAME, tracer.getProjectName()); - assertEquals(TEST_API_KEY, tracer.getApiKey()); - assertEquals(TEST_ORG_ID, tracer.getOrganizationId()); - assertEquals(TEST_API_URL, tracer.getApiUrl()); assertTrue(tracer.isEnableEvaluation()); assertTrue(tracer.getProjectId().isPresent()); assertEquals(TEST_PROJECT_ID, tracer.getProjectId().get()); @@ -72,9 +70,6 @@ void constructor_withNullProjectName_throwsException() { assertThrows(NullPointerException.class, () -> { new TestableBaseTracer( null, - TEST_API_KEY, - TEST_ORG_ID, - TEST_API_URL, true, mockClient, mockSerializer); @@ -82,44 +77,24 @@ void constructor_withNullProjectName_throwsException() { } @Test - void constructor_withNullApiKey_throwsException() { + void constructor_withNullClient_throwsException() { assertThrows(NullPointerException.class, () -> { new TestableBaseTracer( TEST_PROJECT_NAME, - null, - TEST_ORG_ID, - TEST_API_URL, true, - mockClient, - mockSerializer); - }); - } - - @Test - void constructor_withNullOrganizationId_throwsException() { - assertThrows(NullPointerException.class, () -> { - new TestableBaseTracer( - TEST_PROJECT_NAME, - TEST_API_KEY, null, - TEST_API_URL, - true, - mockClient, mockSerializer); }); } @Test - void constructor_withNullApiUrl_throwsException() { + void constructor_withNullSerializer_throwsException() { assertThrows(NullPointerException.class, () -> { new TestableBaseTracer( TEST_PROJECT_NAME, - TEST_API_KEY, - TEST_ORG_ID, - null, true, mockClient, - mockSerializer); + null); }); } @@ -130,9 +105,6 @@ void constructor_withFailedProjectResolution_hasEmptyProjectId() throws Exceptio TestableBaseTracer 
failedTracer = new TestableBaseTracer( TEST_PROJECT_NAME, - TEST_API_KEY, - TEST_ORG_ID, - TEST_API_URL, true, mockClient, mockSerializer); @@ -154,9 +126,6 @@ void getSpanExporter_withoutProjectId_returnsNoOpSpanExporter() throws Exception TestableBaseTracer failedTracer = new TestableBaseTracer( TEST_PROJECT_NAME, - TEST_API_KEY, - TEST_ORG_ID, - TEST_API_URL, true, mockClient, mockSerializer); @@ -178,9 +147,9 @@ void setAttributes_withNull_doesNotThrow() { } private static class TestableBaseTracer extends BaseTracer { - protected TestableBaseTracer(String projectName, String apiKey, String organizationId, String apiUrl, - boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) { - super(projectName, apiKey, organizationId, apiUrl, enableEvaluation, apiClient, serializer); + protected TestableBaseTracer(String projectName, boolean enableEvaluation, JudgmentSyncClient apiClient, + ISerializer serializer) { + super(projectName, enableEvaluation, apiClient, serializer); } @Override diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java index 647bff3..3a57598 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java @@ -1,13 +1,36 @@ package com.judgmentlabs.judgeval.v1.tracer; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.lenient; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import 
com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; + +@ExtendWith(MockitoExtension.class) class TracerTest { private static final String TEST_PROJECT_NAME = "test-project"; - private static final String TEST_API_KEY = "test-key"; - private static final String TEST_ORG_ID = "test-org"; + private static final String TEST_PROJECT_ID = "test-project-id-123"; + + @Mock + private JudgmentSyncClient mockClient; + + @BeforeEach + void setUp() throws Exception { + ResolveProjectNameResponse response = new ResolveProjectNameResponse(); + response.setProjectId(TEST_PROJECT_ID); + + lenient().when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenReturn(response); + } @Test void builder_returnsBuilder() { @@ -19,31 +42,18 @@ void builder_returnsBuilder() { void builder_withNullProjectName_throwsException() { assertThrows(NullPointerException.class, () -> { Tracer.builder() + .client(mockClient) .projectName(null) - .apiKey(TEST_API_KEY) - .organizationId(TEST_ORG_ID) - .build(); - }); - } - - @Test - void builder_withNullApiKey_throwsException() { - assertThrows(NullPointerException.class, () -> { - Tracer.builder() - .projectName(TEST_PROJECT_NAME) - .apiKey(null) - .organizationId(TEST_ORG_ID) .build(); }); } @Test - void builder_withNullOrganizationId_throwsException() { + void builder_withNullClient_throwsException() { assertThrows(NullPointerException.class, () -> { Tracer.builder() .projectName(TEST_PROJECT_NAME) - .apiKey(TEST_API_KEY) - .organizationId(null) + .client(null) .build(); }); } From 6e4c21a3b50a32e71fb4aceb18c1ed94c99d8436 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Thu, 6 Nov 2025 20:11:44 -0800 Subject: [PATCH 09/18] wip --- .../java/examples/simple_chat/SimpleChat.java | 16 +++- .../judgeval/v1/tracer/BaseTracer.java | 96 ++++++++++--------- 2 files 
changed, 66 insertions(+), 46 deletions(-) diff --git a/examples/src/main/java/examples/simple_chat/SimpleChat.java b/examples/src/main/java/examples/simple_chat/SimpleChat.java index c04d465..42017c1 100644 --- a/examples/src/main/java/examples/simple_chat/SimpleChat.java +++ b/examples/src/main/java/examples/simple_chat/SimpleChat.java @@ -3,7 +3,8 @@ import java.time.Duration; import com.judgmentlabs.judgeval.instrumentation.openai.OpenAITelemetry; -import com.judgmentlabs.judgeval.tracer.Tracer; +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.data.Example; import com.openai.client.OpenAIClient; import com.openai.client.okhttp.OpenAIOkHttpClient; import com.openai.models.ChatModel; @@ -13,7 +14,11 @@ public class SimpleChat { public static void main(String[] args) { - var tracer = Tracer.createDefault("SimpleChat-Java"); + var client = JudgmentClient.builder() + .apiKey(System.getenv("JUDGMENT_API_KEY")) + .organizationId(System.getenv("JUDGMENT_ORG_ID")) + .build(); + var tracer = client.tracer().create().projectName("SimpleChat-Java").build(); tracer.initialize(); OpenAIClient baseClient = OpenAIOkHttpClient.fromEnv(); @@ -28,6 +33,13 @@ public static void main(String[] args) { .build(); var res = otelClient.chat().completions().create(req); System.out.println(String.valueOf(res)); + + tracer.asyncEvaluate(client.scorers().builtIn().answerCorrectness().threshold(0.8).build(), + Example.builder() + .property("input", "What is 2+2?") + .property("actual_output", "4") + .property("expected_output", "4") + .build()); }); try { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java index e6bdc1e..2b85fab 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -1,10 +1,14 @@ package 
com.judgmentlabs.judgeval.v1.tracer; import java.lang.reflect.Type; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.UUID; import com.fasterxml.jackson.databind.ObjectMapper; import com.judgmentlabs.judgeval.Env; @@ -32,14 +36,14 @@ * capabilities. */ public abstract class BaseTracer { - public static final String TRACER_NAME = "judgeval"; + public static final String TRACER_NAME = "judgeval"; - protected final String projectName; - protected final boolean enableEvaluation; + protected final String projectName; + protected final boolean enableEvaluation; protected final JudgmentSyncClient apiClient; - protected final ISerializer serializer; - protected final ObjectMapper jacksonMapper; - protected final Optional projectId; + protected final ISerializer serializer; + protected final ObjectMapper jacksonMapper; + protected final Optional projectId; protected BaseTracer(String projectName, boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) { @@ -65,7 +69,7 @@ protected BaseTracer(String projectName, * Forces pending spans to flush. * * @param timeoutMillis - * maximum time to wait in milliseconds + * maximum time to wait in milliseconds * @return true if flush succeeded within timeout */ public abstract boolean forceFlush(int timeoutMillis); @@ -74,7 +78,7 @@ protected BaseTracer(String projectName, * Shuts down the tracer. * * @param timeoutMillis - * maximum time to wait for shutdown in milliseconds + * maximum time to wait for shutdown in milliseconds */ public abstract void shutdown(int timeoutMillis); @@ -95,7 +99,7 @@ public SpanExporter getSpanExporter() { * Sets the span kind attribute on the current span. 
* * @param kind - * the span kind + * the span kind */ public void setSpanKind(String kind) { Optional.ofNullable(kind) @@ -116,9 +120,9 @@ private static boolean isValidKey(String key) { * Sets an attribute on the current span by serializing the value. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, Object value) { if (!isValidKey(key)) { @@ -134,11 +138,11 @@ public void setAttribute(String key, Object value) { * specified type. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value * @param type - * the type to use for serialization + * the type to use for serialization */ public void setAttribute(String key, Object value, Type type) { if (!isValidKey(key)) { @@ -153,9 +157,9 @@ public void setAttribute(String key, Object value, Type type) { * Sets a string attribute on the current span. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, String value) { if (!isValidKey(key)) { @@ -168,9 +172,9 @@ public void setAttribute(String key, String value) { * Sets a long attribute on the current span. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, long value) { if (!isValidKey(key)) { @@ -183,9 +187,9 @@ public void setAttribute(String key, long value) { * Sets a double attribute on the current span. * * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, double value) { if (!isValidKey(key)) { @@ -198,9 +202,9 @@ public void setAttribute(String key, double value) { * Sets a boolean attribute on the current span. 
* * @param key - * the attribute key + * the attribute key * @param value - * the attribute value + * the attribute value */ public void setAttribute(String key, boolean value) { if (!isValidKey(key)) { @@ -246,11 +250,11 @@ private void safeExecute(String operation, Runnable action) { * backend. * * @param scorer - * the scorer to use for evaluation + * the scorer to use for evaluation * @param example - * the example data to evaluate against + * the example data to evaluate against * @param model - * the model to use for evaluation + * the model to use for evaluation */ public void asyncEvaluate(BaseScorer scorer, Example example, String model) { safeExecute("evaluate scorer", () -> { @@ -275,9 +279,9 @@ public void asyncEvaluate(BaseScorer scorer, Example example, String model) { * example. * * @param scorer - * the scorer to use for evaluation + * the scorer to use for evaluation * @param example - * the example data to evaluate against + * the example data to evaluate against */ public void asyncEvaluate(BaseScorer scorer, Example example) { asyncEvaluate(scorer, example, null); @@ -289,9 +293,9 @@ public void asyncEvaluate(BaseScorer scorer, Example example) { * completion. * * @param scorer - * the scorer to use for trace evaluation + * the scorer to use for trace evaluation * @param model - * the model to use for evaluation + * the model to use for evaluation */ public void asyncTraceEvaluate(BaseScorer scorer, String model) { safeExecute("evaluate trace scorer", () -> { @@ -322,7 +326,7 @@ public void asyncTraceEvaluate(BaseScorer scorer, String model) { * Asynchronously evaluates the current trace using the specified scorer. * * @param scorer - * the scorer to use for trace evaluation + * the scorer to use for trace evaluation */ public void asyncTraceEvaluate(BaseScorer scorer) { asyncTraceEvaluate(scorer, null); @@ -332,7 +336,7 @@ public void asyncTraceEvaluate(BaseScorer scorer) { * Sets multiple attributes on the current span. 
* * @param attributes - * map of attribute keys to values + * map of attribute keys to values */ public void setAttributes(Map attributes) { Optional.ofNullable(attributes) @@ -364,7 +368,7 @@ public void setGeneralSpan() { * Sets the input attribute on the current span. * * @param input - * the input value + * the input value */ public void setInput(Object input) { setInput(input, input.getClass()); @@ -374,7 +378,7 @@ public void setInput(Object input) { * Sets the output attribute on the current span. * * @param output - * the output value + * the output value */ public void setOutput(Object output) { setOutput(output, output.getClass()); @@ -384,9 +388,9 @@ public void setOutput(Object output) { * Sets the input attribute on the current span using the specified type. * * @param input - * the input value + * the input value * @param type - * the type to use for serialization + * the type to use for serialization */ public void setInput(Object input, Type type) { setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_INPUT, input, type); @@ -396,9 +400,9 @@ public void setInput(Object input, Type type) { * Sets the output attribute on the current span using the specified type. * * @param output - * the output value + * the output value * @param type - * the type to use for serialization + * the type to use for serialization */ public void setOutput(Object output, Type type) { setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_OUTPUT, output, type); @@ -409,9 +413,9 @@ public void setOutput(Object output, Type type) { * and errors. * * @param spanName - * the name of the span + * the name of the span * @param runnable - * the code to execute within the span + * the code to execute within the span */ public void span(String spanName, Runnable runnable) { Span span = getTracer().spanBuilder(spanName) @@ -431,14 +435,14 @@ public void span(String spanName, Runnable runnable) { * and errors. 
* * @param - * the return type + * the return type * @param spanName - * the name of the span + * the name of the span * @param callable - * the code to execute within the span + * the code to execute within the span * @return the result of the callable * @throws Exception - * if the callable throws an exception + * if the callable throws an exception */ public T span(String spanName, java.util.concurrent.Callable callable) throws Exception { Span span = getTracer().spanBuilder(spanName) @@ -494,7 +498,7 @@ public Optional getProjectId() { * Creates and returns a new span with the specified name. * * @param spanName - * the name of the span + * the name of the span * @return the created span */ public static Span span(String spanName) { @@ -540,6 +544,7 @@ private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example exam String modelName = model != null ? model : Env.JUDGMENT_DEFAULT_GPT_MODEL; ExampleEvaluationRun evaluationRun = new ExampleEvaluationRun(); + evaluationRun.setId(UUID.randomUUID().toString()); evaluationRun.setProjectName(projectName); evaluationRun.setEvalName(runId); evaluationRun.setModel(modelName); @@ -548,6 +553,7 @@ private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example exam evaluationRun.setExamples(List.of(example)); evaluationRun.setCustomScorers(List.of()); evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); + evaluationRun.setCreatedAt(Instant.now().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT)); return evaluationRun; } @@ -558,6 +564,7 @@ private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String mo String modelName = model != null ? 
model : Env.JUDGMENT_DEFAULT_GPT_MODEL; TraceEvaluationRun evaluationRun = new TraceEvaluationRun(); + evaluationRun.setId(UUID.randomUUID().toString()); evaluationRun.setProjectName(projectName); evaluationRun.setEvalName(evalName); evaluationRun.setModel(modelName); @@ -565,6 +572,7 @@ private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String mo evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); evaluationRun.setCustomScorers(List.of()); evaluationRun.setIsOffline(false); + evaluationRun.setCreatedAt(Instant.now().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT)); return evaluationRun; } From c6ad9fe00f9d79e480e58a829fcd5f7b8b2ec25a Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Fri, 7 Nov 2025 21:00:10 -0800 Subject: [PATCH 10/18] fix: trace scorer --- .idea/workspace.xml | 31 ++++++++++++------- .../java/examples/simple_chat/SimpleChat.java | 8 ++++- judgeval-java/.idea/.gitignore | 3 ++ .../.idea/codeStyles/codeStyleConfig.xml | 5 +++ judgeval-java/.idea/compiler.xml | 13 ++++++++ judgeval-java/.idea/encodings.xml | 7 +++++ judgeval-java/.idea/jarRepositories.xml | 20 ++++++++++++ judgeval-java/.idea/misc.xml | 12 +++++++ judgeval-java/.idea/vcs.xml | 6 ++++ .../judgeval/v1/tracer/BaseTracer.java | 1 + 10 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 judgeval-java/.idea/.gitignore create mode 100644 judgeval-java/.idea/codeStyles/codeStyleConfig.xml create mode 100644 judgeval-java/.idea/compiler.xml create mode 100644 judgeval-java/.idea/encodings.xml create mode 100644 judgeval-java/.idea/jarRepositories.xml create mode 100644 judgeval-java/.idea/misc.xml create mode 100644 judgeval-java/.idea/vcs.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml index c531a7e..fac94c7 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -5,11 +5,7 @@ - - - - - +

- * Migration example: - * - *

{@code
- * // Old way:
- * DerailmentScorer scorer = DerailmentScorer.create();
- * 
- * // New way:
- * JudgmentClient client = JudgmentClient.builder().build();
- * DerailmentScorer scorer = client.scorers().builtIn().derailment().build();
- * }
- */ -@Deprecated -public class DerailmentScorer extends APIScorer { - public DerailmentScorer() { - super(APIScorerType.DERAILMENT); - } - - public static APIScorer.Builder builder() { - return APIScorer.builder(DerailmentScorer.class); - } - - public static DerailmentScorer create() { - return new DerailmentScorer(); - } - - public static DerailmentScorer create(double threshold) { - return builder().threshold(threshold) - .build(); - } -} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java index ba3fe0d..d064023 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java @@ -42,13 +42,4 @@ public FaithfulnessScorer.Builder faithfulness() { public InstructionAdherenceScorer.Builder instructionAdherence() { return InstructionAdherenceScorer.builder(); } - - /** - * Creates a builder for a derailment scorer. - * - * @return the scorer builder - */ - public DerailmentScorer.Builder derailment() { - return DerailmentScorer.builder(); - } } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java deleted file mode 100644 index 8a3ed8d..0000000 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/DerailmentScorer.java +++ /dev/null @@ -1,72 +0,0 @@ -package com.judgmentlabs.judgeval.v1.scorers.built_in; - -import java.util.Arrays; - -import com.judgmentlabs.judgeval.v1.data.APIScorerType; -import com.judgmentlabs.judgeval.v1.scorers.APIScorer; - -/** - * Scorer that detects whether a conversation has derailed from its intended - * topic. 
- */ -public final class DerailmentScorer extends APIScorer { - private DerailmentScorer(Builder builder) { - super(APIScorerType.DERAILMENT); - setRequiredParams(Arrays.asList("input", "actual_output")); - if (builder.threshold >= 0) { - setThreshold(builder.threshold); - } - if (builder.name != null) { - setName(builder.name); - } - if (builder.strictMode != null) { - setStrictMode(builder.strictMode); - } - if (builder.model != null) { - setModel(builder.model); - } - } - - /** - * Creates a new builder for configuring a DerailmentScorer. - * - * @return a new builder instance - */ - public static Builder builder() { - return new Builder(); - } - - /** - * Builder for configuring and creating DerailmentScorer instances. - */ - public static final class Builder { - private double threshold = -1; - private String name; - private Boolean strictMode; - private String model; - - public Builder threshold(double threshold) { - this.threshold = threshold; - return this; - } - - public Builder name(String name) { - this.name = name; - return this; - } - - public Builder strictMode(boolean strictMode) { - this.strictMode = strictMode; - return this; - } - - public Builder model(String model) { - this.model = model; - return this; - } - - public DerailmentScorer build() { - return new DerailmentScorer(this); - } - } -} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java index 779376f..65bd6fc 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java @@ -36,10 +36,4 @@ void instructionAdherence_returnsBuilder() { InstructionAdherenceScorer.Builder builder = factory.instructionAdherence(); assertNotNull(builder); } - - @Test - void 
derailment_returnsBuilder() { - DerailmentScorer.Builder builder = factory.derailment(); - assertNotNull(builder); - } } From 20901c2ba0d575a39e5c2858a55ff4c795e9fce5 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Sat, 8 Nov 2025 16:49:09 -0800 Subject: [PATCH 15/18] chore: dont throw on prompt scorer get --- .../judgeval/v1/data/APIScorerType.java | 3 - .../built_in/AnswerCorrectnessScorer.java | 2 +- .../built_in/AnswerRelevancyScorer.java | 2 +- .../built_in/BuiltInScorersFactory.java | 9 --- .../scorers/built_in/FaithfulnessScorer.java | 2 +- .../built_in/InstructionAdherenceScorer.java | 71 ------------------- .../prompt_scorer/PromptScorerFactory.java | 42 +++++------ .../built_in/BuiltInScorersFactoryTest.java | 6 -- 8 files changed, 21 insertions(+), 116 deletions(-) delete mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java index ad13bc4..5af16fa 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java @@ -9,9 +9,6 @@ public enum APIScorerType { FAITHFULNESS("Faithfulness"), ANSWER_RELEVANCY("Answer Relevancy"), ANSWER_CORRECTNESS("Answer Correctness"), - INSTRUCTION_ADHERENCE("Instruction Adherence"), - EXECUTION_ORDER("Execution Order"), - DERAILMENT("Derailment"), CUSTOM("Custom"); private final String value; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java index 2fddcd1..73f7b76 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java +++ 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java @@ -40,7 +40,7 @@ public static Builder builder() { * Builder for configuring and creating AnswerCorrectnessScorer instances. */ public static final class Builder { - private double threshold = -1; + private double threshold = 0.5; private String name; private Boolean strictMode; private String model; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java index b2d9c04..952391b 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java @@ -39,7 +39,7 @@ public static Builder builder() { * Builder for configuring and creating AnswerRelevancyScorer instances. */ public static final class Builder { - private double threshold = -1; + private double threshold = 0.5; private String name; private Boolean strictMode; private String model; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java index d064023..b3c0a50 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java @@ -33,13 +33,4 @@ public AnswerRelevancyScorer.Builder answerRelevancy() { public FaithfulnessScorer.Builder faithfulness() { return FaithfulnessScorer.builder(); } - - /** - * Creates a builder for an instruction adherence scorer. 
- * - * @return the scorer builder - */ - public InstructionAdherenceScorer.Builder instructionAdherence() { - return InstructionAdherenceScorer.builder(); - } } diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java index 291fe4a..4e20d36 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java @@ -39,7 +39,7 @@ public static Builder builder() { * Builder for configuring and creating FaithfulnessScorer instances. */ public static final class Builder { - private double threshold = -1; + private double threshold = 0.5; private String name; private Boolean strictMode; private String model; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java deleted file mode 100644 index ef92bc0..0000000 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/InstructionAdherenceScorer.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.judgmentlabs.judgeval.v1.scorers.built_in; - -import java.util.Arrays; - -import com.judgmentlabs.judgeval.v1.data.APIScorerType; -import com.judgmentlabs.judgeval.v1.scorers.APIScorer; - -/** - * Scorer that evaluates whether an answer adheres to the given instructions. 
- */ -public final class InstructionAdherenceScorer extends APIScorer { - private InstructionAdherenceScorer(Builder builder) { - super(APIScorerType.INSTRUCTION_ADHERENCE); - setRequiredParams(Arrays.asList("input", "actual_output")); - if (builder.threshold >= 0) { - setThreshold(builder.threshold); - } - if (builder.name != null) { - setName(builder.name); - } - if (builder.strictMode != null) { - setStrictMode(builder.strictMode); - } - if (builder.model != null) { - setModel(builder.model); - } - } - - /** - * Creates a new builder for configuring an InstructionAdherenceScorer. - * - * @return a new builder instance - */ - public static Builder builder() { - return new Builder(); - } - - /** - * Builder for configuring and creating InstructionAdherenceScorer instances. - */ - public static final class Builder { - private double threshold = -1; - private String name; - private Boolean strictMode; - private String model; - - public Builder threshold(double threshold) { - this.threshold = threshold; - return this; - } - - public Builder name(String name) { - this.name = name; - return this; - } - - public Builder strictMode(boolean strictMode) { - this.strictMode = strictMode; - return this; - } - - public Builder model(String model) { - this.model = model; - return this; - } - - public InstructionAdherenceScorer build() { - return new InstructionAdherenceScorer(this); - } - } -} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java index 6adcf55..6f38121 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java @@ -6,13 +6,13 @@ import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; -import 
com.judgmentlabs.judgeval.exceptions.JudgmentAPIError; import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersRequest; import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersResponse; +import com.judgmentlabs.judgeval.utils.Logger; /** - * Factory for retrieving and creating prompt-based scorers. + * Factory for retrieving prompt-based scorers. */ public final class PromptScorerFactory { private final JudgmentSyncClient client; @@ -30,9 +30,7 @@ public PromptScorerFactory(JudgmentSyncClient client, boolean isTrace) { * * @param name * the scorer name - * @return the configured prompt scorer - * @throws JudgmentAPIError - * if the scorer is not found or retrieval fails + * @return the configured prompt scorer or null if not found or retrieval fails */ public PromptScorer get(String name) { CacheKey key = new CacheKey(name, client.getApiKey(), client.getOrganizationId()); @@ -51,22 +49,28 @@ public PromptScorer get(String name) { .map(FetchPromptScorersResponse::getScorers) .filter(scorers -> scorers != null && !scorers.isEmpty()) .map(scorers -> scorers.get(0)) - .orElseThrow( - () -> new JudgmentAPIError(404, "Failed to fetch prompt scorer '" + name + "': not found")); + .orElseGet( + () -> { + Logger.error("Failed to fetch prompt scorer '" + name + "': not found"); + return null; + }); + + if (scorer == null) { + return null; + } if (Boolean.TRUE.equals(scorer.getIsTrace()) != isTrace) { - String expectedType = isTrace ? "TracePromptScorer" : "PromptScorer"; - String actualType = Boolean.TRUE.equals(scorer.getIsTrace()) ? "TracePromptScorer" : "PromptScorer"; - throw new JudgmentAPIError(400, - "Scorer with name " + name + " is a " + actualType + ", not a " + expectedType); + Logger.error("Scorer with name " + name + " is a " + + (Boolean.TRUE.equals(scorer.getIsTrace()) ? "TracePromptScorer" : "PromptScorer") + ", not a " + + (isTrace ? 
"TracePromptScorer" : "PromptScorer")); + return null; } cache.put(key, scorer); return createFromModel(scorer, name); - } catch (JudgmentAPIError e) { - throw e; } catch (Exception e) { - throw new JudgmentAPIError(500, "Failed to fetch prompt scorer '" + name + "': " + e.getMessage()); + Logger.error("Failed to fetch prompt scorer '" + name + "': " + e.getMessage()); + return null; } } @@ -95,16 +99,6 @@ private PromptScorer createFromModel(com.judgmentlabs.judgeval.internal.api.mode .build(); } - /** - * Creates a new prompt scorer builder. - * - * @return a new scorer builder - */ - public PromptScorer.Builder create() { - return PromptScorer.builder() - .isTrace(isTrace); - } - private static final class CacheKey { private final String name; private final String apiKey; diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java index 65bd6fc..fe8d71d 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java @@ -30,10 +30,4 @@ void faithfulness_returnsBuilder() { FaithfulnessScorer.Builder builder = factory.faithfulness(); assertNotNull(builder); } - - @Test - void instructionAdherence_returnsBuilder() { - InstructionAdherenceScorer.Builder builder = factory.instructionAdherence(); - assertNotNull(builder); - } } From 3c7ea8476496a9964b0e9ac751a598bc5fbe5a62 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Sat, 8 Nov 2025 17:55:37 -0800 Subject: [PATCH 16/18] cleanup: no model --- .../java/examples/simple_chat/SimpleChat.java | 3 ++ .../judgeval/v1/tracer/BaseTracer.java | 44 +++---------------- 2 files changed, 9 insertions(+), 38 deletions(-) diff --git a/examples/src/main/java/examples/simple_chat/SimpleChat.java 
b/examples/src/main/java/examples/simple_chat/SimpleChat.java index 257fda2..29710db 100644 --- a/examples/src/main/java/examples/simple_chat/SimpleChat.java +++ b/examples/src/main/java/examples/simple_chat/SimpleChat.java @@ -40,6 +40,9 @@ public static void main(String[] args) { .property("actual_output", "4") .property("expected_output", "4") .build()); + + tracer.asyncTraceEvaluate(client.scorers().tracePromptScorer().get("ExampleTraceScorer")); + }); try { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java index fdd1175..41d68b5 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -11,7 +11,6 @@ import java.util.UUID; import com.fasterxml.jackson.databind.ObjectMapper; -import com.judgmentlabs.judgeval.Env; import com.judgmentlabs.judgeval.JudgmentAttributeKeys; import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; import com.judgmentlabs.judgeval.internal.api.models.ExampleEvaluationRun; @@ -254,10 +253,8 @@ private void safeExecute(String operation, Runnable action) { * the scorer to use for evaluation * @param example * the example data to evaluate against - * @param model - * the model to use for evaluation */ - public void asyncEvaluate(BaseScorer scorer, Example example, String model) { + public void asyncEvaluate(BaseScorer scorer, Example example) { safeExecute("evaluate scorer", () -> { if (!isEvaluationEnabled()) { return; @@ -269,25 +266,12 @@ public void asyncEvaluate(BaseScorer scorer, Example example, String model) { logEvaluationInfo("asyncEvaluate", traceId, spanId, scorer.getName()); - ExampleEvaluationRun evaluationRun = createEvaluationRun(scorer, example, model, traceId, spanId); + ExampleEvaluationRun evaluationRun = createEvaluationRun(scorer, example, traceId, spanId); 
enqueueEvaluation(evaluationRun); }); }); } - /** - * Asynchronously evaluates the current span using the specified scorer and - * example. - * - * @param scorer - * the scorer to use for evaluation - * @param example - * the example data to evaluate against - */ - public void asyncEvaluate(BaseScorer scorer, Example example) { - asyncEvaluate(scorer, example, null); - } - /** * Asynchronously evaluates the current trace using the specified scorer. * Attaches evaluation metadata to the current span for processing after trace @@ -295,10 +279,8 @@ public void asyncEvaluate(BaseScorer scorer, Example example) { * * @param scorer * the scorer to use for trace evaluation - * @param model - * the model to use for evaluation */ - public void asyncTraceEvaluate(BaseScorer scorer, String model) { + public void asyncTraceEvaluate(BaseScorer scorer) { safeExecute("evaluate trace scorer", () -> { if (!isEvaluationEnabled()) { return; @@ -311,7 +293,7 @@ public void asyncTraceEvaluate(BaseScorer scorer, String model) { logEvaluationInfo("asyncTraceEvaluate", traceId, spanId, scorer.getName()); - TraceEvaluationRun evaluationRun = createTraceEvaluationRun(scorer, model, traceId, spanId); + TraceEvaluationRun evaluationRun = createTraceEvaluationRun(scorer, traceId, spanId); try { String traceEvalJson = jacksonMapper.writeValueAsString(evaluationRun); currentSpan.setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_PENDING_TRACE_EVAL, @@ -323,16 +305,6 @@ public void asyncTraceEvaluate(BaseScorer scorer, String model) { }); } - /** - * Asynchronously evaluates the current trace using the specified scorer. - * - * @param scorer - * the scorer to use for trace evaluation - */ - public void asyncTraceEvaluate(BaseScorer scorer) { - asyncTraceEvaluate(scorer, null); - } - /** * Sets multiple attributes on the current span. 
* @@ -539,16 +511,14 @@ private String generateRunId(String prefix, String spanId) { .orElseGet(() -> String.valueOf(System.currentTimeMillis())); } - private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example example, String model, String traceId, + private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example example, String traceId, String spanId) { String runId = generateRunId("async_evaluate_", spanId); - String modelName = model != null ? model : Env.JUDGMENT_DEFAULT_GPT_MODEL; ExampleEvaluationRun evaluationRun = new ExampleEvaluationRun(); evaluationRun.setId(UUID.randomUUID().toString()); evaluationRun.setProjectName(projectName); evaluationRun.setEvalName(runId); - evaluationRun.setModel(modelName); evaluationRun.setTraceId(traceId); evaluationRun.setTraceSpanId(spanId); evaluationRun.setExamples(List.of(example)); @@ -565,16 +535,14 @@ private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example exam return evaluationRun; } - private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String model, String traceId, + private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String traceId, String spanId) { String evalName = generateRunId("async_trace_evaluate_", spanId); - String modelName = model != null ? 
model : Env.JUDGMENT_DEFAULT_GPT_MODEL; TraceEvaluationRun evaluationRun = new TraceEvaluationRun(); evaluationRun.setId(UUID.randomUUID().toString()); evaluationRun.setProjectName(projectName); evaluationRun.setEvalName(evalName); - evaluationRun.setModel(modelName); evaluationRun.setTraceAndSpanIds(List.of(List.of(traceId, spanId))); evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig())); evaluationRun.setCustomScorers(List.of()); From d1ecd5821b8c9550393ca384f5b4a2cdf993d951 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Mon, 10 Nov 2025 22:47:38 -0800 Subject: [PATCH 17/18] judgment client -> judgeval --- .../java/examples/simple_chat/SimpleChat.java | 4 ++-- .../com/judgmentlabs/judgeval/Judgeval.java | 18 ++++++++++++++++++ .../api_scorers/AnswerCorrectnessScorer.java | 4 ++-- .../api_scorers/AnswerRelevancyScorer.java | 4 ++-- .../api_scorers/FaithfulnessScorer.java | 4 ++-- .../InstructionAdherenceScorer.java | 4 ++-- .../custom_scorer/CustomScorer.java | 4 ++-- .../prompt_scorer/PromptScorer.java | 4 ++-- .../prompt_scorer/TracePromptScorer.java | 4 ++-- .../judgmentlabs/judgeval/tracer/Tracer.java | 4 ++-- .../v1/{JudgmentClient.java => Judgeval.java} | 18 +++++++++--------- ...dgmentClientTest.java => JudgevalTest.java} | 14 +++++++------- 12 files changed, 52 insertions(+), 34 deletions(-) create mode 100644 judgeval-java/src/main/java/com/judgmentlabs/judgeval/Judgeval.java rename judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/{JudgmentClient.java => Judgeval.java} (86%) rename judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/{JudgmentClientTest.java => JudgevalTest.java} (86%) diff --git a/examples/src/main/java/examples/simple_chat/SimpleChat.java b/examples/src/main/java/examples/simple_chat/SimpleChat.java index 29710db..1beeb1b 100644 --- a/examples/src/main/java/examples/simple_chat/SimpleChat.java +++ b/examples/src/main/java/examples/simple_chat/SimpleChat.java @@ -3,7 +3,7 @@ import java.time.Duration; 
import com.judgmentlabs.judgeval.instrumentation.openai.OpenAITelemetry; -import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.Judgeval; import com.judgmentlabs.judgeval.v1.data.Example; import com.openai.client.OpenAIClient; import com.openai.client.okhttp.OpenAIOkHttpClient; @@ -14,7 +14,7 @@ public class SimpleChat { public static void main(String[] args) { - var client = JudgmentClient.builder() + var client = Judgeval.builder() .apiKey(System.getenv("JUDGMENT_API_KEY")) .organizationId(System.getenv("JUDGMENT_ORG_ID")) .build(); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Judgeval.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Judgeval.java new file mode 100644 index 0000000..926ca88 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Judgeval.java @@ -0,0 +1,18 @@ +package com.judgmentlabs.judgeval; + +public final class Judgeval extends com.judgmentlabs.judgeval.v1.Judgeval { + private Judgeval(Builder builder) { + super(builder); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder extends com.judgmentlabs.judgeval.v1.Judgeval.Builder { + @Override + public Judgeval build() { + return new Judgeval(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java index 74d1dd7..750844a 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java @@ -6,7 +6,7 @@ import com.judgmentlabs.judgeval.scorers.APIScorer; /** - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -16,7 +16,7 @@ * AnswerCorrectnessScorer scorer = AnswerCorrectnessScorer.create(); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * AnswerCorrectnessScorer scorer = client.scorers().builtIn().answerCorrectness().build(); * } */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java index fec9a09..652dcfe 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java @@ -6,7 +6,7 @@ import com.judgmentlabs.judgeval.scorers.APIScorer; /** - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -16,7 +16,7 @@ * AnswerRelevancyScorer scorer = AnswerRelevancyScorer.create(); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * AnswerRelevancyScorer scorer = client.scorers().builtIn().answerRelevancy().build(); * } */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java index d4567ec..ee1a6b8 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java @@ -6,7 +6,7 @@ import com.judgmentlabs.judgeval.scorers.APIScorer; /** - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -16,7 +16,7 @@ * FaithfulnessScorer scorer = FaithfulnessScorer.create(); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * FaithfulnessScorer scorer = client.scorers().builtIn().faithfulness().build(); * } */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java index 391eeaa..b60e3cb 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java @@ -6,7 +6,7 @@ import com.judgmentlabs.judgeval.scorers.APIScorer; /** - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -16,7 +16,7 @@ * InstructionAdherenceScorer scorer = InstructionAdherenceScorer.create(); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * InstructionAdherenceScorer scorer = client.scorers().builtIn().instructionAdherence().build(); * } */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java index d576faf..0ca1f07 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java @@ -9,7 +9,7 @@ * Instances serialize into ExampleEvaluationRun.custom_scorers with score_type * "Custom", server_hosted=true, and optional class_name for server routing. * - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -19,7 +19,7 @@ * CustomScorer scorer = CustomScorer.get("my-scorer"); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * CustomScorer scorer = client.scorers().customScorer().get("my-scorer"); * } */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java index e07d588..2d9e28c 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java @@ -10,7 +10,7 @@ import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; /** - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -20,7 +20,7 @@ * PromptScorer scorer = PromptScorer.get("my-scorer"); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * PromptScorer scorer = client.scorers().promptScorer().get("my-scorer"); * } */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java index 5e83766..6ba7c9f 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java @@ -10,7 +10,7 @@ import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; /** - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -20,7 +20,7 @@ * TracePromptScorer scorer = TracePromptScorer.get("my-scorer"); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * PromptScorer scorer = client.scorers().tracePromptScorer().get("my-scorer"); * } */ diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java index ebf98ff..de82d2d 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java @@ -16,7 +16,7 @@ /** * Main tracer for Judgment Labs distributed tracing and evaluation. * - * @deprecated Use {@link com.judgmentlabs.judgeval.v1.JudgmentClient} instead. + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. * *

* Migration example: @@ -27,7 +27,7 @@ * tracer.initialize(); * * // New way: - * JudgmentClient client = JudgmentClient.builder().build(); + * Judgeval client = Judgeval.builder().build(); * Tracer tracer = client.tracer().create() * .projectName("my-project") * .build(); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/Judgeval.java similarity index 86% rename from judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java rename to judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/Judgeval.java index dbb2689..d2cd402 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/JudgmentClient.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/Judgeval.java @@ -12,13 +12,13 @@ * Main entry point for the Judgment SDK. Provides access to tracer, scorer, and * evaluation factories. */ -public final class JudgmentClient { +public class Judgeval { private final String apiKey; private final String organizationId; private final String apiUrl; private final JudgmentSyncClient internalClient; - private JudgmentClient(Builder builder) { + protected Judgeval(Builder builder) { this.apiKey = Objects.requireNonNull(builder.apiKey, "apiKey required"); this.organizationId = Objects.requireNonNull(builder.organizationId, "organizationId required"); this.apiUrl = builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL; @@ -53,7 +53,7 @@ public EvaluationFactory evaluation() { } /** - * Creates a new builder for configuring a JudgmentClient. + * Creates a new builder for configuring a Judgeval. * * @return a new builder instance */ @@ -62,9 +62,9 @@ public static Builder builder() { } /** - * Builder for configuring and creating JudgmentClient instances. + * Builder for configuring and creating Judgeval instances. 
*/ - public static final class Builder { + public static class Builder { private String apiKey = Env.JUDGMENT_API_KEY; private String organizationId = Env.JUDGMENT_ORG_ID; private String apiUrl = Env.JUDGMENT_API_URL; @@ -106,12 +106,12 @@ public Builder apiUrl(String apiUrl) { } /** - * Builds and returns a new JudgmentClient instance. + * Builds and returns a new Judgeval instance. * - * @return the configured JudgmentClient + * @return the configured Judgeval */ - public JudgmentClient build() { - return new JudgmentClient(this); + public Judgeval build() { + return new Judgeval(this); } } } diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgmentClientTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgevalTest.java similarity index 86% rename from judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgmentClientTest.java rename to judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgevalTest.java index e078306..5277d27 100644 --- a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgmentClientTest.java +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgevalTest.java @@ -8,14 +8,14 @@ import com.judgmentlabs.judgeval.v1.scorers.ScorersFactory; import com.judgmentlabs.judgeval.v1.tracer.TracerFactory; -class JudgmentClientTest { +class JudgevalTest { private static final String TEST_API_URL = "https://api.test.com"; private static final String TEST_API_KEY = "test-key"; private static final String TEST_ORG_ID = "test-org"; @Test void builder_withAllParameters_buildsSuccessfully() { - JudgmentClient client = JudgmentClient.builder() + Judgeval client = Judgeval.builder() .apiKey(TEST_API_KEY) .organizationId(TEST_ORG_ID) .apiUrl(TEST_API_URL) @@ -27,7 +27,7 @@ void builder_withAllParameters_buildsSuccessfully() { @Test void builder_withNullApiKey_throwsException() { assertThrows(NullPointerException.class, () -> { - JudgmentClient.builder() + Judgeval.builder() .apiKey(null) 
.organizationId(TEST_ORG_ID) .build(); @@ -37,7 +37,7 @@ void builder_withNullApiKey_throwsException() { @Test void builder_withNullOrganizationId_throwsException() { assertThrows(NullPointerException.class, () -> { - JudgmentClient.builder() + Judgeval.builder() .apiKey(TEST_API_KEY) .organizationId(null) .build(); @@ -46,7 +46,7 @@ void builder_withNullOrganizationId_throwsException() { @Test void tracer_returnsTracerFactory() { - JudgmentClient client = JudgmentClient.builder() + Judgeval client = Judgeval.builder() .apiKey(TEST_API_KEY) .organizationId(TEST_ORG_ID) .build(); @@ -57,7 +57,7 @@ void tracer_returnsTracerFactory() { @Test void scorers_returnsScorersFactory() { - JudgmentClient client = JudgmentClient.builder() + Judgeval client = Judgeval.builder() .apiKey(TEST_API_KEY) .organizationId(TEST_ORG_ID) .build(); @@ -68,7 +68,7 @@ void scorers_returnsScorersFactory() { @Test void evaluation_returnsEvaluationFactory() { - JudgmentClient client = JudgmentClient.builder() + Judgeval client = Judgeval.builder() .apiKey(TEST_API_KEY) .organizationId(TEST_ORG_ID) .build(); From 3dcf8898b48ff6734ccfef35c23ad1231949fd86 Mon Sep 17 00:00:00 2001 From: Abhishek Govindarasu Date: Mon, 10 Nov 2025 22:59:56 -0800 Subject: [PATCH 18/18] chore: initialize true default --- examples/src/main/java/examples/simple_chat/SimpleChat.java | 1 - .../main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/src/main/java/examples/simple_chat/SimpleChat.java b/examples/src/main/java/examples/simple_chat/SimpleChat.java index 1beeb1b..d0306c9 100644 --- a/examples/src/main/java/examples/simple_chat/SimpleChat.java +++ b/examples/src/main/java/examples/simple_chat/SimpleChat.java @@ -19,7 +19,6 @@ public static void main(String[] args) { .organizationId(System.getenv("JUDGMENT_ORG_ID")) .build(); var tracer = client.tracer().create().projectName("SimpleChat-Java").build(); - tracer.initialize(); 
OpenAIClient baseClient = OpenAIOkHttpClient.fromEnv(); var otelClient = OpenAITelemetry.builder(GlobalOpenTelemetry.get()).build().wrap(baseClient); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java index 5d3ac69..301be20 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java @@ -114,7 +114,7 @@ public static final class Builder { private String projectName; private boolean enableEvaluation = true; private ISerializer serializer; - private boolean initialize = false; + private boolean initialize = true; /** * Sets the Judgment API client.