diff --git a/.cursor/rules b/.cursor/rules deleted file mode 100644 index 8422f18..0000000 --- a/.cursor/rules +++ /dev/null @@ -1,227 +0,0 @@ -# Judgeval Java - Internal API Patterns - -## Context - -The `com.judgmentlabs.judgeval.internal` package contains internal API code that is auto-generated from OpenAPI specifications using `scripts/generate_client.py`, but is explicitly modifiable for internal improvements. This code is NOT part of the public API and can be refactored, cleaned up, and optimized as needed. - -## Internal API Models (`internal/api/models/`) - -### Structure Pattern -- **Package**: `com.judgmentlabs.judgeval.internal.api.models` -- **Naming**: PascalCase class names matching OpenAPI schema names (e.g., `EvalResultsFetch`, `ResolveProjectNameResponse`) -- **Purpose**: Jackson-annotated POJOs for serializing/deserializing JSON requests and responses - -### Field Patterns -- **Field Declaration**: - - Private fields with camelCase naming (e.g., `experimentRunId`, `projectName`) - - Jackson `@JsonProperty` annotation mapping snake_case JSON keys to camelCase fields - - Use wrapper types (String, Integer, Boolean, List, etc.) - never primitives for nullable fields - - All fields are nullable by default (no `@NotNull` annotations) - -### Additional Properties Pattern -All model classes MUST include: -```java -private Map additionalProperties = new HashMap<>(); - -@JsonAnyGetter -public Map getAdditionalProperties() { - return additionalProperties; -} - -@JsonAnySetter -public void setAdditionalProperty(String name, Object value) { - additionalProperties.put(name, value); -} -``` -This allows unknown JSON fields to be preserved during deserialization. 
- -### Getter/Setter Pattern -- **Getters**: Public methods following JavaBean convention `getFieldName()` returning field type -- **Setters**: Public methods following JavaBean convention `setFieldName(Type fieldName)` returning void -- **Order**: Fields declared first, then additionalProperties, then getters, then setters - -### Equals/HashCode Pattern -- **Equals**: Must include all fields plus `additionalProperties` -- **Implementation**: Use `Objects.equals()` for all field comparisons -- **HashCode**: Use `Objects.hash()` for all fields, with `Objects.hashCode(additionalProperties)` for the Map -- **Null Safety**: Both methods handle null fields safely via `Objects.equals()` - -### Imports -Standard imports: -- `java.util.HashMap` -- `java.util.List` (if using collections) -- `java.util.Map` -- `java.util.Objects` -- `com.fasterxml.jackson.annotation.JsonAnyGetter` -- `com.fasterxml.jackson.annotation.JsonAnySetter` -- `com.fasterxml.jackson.annotation.JsonProperty` - -## Internal API Clients (`internal/api/`) - -### Client Classes -- **JudgmentSyncClient**: Synchronous HTTP client for blocking API calls -- **JudgmentAsyncClient**: Asynchronous HTTP client for non-blocking API calls -- Both share identical structure and helper methods, differing only in execution model - -### Constructor Pattern -```java -public JudgmentSyncClient(String baseUrl, String apiKey, String organizationId) { - this.baseUrl = baseUrl; - this.apiKey = apiKey; - this.organizationId = organizationId; - this.client = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); - this.mapper = new ObjectMapper(); -} -``` - -### Helper Methods Pattern - -#### buildUrl(String path, Map queryParams) -- Builds full URL from baseUrl + path -- Appends query string from map if non-empty -- Query string format: `key1=value1&key2=value2` -- Returns complete URL string - -#### buildUrl(String path) -- Convenience overload calling `buildUrl(path, new HashMap<>())` - -#### buildHeaders() -- 
Validates `apiKey` and `organizationId` are not null (throws `IllegalArgumentException`) -- Returns String array with: - - `"Content-Type"`, `"application/json"` - - `"Authorization"`, `"Bearer " + apiKey` - - `"X-Organization-Id"`, `organizationId` - -#### handleResponse(HttpResponse response) -- **Sync**: `throws IOException` -- **Async**: No throws clause (unchecked exceptions) -- Checks status code >= 400, throws `RuntimeException` with status and body -- Attempts to deserialize response body using `mapper.readValue(response.body(), new TypeReference() {})` -- Catches parsing exceptions and wraps in `RuntimeException("Failed to parse response", e)` -- **Issue**: TypeReference generic type erasure makes this unreliable - should use specific class when known - -### Method Naming Pattern -- Method names derived from API path: `/fetch_experiment_run/` → `fetchExperimentRun` -- Path segments separated by underscores/forward slashes become camelCase -- HTTP method determines method signature (GET = no body, POST = has payload) - -### Sync Client Method Pattern -```java -public ReturnType methodName(RequestType payload) throws IOException, InterruptedException { - String url = buildUrl("/api/path/"); - String jsonPayload = mapper.writeValueAsString(payload); - HttpRequest request = HttpRequest.newBuilder() - .POST(HttpRequest.BodyPublishers.ofString(jsonPayload)) - .uri(URI.create(url)) - .headers(buildHeaders()) - .build(); - HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); - return handleResponse(response); // or mapper.readValue(response.body(), SpecificType.class); -} -``` - -### Async Client Method Pattern -```java -public CompletableFuture methodName(RequestType payload) { - String url = buildUrl("/api/path/"); - String jsonPayload; - try { - jsonPayload = mapper.writeValueAsString(payload); - } catch (Exception e) { - throw new RuntimeException("Failed to serialize payload", e); - } - HttpRequest request = 
HttpRequest.newBuilder() - .POST(HttpRequest.BodyPublishers.ofString(jsonPayload)) - .uri(URI.create(url)) - .headers(buildHeaders()) - .build(); - return client.sendAsync(request, HttpResponse.BodyHandlers.ofString()) - .thenApply(this::handleResponse); -} -``` - -### Return Type Patterns -- **Consistency Issue**: Some methods return `Object`, others return specific response types -- **Preferred**: Return specific response model types when known (e.g., `ScorerExistsResponse`, `ResolveProjectNameResponse`) -- **Fallback**: Use `Object` only when response schema is truly unknown/variable -- **Async**: Return type wrapped in `CompletableFuture` - -### Error Handling Standards - -#### Sync Client -- `IOException` and `InterruptedException` propagate (method signature) -- `RuntimeException` for HTTP errors (4xx/5xx) - thrown by `handleResponse` -- `RuntimeException` for deserialization errors - thrown by `handleResponse` or `mapper.readValue` - -#### Async Client -- All exceptions caught internally and wrapped in `RuntimeException` -- Serialization errors caught immediately and rethrown before async operation -- HTTP/deserialization errors thrown from `handleResponse` in CompletableFuture chain -- No checked exceptions in method signatures - -### Query Parameters Pattern -For GET requests or POST requests with query params: -```java -Map queryParams = new HashMap<>(); -queryParams.put("param_name", paramValue); -String url = buildUrl("/api/path/", queryParams); -``` - -## Code Quality Expectations - -### Documentation -- Add JavaDoc to all public methods in client classes -- Document parameters, return types, and thrown exceptions -- Explain complex business logic or non-obvious patterns - -### Null Safety -- Use `Optional` internally for nullable values when appropriate -- Use `@NotNull` annotations for parameters that must not be null (only if needed for external safety) -- Validate constructor parameters that must not be null - -### Error Messages -- Error messages 
should be descriptive and include context -- HTTP errors should include status code and response body -- Serialization errors should indicate the operation that failed - -### Code Organization -- Group related methods together -- Keep helper methods private -- Maintain consistent method ordering (constructors, helpers, public API methods) -- Extract common patterns into reusable helpers when duplicated across sync/async clients - -### Type Safety -- Prefer specific types over `Object` for return values -- Use proper generic types for collections -- Avoid raw types and unchecked casts - -### Consistency -- Sync and async clients should have identical API (only differing by CompletableFuture wrapper) -- Method names should be consistent across both clients -- Helper methods should behave identically in both clients - -## Refactoring Opportunities - -### Known Issues -1. **TypeReference generic erasure**: `handleResponse` uses `TypeReference()` which doesn't preserve type information. Consider passing `Class` or using Jackson's `TypeFactory`. - -2. **Return type inconsistency**: Some sync methods return `Object`, others return specific types. Standardize based on OpenAPI response schemas. - -3. **Code duplication**: Sync and async clients share ~90% of code. Consider extracting common base class or using composition. - -4. **Error handling**: Async client wraps serialization errors, sync client doesn't. Consider consistent error handling strategy. - -5. **Missing validation**: No validation of request payloads before serialization. Consider adding validation for required fields. - -6. **Missing documentation**: No JavaDoc comments on any client methods. Should document all public methods. 
- -### Recommended Improvements -- Extract common client logic to base class `BaseJudgmentClient` -- Standardize return types based on OpenAPI response schemas -- Add request/response validation -- Improve error handling with custom exception types -- Add comprehensive JavaDoc documentation -- Consider using builder pattern for request construction -- Add retry logic for transient failures -- Add request/response logging - diff --git a/.github/workflows/test-instrumentation-openai.yml b/.github/workflows/test-instrumentation-openai.yml index 08252f8..cffa159 100644 --- a/.github/workflows/test-instrumentation-openai.yml +++ b/.github/workflows/test-instrumentation-openai.yml @@ -27,9 +27,13 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 + - name: Run Tests + run: | + cd instrumentation/judgeval-instrumentation-openai && mvn -B test + - name: Test Install run: | - mvn -B -Dgpg.skip=true -pl instrumentation/judgeval-instrumentation-openai -am clean install + cd instrumentation/judgeval-instrumentation-openai && mvn -B -Dgpg.skip=true clean install - name: Verify Build Artifacts run: | diff --git a/.github/workflows/test-judgeval-java.yml b/.github/workflows/test-judgeval-java.yml index 76fb936..6bc4123 100644 --- a/.github/workflows/test-judgeval-java.yml +++ b/.github/workflows/test-judgeval-java.yml @@ -27,9 +27,13 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 + - name: Run Tests + run: | + cd judgeval-java && mvn -B test + - name: Test Install run: | - mvn -B -Dgpg.skip=true -pl judgeval-java -am clean install + cd judgeval-java && mvn -B -Dgpg.skip=true clean install - name: Verify Build Artifacts run: | diff --git a/.gitignore b/.gitignore index 07aae5a..f67d300 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,6 @@ $RECYCLE.BIN/ .env .env* + + +.idea \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml 
b/.idea/codeStyles/codeStyleConfig.xml deleted file mode 100644 index a55e7a1..0000000 --- a/.idea/codeStyles/codeStyleConfig.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index c531a7e..0000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1761770774206 - - - - \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 49b061d..6c6c247 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,6 @@ { "java.configuration.updateBuildConfiguration": "automatic", + "java.import.maven.enabled": true, + "java.compile.nullAnalysis.mode": "automatic", "java.format.settings.url": ".vscode/java-formatter.xml" } diff --git a/MIGRATION_V1.md b/MIGRATION_V1.md new file mode 100644 index 0000000..22c9055 --- /dev/null +++ b/MIGRATION_V1.md @@ -0,0 +1,507 @@ +# Migration Guide: v0 to v1 + +This guide shows how to migrate from the deprecated v0 API to the new v1 API in `judgeval-java`. + +## Client Initialization + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; + +JudgmentClient client = JudgmentClient.builder() + .apiKey("your-api-key") // or use JUDGMENT_API_KEY env var + .organizationId("your-org-id") // or use JUDGMENT_ORG_ID env var + .apiUrl("https://api.judgmentlabs.ai") // optional, defaults to production + .build(); +``` + +The client automatically creates an internal `JudgmentSyncClient` that is passed to all child objects. + +## 1. 
Tracer Migration + +### Basic Tracer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.tracer.Tracer; + +Tracer tracer = Tracer.createDefault("my-project"); +tracer.initialize(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.tracer.Tracer; + +JudgmentClient client = JudgmentClient.builder().build(); + +Tracer tracer = client.tracer() + .create() + .projectName("my-project") + .build(); + +tracer.initialize(); +``` + +### Tracer with Custom Configuration + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.tracer.Tracer; +import com.judgmentlabs.judgeval.tracer.TracerConfiguration; + +TracerConfiguration config = TracerConfiguration.builder() + .projectName("my-project") + .apiKey("key") + .organizationId("org") + .enableEvaluation(true) + .build(); + +Tracer tracer = Tracer.createWithConfiguration(config); +tracer.initialize(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.tracer.Tracer; + +JudgmentClient client = JudgmentClient.builder() + .apiKey("key") + .organizationId("org") + .build(); + +Tracer tracer = client.tracer() + .create() + .projectName("my-project") + .enableEvaluation(true) + .build(); + +tracer.initialize(); +``` + +### Using Tracer Methods + +All `BaseTracer` methods remain unchanged: + +```java +tracer.setAttribute("key", "value"); +tracer.setInput(inputData); +tracer.setOutput(outputData); +tracer.setLLMSpan(); +tracer.asyncEvaluate(scorer, example); +tracer.asyncTraceEvaluate(scorer); + +tracer.span("operation", () -> { + // your code +}); + +tracer.forceFlush(5000); +tracer.shutdown(5000); +``` + +## 2. 
PromptScorer Migration + +### Fetching Existing Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.PromptScorer; + +PromptScorer scorer = PromptScorer.get("my-scorer"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +PromptScorer scorer = client.scorers() + .promptScorer() + .get("my-scorer"); +``` + +### Creating New Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.PromptScorer; +import java.util.Map; + +PromptScorer scorer = new PromptScorer( + "accuracy-checker", + "Does the output accurately answer the question?", + 0.7, + Map.of("yes", 1.0, "no", 0.0) +); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; +import java.util.Map; + +JudgmentClient client = JudgmentClient.builder().build(); + +PromptScorer scorer = client.scorers() + .promptScorer() + .create() + .name("accuracy-checker") + .prompt("Does the output accurately answer the question?") + .threshold(0.7) + .options(Map.of("yes", 1.0, "no", 0.0)) + .build(); +``` + +## 3. 
TracePromptScorer Migration + +### Fetching Existing Trace Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.TracePromptScorer; + +TracePromptScorer scorer = TracePromptScorer.get("my-scorer"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.TracePromptScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +TracePromptScorer scorer = client.scorers() + .promptScorer() + .getTrace("my-scorer"); +``` + +### Creating New Trace Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.TracePromptScorer; +import java.util.Map; + +TracePromptScorer scorer = new TracePromptScorer( + "response-quality", + "Does this trace show a high-quality response flow?", + 0.75, + null +); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.TracePromptScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +TracePromptScorer scorer = client.scorers() + .promptScorer() + .createTrace() + .name("response-quality") + .prompt("Does this trace show a high-quality response flow?") + .threshold(0.75) + .build(); +``` + +## 4. 
CustomScorer Migration + +### Basic Custom Scorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; + +CustomScorer scorer = CustomScorer.get("my-custom-scorer"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +CustomScorer scorer = client.scorers() + .customScorer() + .get("my-custom-scorer"); +``` + +### Custom Scorer with Class Name + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; + +CustomScorer scorer = CustomScorer.get("my-scorer", "MyCustomScorerClass"); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +CustomScorer scorer = client.scorers() + .customScorer() + .get("my-scorer", "MyCustomScorerClass"); +``` + +## 5. 
Built-in Scorers Migration + +### AnswerCorrectnessScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.AnswerCorrectnessScorer; + +AnswerCorrectnessScorer scorer = AnswerCorrectnessScorer.create(); + +AnswerCorrectnessScorer scorerWithThreshold = AnswerCorrectnessScorer.create(0.8); + +AnswerCorrectnessScorer customScorer = AnswerCorrectnessScorer.builder() + .threshold(0.7) + .name("custom-correctness") + .build(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerCorrectnessScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +AnswerCorrectnessScorer scorer = client.scorers() + .builtIn() + .answerCorrectness() + .build(); + +AnswerCorrectnessScorer scorerWithThreshold = client.scorers() + .builtIn() + .answerCorrectness() + .threshold(0.8) + .build(); + +AnswerCorrectnessScorer customScorer = client.scorers() + .builtIn() + .answerCorrectness() + .threshold(0.7) + .name("custom-correctness") + .build(); +``` + +### AnswerRelevancyScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.AnswerRelevancyScorer; + +AnswerRelevancyScorer scorer = AnswerRelevancyScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerRelevancyScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +AnswerRelevancyScorer scorer = client.scorers() + .builtIn() + .answerRelevancy() + .build(); +``` + +### FaithfulnessScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.FaithfulnessScorer; + +FaithfulnessScorer scorer = FaithfulnessScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.FaithfulnessScorer; + +JudgmentClient client = 
JudgmentClient.builder().build(); + +FaithfulnessScorer scorer = client.scorers() + .builtIn() + .faithfulness() + .build(); +``` + +### InstructionAdherenceScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.InstructionAdherenceScorer; + +InstructionAdherenceScorer scorer = InstructionAdherenceScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.InstructionAdherenceScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +InstructionAdherenceScorer scorer = client.scorers() + .builtIn() + .instructionAdherence() + .build(); +``` + +### DerailmentScorer + +**Before (v0):** + +```java +import com.judgmentlabs.judgeval.scorers.api_scorers.DerailmentScorer; + +DerailmentScorer scorer = DerailmentScorer.create(); +``` + +**After (v1):** + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.DerailmentScorer; + +JudgmentClient client = JudgmentClient.builder().build(); + +DerailmentScorer scorer = client.scorers() + .builtIn() + .derailment() + .build(); +``` + +## Complete Example: Before and After + +### Before (v0) + +```java +import com.judgmentlabs.judgeval.tracer.Tracer; +import com.judgmentlabs.judgeval.scorers.api_scorers.prompt_scorer.PromptScorer; +import com.judgmentlabs.judgeval.scorers.api_scorers.AnswerCorrectnessScorer; +import com.judgmentlabs.judgeval.data.Example; + +public class OldExample { + public static void main(String[] args) { + Tracer tracer = Tracer.createDefault("my-project"); + tracer.initialize(); + + PromptScorer promptScorer = PromptScorer.get("accuracy-checker"); + + AnswerCorrectnessScorer builtInScorer = AnswerCorrectnessScorer.create(0.8); + + Example example = Example.builder() + .property("input", "What is 2+2?") + .property("actual_output", "4") + .property("expected_output", "4") + .build(); + + 
tracer.span("evaluate", () -> { + tracer.setInput("What is 2+2?"); + tracer.setOutput("4"); + tracer.asyncEvaluate(promptScorer, example); + tracer.asyncEvaluate(builtInScorer, example); + }); + + tracer.forceFlush(5000); + tracer.shutdown(5000); + } +} +``` + +### After (v1) + +```java +import com.judgmentlabs.judgeval.v1.JudgmentClient; +import com.judgmentlabs.judgeval.v1.tracer.Tracer; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorer; +import com.judgmentlabs.judgeval.v1.scorers.built_in.AnswerCorrectnessScorer; +import com.judgmentlabs.judgeval.v1.data.Example; + +public class NewExample { + public static void main(String[] args) { + JudgmentClient client = JudgmentClient.builder() + .apiKey(System.getenv("JUDGMENT_API_KEY")) + .organizationId(System.getenv("JUDGMENT_ORG_ID")) + .build(); + + Tracer tracer = client.tracer() + .create() + .projectName("my-project") + .enableEvaluation(true) + .build(); + + tracer.initialize(); + + PromptScorer promptScorer = client.scorers() + .promptScorer() + .get("accuracy-checker"); + + AnswerCorrectnessScorer builtInScorer = client.scorers() + .builtIn() + .answerCorrectness() + .threshold(0.8) + .build(); + + Example example = Example.builder() + .property("input", "What is 2+2?") + .property("actual_output", "4") + .property("expected_output", "4") + .build(); + + tracer.span("evaluate", () -> { + tracer.setInput("What is 2+2?"); + tracer.setOutput("4"); + tracer.asyncEvaluate(promptScorer, example); + tracer.asyncEvaluate(builtInScorer, example); + }); + + tracer.forceFlush(5000); + tracer.shutdown(5000); + } +} +``` + diff --git a/examples/pom.xml b/examples/pom.xml index d8a58ec..7f06a15 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -26,7 +26,7 @@ com.judgmentlabs judgeval-java - 0.2.3 + 0.3.0 io.opentelemetry diff --git a/examples/src/main/java/examples/simple_chat/SimpleChat.java b/examples/src/main/java/examples/simple_chat/SimpleChat.java index c04d465..d0306c9 100644 --- 
a/examples/src/main/java/examples/simple_chat/SimpleChat.java +++ b/examples/src/main/java/examples/simple_chat/SimpleChat.java @@ -3,7 +3,8 @@ import java.time.Duration; import com.judgmentlabs.judgeval.instrumentation.openai.OpenAITelemetry; -import com.judgmentlabs.judgeval.tracer.Tracer; +import com.judgmentlabs.judgeval.v1.Judgeval; +import com.judgmentlabs.judgeval.v1.data.Example; import com.openai.client.OpenAIClient; import com.openai.client.okhttp.OpenAIOkHttpClient; import com.openai.models.ChatModel; @@ -13,8 +14,11 @@ public class SimpleChat { public static void main(String[] args) { - var tracer = Tracer.createDefault("SimpleChat-Java"); - tracer.initialize(); + var client = Judgeval.builder() + .apiKey(System.getenv("JUDGMENT_API_KEY")) + .organizationId(System.getenv("JUDGMENT_ORG_ID")) + .build(); + var tracer = client.tracer().create().projectName("SimpleChat-Java").build(); OpenAIClient baseClient = OpenAIOkHttpClient.fromEnv(); var otelClient = OpenAITelemetry.builder(GlobalOpenTelemetry.get()).build().wrap(baseClient); @@ -28,6 +32,16 @@ public static void main(String[] args) { .build(); var res = otelClient.chat().completions().create(req); System.out.println(String.valueOf(res)); + + tracer.asyncEvaluate(client.scorers().builtIn().answerCorrectness().threshold(0.8).build(), + Example.builder() + .property("input", "What is 2+2?") + .property("actual_output", "4") + .property("expected_output", "4") + .build()); + + tracer.asyncTraceEvaluate(client.scorers().tracePromptScorer().get("ExampleTraceScorer")); + }); try { @@ -35,4 +49,4 @@ public static void main(String[] args) { } catch (InterruptedException ignored) { } } -} +} \ No newline at end of file diff --git a/judgeval-java/pom.xml b/judgeval-java/pom.xml index 2f44450..e7a99d0 100644 --- a/judgeval-java/pom.xml +++ b/judgeval-java/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.judgmentlabs judgeval-java - 0.2.4 + 0.3.0 jar Judgeval Java Java SDK for Judgeval @@ -54,13 +54,25 @@ org.mockito 
mockito-core - 5.8.0 + 5.15.2 test org.mockito mockito-junit-jupiter - 5.8.0 + 5.15.2 + test + + + net.bytebuddy + byte-buddy + 1.17.5 + test + + + net.bytebuddy + byte-buddy-agent + 1.17.5 test @@ -145,6 +157,14 @@ 21 + + org.apache.maven.plugins + maven-surefire-plugin + 3.2.5 + + -XX:+EnableDynamicAgentLoading + + org.apache.maven.plugins maven-jar-plugin diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Judgeval.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Judgeval.java new file mode 100644 index 0000000..926ca88 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/Judgeval.java @@ -0,0 +1,18 @@ +package com.judgmentlabs.judgeval; + +public final class Judgeval extends com.judgmentlabs.judgeval.v1.Judgeval { + private Judgeval(Builder builder) { + super(builder); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder extends com.judgmentlabs.judgeval.v1.Judgeval.Builder { + @Override + public Judgeval build() { + return new Judgeval(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/JudgmentAttributeKeys.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/JudgmentAttributeKeys.java new file mode 100644 index 0000000..004b8d5 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/JudgmentAttributeKeys.java @@ -0,0 +1,51 @@ +package com.judgmentlabs.judgeval; + +public final class JudgmentAttributeKeys { + public static final class AttributeKeys { + public static final String JUDGMENT_SPAN_KIND = "judgment.span_kind"; + public static final String JUDGMENT_INPUT = "judgment.input"; + public static final String JUDGMENT_OUTPUT = "judgment.output"; + public static final String JUDGMENT_OFFLINE_MODE = "judgment.offline_mode"; + public static final String JUDGMENT_UPDATE_ID = "judgment.update_id"; + public static final String JUDGMENT_CUSTOMER_ID = "judgment.customer_id"; + public static final String 
JUDGMENT_AGENT_ID = "judgment.agent_id"; + public static final String JUDGMENT_PARENT_AGENT_ID = "judgment.parent_agent_id"; + public static final String JUDGMENT_AGENT_CLASS_NAME = "judgment.agent_class_name"; + public static final String JUDGMENT_AGENT_INSTANCE_NAME = "judgment.agent_instance_name"; + public static final String JUDGMENT_IS_AGENT_ENTRY_POINT = "judgment.is_agent_entry_point"; + public static final String JUDGMENT_CUMULATIVE_LLM_COST = "judgment.cumulative_llm_cost"; + public static final String JUDGMENT_STATE_BEFORE = "judgment.state_before"; + public static final String JUDGMENT_STATE_AFTER = "judgment.state_after"; + public static final String JUDGMENT_PENDING_TRACE_EVAL = "judgment.pending_trace_eval"; + + public static final String GEN_AI_PROMPT = "gen_ai.prompt"; + public static final String GEN_AI_COMPLETION = "gen_ai.completion"; + public static final String GEN_AI_REQUEST_MODEL = "gen_ai.request.model"; + public static final String GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"; + public static final String GEN_AI_SYSTEM = "gen_ai.system"; + public static final String GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"; + public static final String GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"; + public static final String GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = "gen_ai.usage.cache_creation_input_tokens"; + public static final String GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"; + public static final String GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"; + public static final String GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"; + public static final String GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"; + + private AttributeKeys() { + } + } + + public static final class ResourceKeys { + public static final String SERVICE_NAME = "service.name"; + public static final String TELEMETRY_SDK_LANGUAGE = "telemetry.sdk.language"; + public static final 
String TELEMETRY_SDK_NAME = "telemetry.sdk.name"; + public static final String TELEMETRY_SDK_VERSION = "telemetry.sdk.version"; + public static final String JUDGMENT_PROJECT_ID = "judgment.project_id"; + + private ResourceKeys() { + } + } + + private JudgmentAttributeKeys() { + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java index 49e9af2..e3765f7 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/APIScorerType.java @@ -1,5 +1,10 @@ package com.judgmentlabs.judgeval.data; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.APIScorerType} + * instead. + */ +@Deprecated public enum APIScorerType { PROMPT_SCORER("Prompt Scorer"), TRACE_PROMPT_SCORER("Trace Prompt Scorer"), diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java index 5e0bb09..f6cc75e 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/EvaluationRunBuilder.java @@ -13,6 +13,11 @@ import com.judgmentlabs.judgeval.scorers.BaseScorer; import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; +/** + * @deprecated Replaced by + * com.judgmentlabs.judgeval.v1.data.EvaluationRunBuilder + */ +@Deprecated public class EvaluationRunBuilder { private String projectName; private String evalName; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java index a09ec10..ff0f53c 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java +++ 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/Example.java @@ -3,6 +3,10 @@ import java.time.Instant; import java.util.UUID; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.Example} instead. + */ +@Deprecated public class Example extends com.judgmentlabs.judgeval.internal.api.models.Example { public Example() { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java index fb84d4e..aa94afb 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScorerData.java @@ -2,6 +2,10 @@ import java.util.Map; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.ScorerData} instead. + */ +@Deprecated public class ScorerData extends com.judgmentlabs.judgeval.internal.api.models.ScorerData { public static Builder builder() { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java index 0f262c7..60ae78e 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/ScoringResult.java @@ -2,6 +2,11 @@ import java.util.List; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.data.ScoringResult} + * instead. 
+ */ +@Deprecated public class ScoringResult extends com.judgmentlabs.judgeval.internal.api.models.ScoringResult { public static Builder builder() { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java index 4e3dd33..7c114eb 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/data/TraceEvaluationRunBuilder.java @@ -13,6 +13,11 @@ import com.judgmentlabs.judgeval.scorers.BaseScorer; import com.judgmentlabs.judgeval.scorers.api_scorers.custom_scorer.CustomScorer; +/** + * @deprecated Replaced by + * com.judgmentlabs.judgeval.v1.data.TraceEvaluationRunBuilder + */ +@Deprecated public class TraceEvaluationRunBuilder { private String projectName; private String evalName; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java index 0ad218d..c6b5e3b 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentAsyncClient.java @@ -30,6 +30,18 @@ public JudgmentAsyncClient(String baseUrl, String apiKey, String organizationId) this.mapper = new ObjectMapper(); } + public String getApiUrl() { + return baseUrl; + } + + public String getApiKey() { + return apiKey; + } + + public String getOrganizationId() { + return organizationId; + } + private String buildUrl(String path, Map queryParams) { StringBuilder url = new StringBuilder(baseUrl).append(path); if (!queryParams.isEmpty()) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java index d0f1f5d..2e6851d 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/internal/api/JudgmentSyncClient.java @@ -30,6 +30,18 @@ public JudgmentSyncClient(String baseUrl, String apiKey, String organizationId) this.mapper = new ObjectMapper(); } + public String getApiUrl() { + return baseUrl; + } + + public String getApiKey() { + return apiKey; + } + + public String getOrganizationId() { + return organizationId; + } + private String buildUrl(String path, Map queryParams) { StringBuilder url = new StringBuilder(baseUrl).append(path); if (!queryParams.isEmpty()) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java index 565b48b..1c6e739 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/APIScorer.java @@ -10,6 +10,11 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.scorers.APIScorer} + * instead. + */ +@Deprecated public class APIScorer extends com.judgmentlabs.judgeval.internal.api.models.BaseScorer implements BaseScorer { private APIScorerType scoreType; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java index 3a5b231..0eb8c46 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/BaseScorer.java @@ -5,7 +5,11 @@ /** * Minimal interface for scorers used by BaseTracer. 
Only requires the essential * methods needed for evaluation. + * + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.scorers.BaseScorer} + * instead. */ +@Deprecated public interface BaseScorer { /** * Gets the name of the scorer. diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java index e0ec743..750844a 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerCorrectnessScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * AnswerCorrectnessScorer scorer = AnswerCorrectnessScorer.create();
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * AnswerCorrectnessScorer scorer = client.scorers().builtIn().answerCorrectness().build();
+ * }
+ */ +@Deprecated public class AnswerCorrectnessScorer extends APIScorer { public AnswerCorrectnessScorer() { super(APIScorerType.ANSWER_CORRECTNESS); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java index 7434399..652dcfe 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/AnswerRelevancyScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * AnswerRelevancyScorer scorer = AnswerRelevancyScorer.create();
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * AnswerRelevancyScorer scorer = client.scorers().builtIn().answerRelevancy().build();
+ * }
+ */ +@Deprecated public class AnswerRelevancyScorer extends APIScorer { public AnswerRelevancyScorer() { super(APIScorerType.ANSWER_RELEVANCY); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java deleted file mode 100644 index 431728d..0000000 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/DerailmentScorer.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.judgmentlabs.judgeval.scorers.api_scorers; - -import com.judgmentlabs.judgeval.data.APIScorerType; -import com.judgmentlabs.judgeval.scorers.APIScorer; - -public class DerailmentScorer extends APIScorer { - public DerailmentScorer() { - super(APIScorerType.DERAILMENT); - } - - public static APIScorer.Builder builder() { - return APIScorer.builder(DerailmentScorer.class); - } - - public static DerailmentScorer create() { - return new DerailmentScorer(); - } - - public static DerailmentScorer create(double threshold) { - return builder().threshold(threshold) - .build(); - } -} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java index bb53d23..ee1a6b8 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/FaithfulnessScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * FaithfulnessScorer scorer = FaithfulnessScorer.create();
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * FaithfulnessScorer scorer = client.scorers().builtIn().faithfulness().build();
+ * }
+ */ +@Deprecated public class FaithfulnessScorer extends APIScorer { public FaithfulnessScorer() { super(APIScorerType.FAITHFULNESS); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java index da71351..b60e3cb 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/InstructionAdherenceScorer.java @@ -5,6 +5,22 @@ import com.judgmentlabs.judgeval.data.APIScorerType; import com.judgmentlabs.judgeval.scorers.APIScorer; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * InstructionAdherenceScorer scorer = InstructionAdherenceScorer.create();
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * InstructionAdherenceScorer scorer = client.scorers().builtIn().instructionAdherence().build();
+ * }
+ */ +@Deprecated public class InstructionAdherenceScorer extends APIScorer { public InstructionAdherenceScorer() { super(APIScorerType.INSTRUCTION_ADHERENCE); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java index 268962a..0ca1f07 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/custom_scorer/CustomScorer.java @@ -8,7 +8,22 @@ * Server-hosted custom scorer representation for enqueue payloads. * Instances serialize into ExampleEvaluationRun.custom_scorers with score_type * "Custom", server_hosted=true, and optional class_name for server routing. + * + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * CustomScorer scorer = CustomScorer.get("my-scorer");
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * CustomScorer scorer = client.scorers().customScorer().get("my-scorer");
+ * }
*/ +@Deprecated public class CustomScorer extends APIScorer { public CustomScorer() { super(APIScorerType.CUSTOM); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java index 7ebd225..2d9e28c 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/PromptScorer.java @@ -9,6 +9,22 @@ import com.judgmentlabs.judgeval.exceptions.JudgmentAPIError; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * PromptScorer scorer = PromptScorer.get("my-scorer");
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * PromptScorer scorer = client.scorers().promptScorer().get("my-scorer");
+ * }
+ */ +@Deprecated public class PromptScorer extends BasePromptScorer { public PromptScorer(String name, String prompt, double threshold, Map options) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java index fe813bb..6ba7c9f 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/scorers/api_scorers/prompt_scorer/TracePromptScorer.java @@ -9,6 +9,22 @@ import com.judgmentlabs.judgeval.exceptions.JudgmentAPIError; import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * TracePromptScorer scorer = TracePromptScorer.get("my-scorer");
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * PromptScorer scorer = client.scorers().tracePromptScorer().get("my-scorer");
+ * }
+ */ +@Deprecated public class TracePromptScorer extends BasePromptScorer { public TracePromptScorer(String name, String prompt, double threshold, Map options) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java index 2f5e0f2..737d80e 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/ISerializer.java @@ -2,6 +2,11 @@ import java.lang.reflect.Type; +/** + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.tracer.ISerializer} + * instead. + */ +@Deprecated public interface ISerializer { String serialize(Object obj); diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java index db8defe..de82d2d 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/Tracer.java @@ -16,11 +16,30 @@ /** * Main tracer for Judgment Labs distributed tracing and evaluation. * + * @deprecated Use {@link com.judgmentlabs.judgeval.v1.Judgeval} instead. + * + *

+ * Migration example: + * + *

{@code
+ * // Old way:
+ * Tracer tracer = Tracer.createDefault("my-project");
+ * tracer.initialize();
+ * 
+ * // New way:
+ * Judgeval client = Judgeval.builder().build();
+ * Tracer tracer = client.tracer().create()
+ *     .projectName("my-project")
+ *     .build();
+ * tracer.initialize();
+ * }
+ * * @see TracerConfiguration * @see SpanExporter * @see com.judgmentlabs.judgeval.scorers.BaseScorer * @see com.judgmentlabs.judgeval.data.Example */ +@Deprecated public final class Tracer extends BaseTracer { private Tracer(TracerConfiguration configuration, ISerializer serializer, boolean shouldInitialize) { diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java index bd296ea..93270a7 100644 --- a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/tracer/TracerConfiguration.java @@ -23,7 +23,10 @@ * } * * @see Tracer + * @deprecated Replaced by + * com.judgmentlabs.judgeval.v1.tracer.TracerConfiguration */ +@Deprecated public final class TracerConfiguration { private final String projectName; private final String apiKey; diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/Judgeval.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/Judgeval.java new file mode 100644 index 0000000..d2cd402 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/Judgeval.java @@ -0,0 +1,117 @@ +package com.judgmentlabs.judgeval.v1; + +import java.util.Objects; + +import com.judgmentlabs.judgeval.Env; +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.v1.evaluation.EvaluationFactory; +import com.judgmentlabs.judgeval.v1.scorers.ScorersFactory; +import com.judgmentlabs.judgeval.v1.tracer.TracerFactory; + +/** + * Main entry point for the Judgment SDK. Provides access to tracer, scorer, and + * evaluation factories. 
+ */ +public class Judgeval { + private final String apiKey; + private final String organizationId; + private final String apiUrl; + private final JudgmentSyncClient internalClient; + + protected Judgeval(Builder builder) { + this.apiKey = Objects.requireNonNull(builder.apiKey, "apiKey required"); + this.organizationId = Objects.requireNonNull(builder.organizationId, "organizationId required"); + this.apiUrl = builder.apiUrl != null ? builder.apiUrl : Env.JUDGMENT_API_URL; + this.internalClient = new JudgmentSyncClient(apiUrl, apiKey, organizationId); + } + + /** + * Returns a factory for creating tracers. + * + * @return the tracer factory + */ + public TracerFactory tracer() { + return new TracerFactory(internalClient); + } + + /** + * Returns a factory for creating scorers. + * + * @return the scorer factory + */ + public ScorersFactory scorers() { + return new ScorersFactory(internalClient); + } + + /** + * Returns a factory for creating evaluations. + * + * @return the evaluation factory + */ + public EvaluationFactory evaluation() { + return new EvaluationFactory(internalClient); + } + + /** + * Creates a new builder for configuring a Judgeval. + * + * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for configuring and creating Judgeval instances. + */ + public static class Builder { + private String apiKey = Env.JUDGMENT_API_KEY; + private String organizationId = Env.JUDGMENT_ORG_ID; + private String apiUrl = Env.JUDGMENT_API_URL; + + /** + * Sets the API key for authentication. + * + * @param apiKey + * the API key + * @return this builder + */ + public Builder apiKey(String apiKey) { + this.apiKey = apiKey; + return this; + } + + /** + * Sets the organization ID. 
+ * + * @param organizationId + * the organization ID + * @return this builder + */ + public Builder organizationId(String organizationId) { + this.organizationId = organizationId; + return this; + } + + /** + * Sets the API URL. + * + * @param apiUrl + * the API URL + * @return this builder + */ + public Builder apiUrl(String apiUrl) { + this.apiUrl = apiUrl; + return this; + } + + /** + * Builds and returns a new Judgeval instance. + * + * @return the configured Judgeval + */ + public Judgeval build() { + return new Judgeval(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java new file mode 100644 index 0000000..5af16fa --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/APIScorerType.java @@ -0,0 +1,28 @@ +package com.judgmentlabs.judgeval.v1.data; + +/** + * Available types of API-based scorers. + */ +public enum APIScorerType { + PROMPT_SCORER("Prompt Scorer"), + TRACE_PROMPT_SCORER("Trace Prompt Scorer"), + FAITHFULNESS("Faithfulness"), + ANSWER_RELEVANCY("Answer Relevancy"), + ANSWER_CORRECTNESS("Answer Correctness"), + CUSTOM("Custom"); + + private final String value; + + APIScorerType(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + @Override + public String toString() { + return value; + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java new file mode 100644 index 0000000..6a5e66a --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/Example.java @@ -0,0 +1,74 @@ +package com.judgmentlabs.judgeval.v1.data; + +import java.time.Instant; +import java.util.UUID; + +/** + * Represents an evaluation example with arbitrary properties. 
+ */ +public class Example extends com.judgmentlabs.judgeval.internal.api.models.Example { + + public Example() { + super(); + setExampleId(UUID.randomUUID() + .toString()); + setCreatedAt(Instant.now() + .toString()); + setName(null); + } + + /** + * Creates a new builder for configuring an Example. + * + * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for configuring and creating Example instances. + */ + public static final class Builder { + private final Example example; + + private Builder() { + this.example = new Example(); + } + + /** + * Sets a custom property on the example. + * + * @param key + * the property key + * @param value + * the property value + * @return this builder + */ + public Builder property(String key, Object value) { + example.setAdditionalProperty(key, value); + return this; + } + + /** + * Sets the name of the example. + * + * @param name + * the example name + * @return this builder + */ + public Builder name(String name) { + example.setName(name); + return this; + } + + /** + * Builds and returns the configured Example. + * + * @return the configured Example + */ + public Example build() { + return example; + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java new file mode 100644 index 0000000..3347aad --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScorerData.java @@ -0,0 +1,165 @@ +package com.judgmentlabs.judgeval.v1.data; + +import java.util.Map; + +/** + * Represents the result of a single scorer evaluation. + */ +public class ScorerData extends com.judgmentlabs.judgeval.internal.api.models.ScorerData { + + /** + * Creates a new builder for configuring ScorerData. 
+ * + * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for configuring and creating ScorerData instances. + */ + public static final class Builder { + private final ScorerData scorerData; + + private Builder() { + this.scorerData = new ScorerData(); + } + + /** + * Sets the scorer name. + * + * @param name + * the scorer name + * @return this builder + */ + public Builder name(String name) { + scorerData.setName(name); + return this; + } + + /** + * Sets the evaluation score. + * + * @param score + * the score value + * @return this builder + */ + public Builder score(Double score) { + scorerData.setScore(score); + return this; + } + + /** + * Sets whether the evaluation succeeded. + * + * @param success + * true if evaluation succeeded + * @return this builder + */ + public Builder success(Boolean success) { + scorerData.setSuccess(success); + return this; + } + + /** + * Sets the reason for the evaluation result. + * + * @param reason + * the evaluation reason + * @return this builder + */ + public Builder reason(String reason) { + scorerData.setReason(reason); + return this; + } + + /** + * Sets the evaluation threshold. + * + * @param threshold + * the threshold value + * @return this builder + */ + public Builder threshold(Double threshold) { + scorerData.setThreshold(threshold); + return this; + } + + /** + * Sets strict mode for evaluation. + * + * @param strictMode + * true for strict mode + * @return this builder + */ + public Builder strictMode(Boolean strictMode) { + scorerData.setStrictMode(strictMode); + return this; + } + + /** + * Sets the model used for evaluation. + * + * @param evaluationModel + * the model name + * @return this builder + */ + public Builder evaluationModel(String evaluationModel) { + scorerData.setEvaluationModel(evaluationModel); + return this; + } + + /** + * Sets an error message if evaluation failed. 
+ * + * @param error + * the error message + * @return this builder + */ + public Builder error(String error) { + scorerData.setError(error); + return this; + } + + /** + * Sets additional metadata for the evaluation. + * + * @param additionalMetadata + * the metadata map + * @return this builder + */ + public Builder additionalMetadata(Map additionalMetadata) { + scorerData.setAdditionalMetadata(additionalMetadata); + return this; + } + + /** + * Adds a single metadata entry. + * + * @param key + * the metadata key + * @param value + * the metadata value + * @return this builder + */ + public Builder metadata(String key, Object value) { + if (scorerData.getAdditionalMetadata() == null) { + scorerData.setAdditionalMetadata(new java.util.HashMap<>()); + } + @SuppressWarnings("unchecked") + Map metadata = (Map) scorerData.getAdditionalMetadata(); + metadata.put(key, value); + return this; + } + + /** + * Builds and returns the configured ScorerData. + * + * @return the configured ScorerData + */ + public ScorerData build() { + return scorerData; + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java new file mode 100644 index 0000000..0811f62 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/data/ScoringResult.java @@ -0,0 +1,94 @@ +package com.judgmentlabs.judgeval.v1.data; + +import java.util.List; + +/** + * Represents a collection of scorer evaluation results. + */ +public class ScoringResult extends com.judgmentlabs.judgeval.internal.api.models.ScoringResult { + + /** + * Creates a new builder for configuring a ScoringResult. + * + * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for configuring and creating ScoringResult instances. 
+ */ + public static final class Builder { + private final ScoringResult result; + + private Builder() { + this.result = new ScoringResult(); + } + + /** + * Sets whether the overall evaluation succeeded. + * + * @param success + * true if evaluation succeeded + * @return this builder + */ + public Builder success(Boolean success) { + result.setSuccess(success); + return this; + } + + /** + * Sets the list of scorer results. + * + * @param scorersData + * the list of scorer data + * @return this builder + */ + public Builder scorersData(List scorersData) { + @SuppressWarnings("unchecked") + List internalList = (List) (List) scorersData; + result.setScorersData(internalList); + return this; + } + + /** + * Adds a single scorer result. + * + * @param scorerData + * the scorer data to add + * @return this builder + */ + public Builder scorerData(ScorerData scorerData) { + if (result.getScorersData() == null) { + result.setScorersData(new java.util.ArrayList<>()); + } + result.getScorersData() + .add(scorerData); + return this; + } + + /** + * Sets the data object for the evaluation. + * + * @param dataObject + * the example data + * @return this builder + */ + public Builder dataObject(Example dataObject) { + if (dataObject != null) { + result.setAdditionalProperty("example", dataObject); + } + return this; + } + + /** + * Builds and returns the configured ScoringResult. 
+ * + * @return the configured ScoringResult + */ + public ScoringResult build() { + return result; + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java new file mode 100644 index 0000000..c8efdc4 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/Evaluation.java @@ -0,0 +1,47 @@ +package com.judgmentlabs.judgeval.v1.evaluation; + +import java.util.Objects; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +/** + * Evaluation configuration for running evaluations against traces and spans. + */ +public final class Evaluation { + @SuppressWarnings("unused") // TODO: will add run_evaluation here + private final JudgmentSyncClient client; + + private Evaluation(Builder builder) { + this.client = Objects.requireNonNull(builder.client, "client required"); + } + + /** + * Creates a new builder for configuring an Evaluation. + * + * @return a new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for configuring and creating Evaluation instances. + */ + public static final class Builder { + private JudgmentSyncClient client; + + Builder client(JudgmentSyncClient client) { + this.client = client; + return this; + } + + /** + * Builds and returns a new Evaluation instance. 
+ * + * @return the configured Evaluation + */ + public Evaluation build() { + return new Evaluation(this); + } + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java new file mode 100644 index 0000000..372929b --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactory.java @@ -0,0 +1,23 @@ +package com.judgmentlabs.judgeval.v1.evaluation; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +/** + * Factory for creating evaluation builders. + */ +public final class EvaluationFactory { + private final JudgmentSyncClient client; + + public EvaluationFactory(JudgmentSyncClient client) { + this.client = client; + } + + /** + * Creates a new evaluation builder configured with this factory's client. + * + * @return a new evaluation builder + */ + public Evaluation.Builder create() { + return Evaluation.builder().client(client); + } +} diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/package-info.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/package-info.java new file mode 100644 index 0000000..a896949 --- /dev/null +++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/package-info.java @@ -0,0 +1,4 @@ +/** + * Judgeval SDK v1 API. 
+ */
+package com.judgmentlabs.judgeval.v1;
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java
new file mode 100644
index 0000000..e3c0d96
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/APIScorer.java
@@ -0,0 +1,153 @@
+package com.judgmentlabs.judgeval.v1.scorers;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig;
+import com.judgmentlabs.judgeval.v1.data.APIScorerType;
+
+/**
+ * Base class for API-based scorers that evaluate using the Judgment backend.
+ */
+public class APIScorer extends com.judgmentlabs.judgeval.internal.api.models.BaseScorer implements BaseScorer {
+    private APIScorerType scoreType; // source of truth for getScoreType()
+
+    @JsonIgnore
+    private List requiredParams; // excluded from JSON; copied into ScorerConfig by getScorerConfig()
+
+    public APIScorer(APIScorerType scoreType) { // the scorer name defaults to the score type's string form
+        super();
+        this.scoreType = scoreType;
+        setName(scoreType.toString());
+        setScoreType(scoreType.toString());
+        this.requiredParams = new java.util.ArrayList<>();
+        if (Boolean.TRUE.equals(getStrictMode())) { // NOTE(review): runs right after super(); presumably only true if the parent constructor enables strict mode - confirm
+            setThreshold(1.0);
+        }
+    }
+
+    public void setThreshold(double threshold) { // throws IllegalArgumentException unless 0 <= threshold <= 1
+        if (!(threshold >= 0 && threshold <= 1)) { // negated form also rejects NaN, which fails every comparison
+            throw new IllegalArgumentException("Threshold must be between 0 and 1, got: " + threshold);
+        }
+        super.setThreshold(threshold);
+    }
+
+    @JsonProperty("score_type")
+    public String getScoreType() { // serialized under the JSON key "score_type"
+        return scoreType.toString();
+    }
+
+    public List getRequiredParams() { // returns the stored list itself, not a copy
+        return requiredParams;
+    }
+
+    public void setRequiredParams(List requiredParams) { // stores the given list reference as-is
+        this.requiredParams = requiredParams;
+    }
+
+    @Override
+    public Double getThreshold() { // falls back to 0.5 when the parent holds no threshold
+        return Optional.ofNullable(super.getThreshold())
+                .orElse(0.5);
+    }
+
+    @Override
+    public String getName() { // null when the parent holds no name
+        return Optional.ofNullable(super.getName())
+                .map(Object::toString)
+                .orElse(null);
+    }
+
+    @Override
+    public Boolean getStrictMode() { // falls back to false when the parent holds no value
+        return Optional.ofNullable(super.getStrictMode())
+                .orElse(false);
+    }
+
+    @Override
+    @JsonIgnore
+    public ScorerConfig getScorerConfig() { // builds a fresh ScorerConfig from the current getter values
+        ScorerConfig cfg = new ScorerConfig();
+        cfg.setScoreType(getScoreType());
+        cfg.setThreshold(getThreshold());
+        cfg.setName(getName());
+        cfg.setStrictMode(getStrictMode());
+        cfg.setRequiredParams(getRequiredParams());
+        Map kwargs = new HashMap<>(); // additional properties are passed along as kwargs
+        if (getAdditionalProperties() != null)
+            kwargs.putAll(getAdditionalProperties());
+        cfg.setKwargs(kwargs);
+        return cfg;
+    }
+
+    /**
+     * Creates a new builder for an APIScorer subclass.
+     *
+     * @param
+     *            the scorer type
+     * @param scorerClass
+     *            the scorer class
+     * @return a new builder instance
+     */
+    public static Builder builder(Class scorerClass) {
+        return new Builder<>(scorerClass);
+    }
+
+    /**
+     * Builder for configuring and creating APIScorer instances.
+     *
+     * @param
+     *            the scorer type
+     */
+    public static final class Builder {
+        private final T scorer; // created once in the constructor and mutated by every builder method
+
+        private Builder(Class scorerClass) {
+            try {
+                this.scorer = scorerClass.getDeclaredConstructor() // looks up the no-argument constructor
+                        .newInstance();
+            } catch (Exception e) { // any reflection failure is wrapped, keeping the original as the cause
+                throw new RuntimeException("Failed to create scorer instance", e);
+            }
+        }
+
+        public Builder threshold(double threshold) { // applied to the scorer immediately
+            scorer.setThreshold(threshold);
+            return this;
+        }
+
+        public Builder name(String name) {
+            scorer.setName(name);
+            return this;
+        }
+
+        public Builder strictMode(boolean strictMode) {
+            scorer.setStrictMode(strictMode);
+            return this;
+        }
+
+        public Builder requiredParams(List requiredParams) {
+            scorer.setRequiredParams(requiredParams);
+            return this;
+        }
+
+        public Builder model(String model) {
+            scorer.setModel(model);
+            return this;
+        }
+
+        public Builder additionalProperty(String key, Object value) {
+            scorer.setAdditionalProperty(key, value);
+            return this;
+        }
+
+        public T build() { // returns the same instance on every call
+            return scorer;
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java
new file mode 100644
index 0000000..37e35cd
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/BaseScorer.java
@@ -0,0 +1,22 @@
+package com.judgmentlabs.judgeval.v1.scorers;
+
+import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig;
+
+/**
+ * Base interface for all scorers in the Judgment evaluation system.
+ */
+public interface BaseScorer {
+    /**
+     * Returns the name of this scorer.
+     *
+     * @return the scorer name
+     */
+    String getName();
+
+    /**
+     * Returns the configuration for this scorer.
+     *
+     * @return the scorer configuration
+     */
+    ScorerConfig getScorerConfig();
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java
new file mode 100644
index 0000000..edc26d5
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactory.java
@@ -0,0 +1,53 @@
+package com.judgmentlabs.judgeval.v1.scorers;
+
+import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient;
+import com.judgmentlabs.judgeval.v1.scorers.built_in.BuiltInScorersFactory;
+import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorerFactory;
+import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorerFactory;
+
+/**
+ * Factory for creating scorer builders and accessing scorer types.
+ */
+public final class ScorersFactory {
+    private final JudgmentSyncClient client; // only passed on to the prompt scorer factories
+
+    public ScorersFactory(JudgmentSyncClient client) { // client is stored as-is; no null check is done here
+        this.client = client;
+    }
+
+    /**
+     * Returns a factory for creating prompt-based scorers.
+     *
+     * @return a new prompt scorer factory with the trace flag set to false
+     */
+    public PromptScorerFactory promptScorer() {
+        return new PromptScorerFactory(client, false);
+    }
+
+    /**
+     * Returns a factory for creating trace-level prompt scorers.
+     *
+     * @return a new prompt scorer factory with the trace flag set to true
+     */
+    public PromptScorerFactory tracePromptScorer() {
+        return new PromptScorerFactory(client, true);
+    }
+
+    /**
+     * Returns a factory for creating custom scorers.
+     *
+     * @return a new custom scorer factory (created without the client)
+     */
+    public CustomScorerFactory customScorer() {
+        return new CustomScorerFactory();
+    }
+
+    /**
+     * Returns a factory for creating built-in scorers.
+     *
+     * @return a new built-in scorers factory (created without the client)
+     */
+    public BuiltInScorersFactory builtIn() {
+        return new BuiltInScorersFactory();
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java
new file mode 100644
index 0000000..73f7b76
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerCorrectnessScorer.java
@@ -0,0 +1,72 @@
+package com.judgmentlabs.judgeval.v1.scorers.built_in;
+
+import java.util.Arrays;
+
+import com.judgmentlabs.judgeval.v1.data.APIScorerType;
+import com.judgmentlabs.judgeval.v1.scorers.APIScorer;
+
+/**
+ * Scorer that evaluates the correctness of an answer against an expected
+ * output. Instances are created through {@link #builder()}.
+ */
+public final class AnswerCorrectnessScorer extends APIScorer {
+    private AnswerCorrectnessScorer(Builder builder) { // private: instances come from Builder.build()
+        super(APIScorerType.ANSWER_CORRECTNESS);
+        setRequiredParams(Arrays.asList("input", "actual_output", "expected_output"));
+        if (builder.threshold >= 0) { // negative thresholds are skipped here rather than rejected
+            setThreshold(builder.threshold);
+        }
+        if (builder.name != null) { // unset builder values leave the base-class state untouched
+            setName(builder.name);
+        }
+        if (builder.strictMode != null) {
+            setStrictMode(builder.strictMode);
+        }
+        if (builder.model != null) {
+            setModel(builder.model);
+        }
+    }
+
+    /**
+     * Creates a new builder for configuring an AnswerCorrectnessScorer.
+     *
+     * @return a new builder instance
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Builder for configuring and creating AnswerCorrectnessScorer instances.
+     */
+    public static final class Builder {
+        private double threshold = 0.5; // used when threshold(...) is never called
+        private String name; // null = not set; the scorer constructor then skips setName
+        private Boolean strictMode; // null = not set; the scorer constructor then skips setStrictMode
+        private String model; // null = not set; the scorer constructor then skips setModel
+
+        public Builder threshold(double threshold) { // stored only; not range-checked by the builder
+            this.threshold = threshold;
+            return this;
+        }
+
+        public Builder name(String name) {
+            this.name = name;
+            return this;
+        }
+
+        public Builder strictMode(boolean strictMode) {
+            this.strictMode = strictMode;
+            return this;
+        }
+
+        public Builder model(String model) {
+            this.model = model;
+            return this;
+        }
+
+        public AnswerCorrectnessScorer build() { // creates a new scorer on every call
+            return new AnswerCorrectnessScorer(this);
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java
new file mode 100644
index 0000000..952391b
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/AnswerRelevancyScorer.java
@@ -0,0 +1,71 @@
+package com.judgmentlabs.judgeval.v1.scorers.built_in;
+
+import java.util.Arrays;
+
+import com.judgmentlabs.judgeval.v1.data.APIScorerType;
+import com.judgmentlabs.judgeval.v1.scorers.APIScorer;
+
+/**
+ * Scorer that evaluates the relevancy of an answer to the input question. Created through {@link #builder()}.
+ */
+public final class AnswerRelevancyScorer extends APIScorer {
+    private AnswerRelevancyScorer(Builder builder) { // private: instances come from Builder.build()
+        super(APIScorerType.ANSWER_RELEVANCY);
+        setRequiredParams(Arrays.asList("input", "actual_output"));
+        if (builder.threshold >= 0) { // negative thresholds are skipped here rather than rejected
+            setThreshold(builder.threshold);
+        }
+        if (builder.name != null) { // unset builder values leave the base-class state untouched
+            setName(builder.name);
+        }
+        if (builder.strictMode != null) {
+            setStrictMode(builder.strictMode);
+        }
+        if (builder.model != null) {
+            setModel(builder.model);
+        }
+    }
+
+    /**
+     * Creates a new builder for configuring an AnswerRelevancyScorer.
+     *
+     * @return a new builder instance
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Builder for configuring and creating AnswerRelevancyScorer instances.
+     */
+    public static final class Builder {
+        private double threshold = 0.5; // used when threshold(...) is never called
+        private String name; // null = not set; the scorer constructor then skips setName
+        private Boolean strictMode; // null = not set; the scorer constructor then skips setStrictMode
+        private String model; // null = not set; the scorer constructor then skips setModel
+
+        public Builder threshold(double threshold) { // stored only; not range-checked by the builder
+            this.threshold = threshold;
+            return this;
+        }
+
+        public Builder name(String name) {
+            this.name = name;
+            return this;
+        }
+
+        public Builder strictMode(boolean strictMode) {
+            this.strictMode = strictMode;
+            return this;
+        }
+
+        public Builder model(String model) {
+            this.model = model;
+            return this;
+        }
+
+        public AnswerRelevancyScorer build() { // creates a new scorer on every call
+            return new AnswerRelevancyScorer(this);
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java
new file mode 100644
index 0000000..b3c0a50
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactory.java
@@ -0,0 +1,36 @@
+package com.judgmentlabs.judgeval.v1.scorers.built_in;
+
+/**
+ * Factory for creating built-in scorer builders. It holds no state; each method returns a fresh builder.
+ */
+public final class BuiltInScorersFactory {
+    public BuiltInScorersFactory() { // nothing to initialize
+    }
+
+    /**
+     * Creates a builder for an answer correctness scorer.
+     *
+     * @return a new {@link AnswerCorrectnessScorer.Builder}
+     */
+    public AnswerCorrectnessScorer.Builder answerCorrectness() {
+        return AnswerCorrectnessScorer.builder();
+    }
+
+    /**
+     * Creates a builder for an answer relevancy scorer.
+     *
+     * @return a new {@link AnswerRelevancyScorer.Builder}
+     */
+    public AnswerRelevancyScorer.Builder answerRelevancy() {
+        return AnswerRelevancyScorer.builder();
+    }
+
+    /**
+     * Creates a builder for a faithfulness scorer.
+     *
+     * @return the scorer builder
+     */
+    public FaithfulnessScorer.Builder faithfulness() {
+        return FaithfulnessScorer.builder();
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java
new file mode 100644
index 0000000..4e20d36
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/built_in/FaithfulnessScorer.java
@@ -0,0 +1,71 @@
+package com.judgmentlabs.judgeval.v1.scorers.built_in;
+
+import java.util.Arrays;
+
+import com.judgmentlabs.judgeval.v1.data.APIScorerType;
+import com.judgmentlabs.judgeval.v1.scorers.APIScorer;
+
+/**
+ * Scorer that evaluates whether an answer is faithful to the provided context. Created through {@link #builder()}.
+ */
+public final class FaithfulnessScorer extends APIScorer {
+    private FaithfulnessScorer(Builder builder) { // private: instances come from Builder.build()
+        super(APIScorerType.FAITHFULNESS);
+        setRequiredParams(Arrays.asList("context", "actual_output"));
+        if (builder.threshold >= 0) { // negative thresholds are skipped here rather than rejected
+            setThreshold(builder.threshold);
+        }
+        if (builder.name != null) { // unset builder values leave the base-class state untouched
+            setName(builder.name);
+        }
+        if (builder.strictMode != null) {
+            setStrictMode(builder.strictMode);
+        }
+        if (builder.model != null) {
+            setModel(builder.model);
+        }
+    }
+
+    /**
+     * Creates a new builder for configuring a FaithfulnessScorer.
+     *
+     * @return a new builder instance
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Builder for configuring and creating FaithfulnessScorer instances.
+     */
+    public static final class Builder {
+        private double threshold = 0.5; // used when threshold(...) is never called
+        private String name; // null = not set; the scorer constructor then skips setName
+        private Boolean strictMode; // null = not set; the scorer constructor then skips setStrictMode
+        private String model; // null = not set; the scorer constructor then skips setModel
+
+        public Builder threshold(double threshold) { // stored only; not range-checked by the builder
+            this.threshold = threshold;
+            return this;
+        }
+
+        public Builder name(String name) {
+            this.name = name;
+            return this;
+        }
+
+        public Builder strictMode(boolean strictMode) {
+            this.strictMode = strictMode;
+            return this;
+        }
+
+        public Builder model(String model) {
+            this.model = model;
+            return this;
+        }
+
+        public FaithfulnessScorer build() { // creates a new scorer on every call
+            return new FaithfulnessScorer(this);
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java
new file mode 100644
index 0000000..da8ea85
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorer.java
@@ -0,0 +1,59 @@
+package com.judgmentlabs.judgeval.v1.scorers.custom_scorer;
+
+import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig;
+import com.judgmentlabs.judgeval.v1.data.APIScorerType;
+import com.judgmentlabs.judgeval.v1.scorers.APIScorer;
+
+/**
+ * Scorer that uses custom user-defined evaluation logic. Scorers are hosted on
+ * Judgment Servers
+ * and can be uploaded using the judgeval CLI.
+ *
+ * @see Judgment
+ *      Docs: Upload Scorers
+ */
+public final class CustomScorer extends APIScorer {
+    private CustomScorer(Builder builder) { // private: instances come from Builder.build()
+        super(APIScorerType.CUSTOM);
+        setName(builder.name); // name and class name are passed through without null checks
+        setClassName(builder.className);
+        // Java SDK only supports server-hosted scorers
+        setServerHosted(true);
+    }
+
+    @Override
+    public ScorerConfig getScorerConfig() { // always throws for custom scorers
+        throw new UnsupportedOperationException("CustomScorer does not use ScorerConfig");
+    }
+
+    /**
+     * Creates a new builder for configuring a CustomScorer.
+     *
+     * @return a new builder instance
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Builder for configuring and creating CustomScorer instances.
+     */
+    public static final class Builder {
+        private String name; // passed to setName by the scorer constructor
+        private String className; // passed to setClassName by the scorer constructor
+
+        public Builder name(String name) {
+            this.name = name;
+            return this;
+        }
+
+        public Builder className(String className) {
+            this.className = className;
+            return this;
+        }
+
+        public CustomScorer build() { // creates a new scorer on every call
+            return new CustomScorer(this);
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java
new file mode 100644
index 0000000..c575b3c
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/custom_scorer/CustomScorerFactory.java
@@ -0,0 +1,39 @@
+package com.judgmentlabs.judgeval.v1.scorers.custom_scorer;
+
+/**
+ * Factory for creating custom scorer instances. It holds no state.
+ */
+public final class CustomScorerFactory {
+    public CustomScorerFactory() { // nothing to initialize
+    }
+
+    /**
+     * Creates a custom scorer with the specified name, which is also used as its class name.
+     *
+     * @param name
+     *            the scorer name, reused as the class name
+     * @return the configured custom scorer
+     */
+    public CustomScorer get(String name) {
+        return CustomScorer.builder()
+                .name(name)
+                .className(name)
+                .build();
+    }
+
+    /**
+     * Creates a custom scorer with the specified name and class name.
+     *
+     * @param name
+     *            the scorer name
+     * @param className
+     *            the class name
+     * @return the configured custom scorer
+     */
+    public CustomScorer get(String name, String className) {
+        return CustomScorer.builder()
+                .name(name)
+                .className(className)
+                .build();
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java
new file mode 100644
index 0000000..ea111e0
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorer.java
@@ -0,0 +1,122 @@
+package com.judgmentlabs.judgeval.v1.scorers.prompt_scorer;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+import com.judgmentlabs.judgeval.internal.api.models.ScorerConfig;
+import com.judgmentlabs.judgeval.v1.data.APIScorerType;
+import com.judgmentlabs.judgeval.v1.scorers.APIScorer;
+
+/**
+ * Scorer that evaluates traces using Judgment-hosted prompt scorers.
+ *
+ * Prompt scorers are hosted on Judgment Servers and can be configured using the
+ * Scorer Playground.
+ *
+ * @see Judgment
+ *      Docs: Prompt Scorers
+ */
+public final class PromptScorer extends APIScorer {
+    private final String prompt; // never null: enforced in the constructor
+    private final Map options; // private copy of the builder's map, or null when none was given
+    private final boolean isTrace; // selects TRACE_PROMPT_SCORER vs PROMPT_SCORER as the score type
+
+    private PromptScorer(Builder builder) { // throws NullPointerException when prompt or name is missing
+        super(builder.isTrace ? APIScorerType.TRACE_PROMPT_SCORER : APIScorerType.PROMPT_SCORER);
+        this.prompt = Objects.requireNonNull(builder.prompt, "prompt required");
+        this.options = builder.options != null ? new HashMap<>(builder.options) : null; // copy: later changes to the caller's map must not alter this scorer
+        this.isTrace = builder.isTrace;
+        setName(Objects.requireNonNull(builder.name, "name required"));
+        setThreshold(builder.threshold);
+    }
+
+    public String getPrompt() {
+        return prompt;
+    }
+
+    public Map getOptions() { // returns a copy so callers cannot modify the scorer's options
+        return options != null ? new HashMap<>(options) : null;
+    }
+
+    public String getScorerName() { // alias for getName()
+        return getName();
+    }
+
+    @Override
+    public ScorerConfig getScorerConfig() { // same fields as the base config, plus "prompt" and "options" kwargs
+        ScorerConfig cfg = new ScorerConfig();
+        cfg.setScoreType(getScoreType());
+        cfg.setThreshold(getThreshold());
+        cfg.setName(getName());
+        cfg.setStrictMode(getStrictMode());
+        cfg.setRequiredParams(getRequiredParams());
+
+        Map kwargs = new HashMap<>();
+        kwargs.put("prompt", prompt);
+        if (options != null) {
+            kwargs.put("options", new HashMap<>(options)); // copy: the config must not expose the internal map
+        }
+        if (getAdditionalProperties() != null) { // additional properties may overwrite "prompt" / "options"
+            kwargs.putAll(getAdditionalProperties());
+        }
+        cfg.setKwargs(kwargs);
+        return cfg;
+    }
+
+    /**
+     * Creates a new builder for configuring a PromptScorer.
+     *
+     * @return a new builder instance
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Builder for configuring and creating PromptScorer instances; name and prompt are required.
+     */
+    public static final class Builder {
+        private String name;
+        private String prompt;
+        private double threshold = 0.5; // used when threshold(...) is never called
+        private Map options;
+        private boolean isTrace;
+
+        public Builder name(String name) {
+            this.name = name;
+            return this;
+        }
+
+        public Builder prompt(String prompt) {
+            this.prompt = prompt;
+            return this;
+        }
+
+        public Builder threshold(double threshold) {
+            this.threshold = threshold;
+            return this;
+        }
+
+        public Builder options(Map options) {
+            this.options = options;
+            return this;
+        }
+
+        Builder isTrace(boolean isTrace) { // package-private: not part of the public builder API
+            this.isTrace = isTrace;
+            return this;
+        }
+
+        public PromptScorer build() {
+            return new PromptScorer(this);
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "PromptScorer(name=" + getName() + ", prompt=" + prompt + ", threshold=" + getThreshold()
+                + ", options=" + options + ", isTrace=" + isTrace + ")";
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java
new file mode 100644
index 0000000..6f38121
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/scorers/prompt_scorer/PromptScorerFactory.java
@@ -0,0 +1,129 @@
+package com.judgmentlabs.judgeval.v1.scorers.prompt_scorer;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
+
+import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient;
+import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersRequest;
+import com.judgmentlabs.judgeval.internal.api.models.FetchPromptScorersResponse;
+import com.judgmentlabs.judgeval.utils.Logger;
+
+/**
+ * Factory for retrieving prompt-based scorers, either trace-level or not depending on its trace flag.
+ */
+public final class PromptScorerFactory {
+    private final JudgmentSyncClient client; // used to fetch scorers and to derive the cache key
+    private final boolean isTrace; // the kind of scorer this factory is allowed to return
+    private static final Map cache = new ConcurrentHashMap<>(); // static: shared by all factory instances
+
+    public PromptScorerFactory(JudgmentSyncClient client, boolean isTrace) { // no null check on client
+        this.client = client;
+        this.isTrace = isTrace;
+    }
+
+    /**
+     * Retrieves a prompt scorer by name from the Judgment API.
+     * Results are cached to avoid repeated API calls.
+     *
+     * @param name
+     *            the scorer name
+     * @return the configured prompt scorer, or null if not found, of the wrong (trace / non-trace) kind, or retrieval fails
+     */
+    public PromptScorer get(String name) {
+        CacheKey key = new CacheKey(name, client.getApiKey(), client.getOrganizationId());
+        com.judgmentlabs.judgeval.internal.api.models.PromptScorer cached = cache.get(key);
+        if (cached != null && Boolean.TRUE.equals(cached.getIsTrace()) == isTrace) { // the cache is shared by trace and non-trace factories: only accept a hit of this factory's kind
+            return createFromModel(cached, name);
+        }
+
+        try {
+            FetchPromptScorersRequest request = new FetchPromptScorersRequest();
+            request.setNames(java.util.Collections.singletonList(name));
+
+            FetchPromptScorersResponse response = client.fetchScorers(request);
+
+            com.judgmentlabs.judgeval.internal.api.models.PromptScorer scorer = Optional.ofNullable(response)
+                    .map(FetchPromptScorersResponse::getScorers)
+                    .filter(scorers -> scorers != null && !scorers.isEmpty())
+                    .map(scorers -> scorers.get(0)) // only the first returned scorer is used
+                    .orElseGet(
+                            () -> {
+                                Logger.error("Failed to fetch prompt scorer '" + name + "': not found");
+                                return null;
+                            });
+
+            if (scorer == null) {
+                return null;
+            }
+
+            if (Boolean.TRUE.equals(scorer.getIsTrace()) != isTrace) { // wrong kind for this factory: log and give up without caching
+                Logger.error("Scorer with name " + name + " is a "
+                        + (Boolean.TRUE.equals(scorer.getIsTrace()) ? "TracePromptScorer" : "PromptScorer") + ", not a "
+                        + (isTrace ? "TracePromptScorer" : "PromptScorer"));
+                return null;
+            }
+
+            cache.put(key, scorer);
+            return createFromModel(scorer, name);
+        } catch (Exception e) { // best effort: failures are logged and reported to the caller as null
+            Logger.error("Failed to fetch prompt scorer '" + name + "': " + e.getMessage());
+            return null;
+        }
+    }
+
+    private PromptScorer createFromModel(com.judgmentlabs.judgeval.internal.api.models.PromptScorer model,
+            String name) { // converts the API model into the public PromptScorer type
+        Map options = null;
+        if (model.getOptions() != null) {
+            if (model.getOptions() instanceof Map) {
+                @SuppressWarnings("unchecked")
+                Map rawOptions = (Map) model.getOptions();
+                options = new HashMap<>();
+                for (Map.Entry entry : rawOptions.entrySet()) {
+                    if (entry.getValue() instanceof Number) { // non-numeric option values are dropped
+                        options.put(entry.getKey(), ((Number) entry.getValue()).doubleValue());
+                    }
+                }
+            }
+        }
+
+        return PromptScorer.builder()
+                .name(name)
+                .prompt(model.getPrompt())
+                .threshold(Optional.ofNullable(model.getThreshold()).orElse(0.5)) // 0.5 when the model has no threshold
+                .options(options)
+                .isTrace(isTrace)
+                .build();
+    }
+
+    private static final class CacheKey { // value object: scorer name + API key + organization id
+        private final String name;
+        private final String apiKey;
+        private final String organizationId;
+
+        CacheKey(String name, String apiKey, String organizationId) {
+            this.name = name;
+            this.apiKey = apiKey;
+            this.organizationId = organizationId;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj)
+                return true;
+            if (obj == null || getClass() != obj.getClass())
+                return false;
+            CacheKey that = (CacheKey) obj;
+            return Objects.equals(name, that.name) && Objects.equals(apiKey, that.apiKey)
+                    && Objects.equals(organizationId, that.organizationId);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(name, apiKey, organizationId);
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java
new file mode 100644
index 0000000..41d68b5
--- /dev/null
+++ 
b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracer.java @@ -0,0 +1,563 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import java.lang.reflect.Type; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.UUID; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.judgmentlabs.judgeval.JudgmentAttributeKeys; +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.ExampleEvaluationRun; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; +import com.judgmentlabs.judgeval.internal.api.models.TraceEvaluationRun; +import com.judgmentlabs.judgeval.utils.Logger; +import com.judgmentlabs.judgeval.v1.data.Example; +import com.judgmentlabs.judgeval.v1.scorers.BaseScorer; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorer; +import com.judgmentlabs.judgeval.v1.tracer.exporters.JudgmentSpanExporter; +import com.judgmentlabs.judgeval.v1.tracer.exporters.NoOpSpanExporter; + +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanContext; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.context.Scope; +import io.opentelemetry.sdk.trace.export.SpanExporter; + +/** + * Base tracer providing span manipulation, attribute setting, and evaluation + * capabilities. 
+ */
+public abstract class BaseTracer {
+    public static final String TRACER_NAME = "judgeval"; // instrumentation name passed to GlobalOpenTelemetry
+
+    protected final String projectName; // never null
+    protected final boolean enableEvaluation; // when false, the async evaluate methods do nothing
+    protected final JudgmentSyncClient apiClient; // never null
+    protected final ISerializer serializer; // never null; serializes attribute values
+    protected final ObjectMapper jacksonMapper; // serializes trace evaluation runs to JSON
+    protected final Optional projectId; // empty when the project could not be resolved
+
+    protected BaseTracer(String projectName,
+            boolean enableEvaluation, JudgmentSyncClient apiClient, ISerializer serializer) {
+        this.projectName = Objects.requireNonNull(projectName, "projectName required");
+        this.enableEvaluation = enableEvaluation;
+        this.apiClient = Objects.requireNonNull(apiClient, "apiClient required");
+        this.serializer = Objects.requireNonNull(serializer, "serializer required");
+        this.jacksonMapper = new ObjectMapper();
+        this.projectId = resolveProjectId(projectName); // resolved once, at construction time
+
+        this.projectId.ifPresentOrElse(id -> { // nothing to do on success; only the failure is logged
+        }, () -> Logger.error("Failed to resolve project " + projectName
+                + ", please create it first at https://app.judgmentlabs.ai/org/" + this.apiClient.getOrganizationId()
+                + "/projects. Skipping Judgment export."));
+    }
+
+    /**
+     * Initializes the tracer. Implemented by subclasses.
+     */
+    public abstract void initialize();
+
+    /**
+     * Forces pending spans to flush.
+     *
+     * @param timeoutMillis
+     *            maximum time to wait in milliseconds
+     * @return true if flush succeeded within timeout
+     */
+    public abstract boolean forceFlush(int timeoutMillis);
+
+    /**
+     * Shuts down the tracer.
+     *
+     * @param timeoutMillis
+     *            maximum time to wait for shutdown in milliseconds
+     */
+    public abstract void shutdown(int timeoutMillis);
+
+    /**
+     * Returns the span exporter for this tracer.
+     *
+     * @return a Judgment span exporter, or a {@link NoOpSpanExporter} when the project ID is not resolved
+     */
+    public SpanExporter getSpanExporter() {
+        return projectId.map(this::createJudgmentSpanExporter)
+                .orElseGet(() -> {
+                    Logger.error("Project not resolved; cannot create exporter, returning NoOpSpanExporter");
+                    return new NoOpSpanExporter();
+                });
+    }
+
+    /**
+     * Sets the span kind attribute on the current span. A null kind is ignored.
+     *
+     * @param kind
+     *            the span kind
+     */
+    public void setSpanKind(String kind) {
+        Optional.ofNullable(kind)
+                .ifPresent(k -> withCurrentSpan(
+                        span -> span.setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_SPAN_KIND, k)));
+    }
+
+    private static void withCurrentSpan(java.util.function.Consumer action) { // runs action on Span.current() unless it is null
+        Optional.ofNullable(Span.current())
+                .ifPresent(action);
+    }
+
+    private static boolean isValidKey(String key) { // attribute keys must be non-null and non-empty
+        return key != null && !key.isEmpty();
+    }
+
+    /**
+     * Sets an attribute on the current span by serializing the value using its runtime class.
+     *
+     * @param key
+     *            the attribute key; null or empty keys are ignored
+     * @param value
+     *            the attribute value; null values are ignored
+     */
+    public void setAttribute(String key, Object value) {
+        if (!isValidKey(key)) {
+            return;
+        }
+        if (value != null) {
+            setAttribute(key, value, value.getClass());
+        }
+    }
+
+    /**
+     * Sets an attribute on the current span by serializing the value with the
+     * specified type.
+     *
+     * @param key
+     *            the attribute key; null or empty keys are ignored
+     * @param value
+     *            the attribute value; null values are ignored
+     * @param type
+     *            the type to use for serialization
+     */
+    public void setAttribute(String key, Object value, Type type) {
+        if (!isValidKey(key)) {
+            return;
+        }
+        if (value != null) {
+            withCurrentSpan(span -> span.setAttribute(key, serializer.serialize(value, type)));
+        }
+    }
+
+    /**
+     * Sets a string attribute on the current span.
+     *
+     * @param key
+     *            the attribute key; null or empty keys are ignored
+     * @param value
+     *            the attribute value, passed to the span without a null check
+     */
+    public void setAttribute(String key, String value) {
+        if (!isValidKey(key)) {
+            return;
+        }
+        withCurrentSpan(span -> span.setAttribute(key, value));
+    }
+
+    /**
+     * Sets a long attribute on the current span.
+     *
+     * @param key
+     *            the attribute key; null or empty keys are ignored
+     * @param value
+     *            the attribute value
+     */
+    public void setAttribute(String key, long value) {
+        if (!isValidKey(key)) {
+            return;
+        }
+        withCurrentSpan(span -> span.setAttribute(key, value));
+    }
+
+    /**
+     * Sets a double attribute on the current span.
+     *
+     * @param key
+     *            the attribute key; null or empty keys are ignored
+     * @param value
+     *            the attribute value
+     */
+    public void setAttribute(String key, double value) {
+        if (!isValidKey(key)) {
+            return;
+        }
+        withCurrentSpan(span -> span.setAttribute(key, value));
+    }
+
+    /**
+     * Sets a boolean attribute on the current span.
+     *
+     * @param key
+     *            the attribute key; null or empty keys are ignored
+     * @param value
+     *            the attribute value
+     */
+    public void setAttribute(String key, boolean value) {
+        if (!isValidKey(key)) {
+            return;
+        }
+        withCurrentSpan(span -> span.setAttribute(key, value));
+    }
+
+    private Optional getSampledSpanContext() { // context of the current span, only if that span is sampled
+        return Optional.ofNullable(Span.current())
+                .filter(span -> span.getSpanContext()
+                        .isSampled())
+                .map(Span::getSpanContext);
+    }
+
+    private Optional getSampledSpan() { // the current span itself, only if it is sampled
+        return Optional.ofNullable(Span.current())
+                .filter(span -> span.getSpanContext()
+                        .isSampled());
+    }
+
+    private boolean isEvaluationEnabled() {
+        return enableEvaluation;
+    }
+
+    private void logEvaluationInfo(String method, String traceId, String spanId, String scorerName) { // info-level log line
+        Logger.info(method + ": project=" + projectName + ", traceId=" + traceId + ", spanId="
+                + spanId + ", scorer=" + scorerName);
+    }
+
+    private void safeExecute(String operation, Runnable action) { // runs action; exceptions are logged, never rethrown
+        try {
+            action.run();
+        } catch (Exception e) {
+            Logger.error("Failed to " + operation + ": " + e.getMessage());
+        }
+    }
+
+    /**
+     * Asynchronously evaluates the current span using the specified scorer and
+     * example.
+     * The evaluation is queued and processed asynchronously by the Judgment
+     * backend.
+     *
+     * @param scorer
+     *            the scorer to use for evaluation
+     * @param example
+     *            the example data to evaluate against
+     */
+    public void asyncEvaluate(BaseScorer scorer, Example example) {
+        safeExecute("evaluate scorer", () -> { // failures inside are logged, not thrown to the caller
+            if (!isEvaluationEnabled()) { // evaluation switched off: do nothing
+                return;
+            }
+
+            getSampledSpanContext().ifPresent(spanContext -> { // skipped when the current span is not sampled
+                String traceId = spanContext.getTraceId();
+                String spanId = spanContext.getSpanId();
+
+                logEvaluationInfo("asyncEvaluate", traceId, spanId, scorer.getName());
+
+                ExampleEvaluationRun evaluationRun = createEvaluationRun(scorer, example, traceId, spanId);
+                enqueueEvaluation(evaluationRun);
+            });
+        });
+    }
+
+    /**
+     * Asynchronously evaluates the current trace using the specified scorer.
+     * Attaches evaluation metadata to the current span for processing after trace
+     * completion. Does nothing when evaluation is disabled or the current span is not sampled.
+     *
+     * @param scorer
+     *            the scorer to use for trace evaluation
+     */
+    public void asyncTraceEvaluate(BaseScorer scorer) {
+        safeExecute("evaluate trace scorer", () -> { // failures inside are logged, not thrown to the caller
+            if (!isEvaluationEnabled()) {
+                return;
+            }
+
+            getSampledSpan().ifPresent(currentSpan -> {
+                SpanContext spanContext = currentSpan.getSpanContext();
+                String traceId = spanContext.getTraceId();
+                String spanId = spanContext.getSpanId();
+
+                logEvaluationInfo("asyncTraceEvaluate", traceId, spanId, scorer.getName());
+
+                TraceEvaluationRun evaluationRun = createTraceEvaluationRun(scorer, traceId, spanId);
+                try {
+                    String traceEvalJson = jacksonMapper.writeValueAsString(evaluationRun); // stored on the span as a JSON string
+                    currentSpan.setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_PENDING_TRACE_EVAL,
+                            traceEvalJson);
+                } catch (Exception e) {
+                    Logger.error("Failed to serialize trace evaluation: " + e.getMessage());
+                }
+            });
+        });
+    }
+
+    /**
+     * Sets multiple attributes on the current span.
+     *
+     * A {@code null} map is ignored.
+     *
+     * @param attributes
+     *            map of attribute keys to values
+     */
+    public void setAttributes(Map<String, Object> attributes) {
+        // NOTE(review): the value type was lost in extraction; Object assumes the
+        // setAttribute(String, Object) overload - confirm against the class.
+        if (attributes == null) {
+            return;
+        }
+        attributes.forEach(this::setAttribute);
+    }
+
+    /**
+     * Marks the current span as an LLM span.
+     */
+    public void setLLMSpan() {
+        setSpanKind("llm");
+    }
+
+    /**
+     * Marks the current span as a tool span.
+     */
+    public void setToolSpan() {
+        setSpanKind("tool");
+    }
+
+    /**
+     * Marks the current span as a general span.
+     */
+    public void setGeneralSpan() {
+        setSpanKind("span");
+    }
+
+    /**
+     * Sets the input attribute on the current span, serialized using the
+     * runtime class of the value. A {@code null} input is ignored.
+     *
+     * @param input
+     *            the input value, may be {@code null}
+     */
+    public void setInput(Object input) {
+        if (input == null) {
+            // No runtime class to serialize with; skip instead of throwing an
+            // NPE into the traced application code.
+            return;
+        }
+        setInput(input, input.getClass());
+    }
+
+    /**
+     * Sets the output attribute on the current span, serialized using the
+     * runtime class of the value. A {@code null} output is ignored.
+     *
+     * @param output
+     *            the output value, may be {@code null}
+     */
+    public void setOutput(Object output) {
+        if (output == null) {
+            // Same reasoning as setInput(Object): never throw from a tracing helper.
+            return;
+        }
+        setOutput(output, output.getClass());
+    }
+
+    /**
+     * Sets the input attribute on the current span using the specified type.
+     *
+     * @param input
+     *            the input value
+     * @param type
+     *            the type to use for serialization
+     */
+    public void setInput(Object input, Type type) {
+        setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_INPUT, input, type);
+    }
+
+    /**
+     * Sets the output attribute on the current span using the specified type.
+     *
+     * @param output
+     *            the output value
+     * @param type
+     *            the type to use for serialization
+     */
+    public void setOutput(Object output, Type type) {
+        setAttribute(JudgmentAttributeKeys.AttributeKeys.JUDGMENT_OUTPUT, output, type);
+    }
+
+    /**
+     * Executes a runnable within a new span, automatically handling span lifecycle
+     * and errors.
+     *
+     * The span is made current while the runnable runs and is always ended.
+     * Anything thrown is recorded on the span with ERROR status and rethrown.
+     *
+     * @param spanName
+     *            the name of the span
+     * @param runnable
+     *            the code to execute within the span
+     */
+    public void span(String spanName, Runnable runnable) {
+        Span span = getTracer().spanBuilder(spanName)
+                .startSpan();
+        try (Scope scope = span.makeCurrent()) {
+            runnable.run();
+        } catch (Throwable t) {
+            // Throwable, not Exception, so Errors also mark the span as failed.
+            // Precise rethrow keeps the method free of a throws clause.
+            span.setStatus(StatusCode.ERROR).recordException(t);
+            throw t;
+        } finally {
+            span.end();
+        }
+    }
+
+    /**
+     * Executes a callable within a new span, automatically handling span lifecycle
+     * and errors.
+     *
+     * @param <T>
+     *            the return type
+     * @param spanName
+     *            the name of the span
+     * @param callable
+     *            the code to execute within the span
+     * @return the result of the callable
+     * @throws Exception
+     *             if the callable throws an exception
+     */
+    public <T> T span(String spanName, java.util.concurrent.Callable<T> callable) throws Exception {
+        Span span = getTracer().spanBuilder(spanName)
+                .startSpan();
+        try (Scope scope = span.makeCurrent()) {
+            return callable.call();
+        } catch (Throwable t) {
+            // Record Errors as well as Exceptions before rethrowing unchanged.
+            span.setStatus(StatusCode.ERROR).recordException(t);
+            throw t;
+        } finally {
+            span.end();
+        }
+    }
+
+    /**
+     * Returns the OpenTelemetry tracer instance, looked up from the global
+     * OpenTelemetry under {@code TRACER_NAME}.
+     *
+     * @return the OpenTelemetry tracer
+     */
+    public io.opentelemetry.api.trace.Tracer getTracer() {
+        return GlobalOpenTelemetry.get()
+                .getTracer(TRACER_NAME);
+    }
+
+    /**
+     * Returns the project name.
+     *
+     * @return the project name
+     */
+    public String getProjectName() {
+        return projectName;
+    }
+
+    /**
+     * Returns whether evaluation is enabled.
+     *
+     * @return true if evaluation is enabled
+     */
+    public boolean isEnableEvaluation() {
+        return enableEvaluation;
+    }
+
+    /**
+     * Returns the resolved project ID if available.
+     *
+     * @return the project ID, or empty if not resolved
+     */
+    public Optional<String> getProjectId() {
+        return projectId;
+    }
+
+    /**
+     * Creates and returns a new span with the specified name.
+     *
+     * The span is started but not made current; the caller must end it.
+     *
+     * @param spanName
+     *            the name of the span
+     * @return the created span
+     */
+    public static Span span(String spanName) {
+        return GlobalOpenTelemetry.get()
+                .getTracer(TRACER_NAME)
+                .spanBuilder(spanName)
+                .startSpan();
+    }
+
+    /**
+     * Resolves a project name to its project ID through the API client.
+     *
+     * @param name
+     *            the project name to resolve
+     * @return the project ID as a string, or empty when the response is null,
+     *         carries no ID, or the request fails (the failure is logged)
+     */
+    private Optional<String> resolveProjectId(String name) {
+        try {
+            ResolveProjectNameRequest request = new ResolveProjectNameRequest();
+            request.setProjectName(name);
+            ResolveProjectNameResponse response = apiClient.projectsResolve(request);
+            // Handle a null response explicitly instead of relying on an NPE.
+            return Optional.ofNullable(response)
+                    .map(ResolveProjectNameResponse::getProjectId)
+                    .map(Object::toString);
+        } catch (Exception e) {
+            // Still best effort, but leave a trace of why the ID is missing.
+            Logger.error("Failed to resolve project '" + name + "': " + e.getMessage());
+            return Optional.empty();
+        }
+    }
+
+    /**
+     * Appends the OTLP traces path to the API base URL, avoiding a double slash.
+     *
+     * @param baseUrl
+     *            the API base URL, with or without a trailing slash
+     * @return the traces endpoint URL
+     */
+    private String buildEndpoint(String baseUrl) {
+        return baseUrl.endsWith("/") ? baseUrl + "otel/v1/traces" : baseUrl + "/otel/v1/traces";
+    }
+
+    /**
+     * Builds a span exporter from the API client's URL, API key and
+     * organization ID.
+     *
+     * @param projectId
+     *            the resolved project ID
+     * @return the configured exporter
+     */
+    private JudgmentSpanExporter createJudgmentSpanExporter(String projectId) {
+        return JudgmentSpanExporter.builder()
+                .endpoint(buildEndpoint(apiClient.getApiUrl()))
+                .apiKey(apiClient.getApiKey())
+                .organizationId(apiClient.getOrganizationId())
+                .projectId(projectId)
+                .build();
+    }
+
+    /**
+     * Builds a run name from a prefix and the span ID, falling back to the
+     * current time in milliseconds when the span ID is null.
+     *
+     * @param prefix
+     *            the run name prefix
+     * @param spanId
+     *            the span ID, may be null
+     * @return the run name
+     */
+    private String generateRunId(String prefix, String spanId) {
+        return prefix + Optional.ofNullable(spanId)
+                .orElseGet(() -> String.valueOf(System.currentTimeMillis()));
+    }
+
+    /**
+     * Builds the evaluation run for a single example on the given span. A
+     * {@code CustomScorer} goes into the custom scorer list; any other scorer
+     * contributes its scorer config to the judgment scorer list.
+     *
+     * @param scorer
+     *            the scorer to run
+     * @param example
+     *            the example to evaluate
+     * @param traceId
+     *            the trace ID of the span
+     * @param spanId
+     *            the span ID
+     * @return the populated evaluation run
+     */
+    private ExampleEvaluationRun createEvaluationRun(BaseScorer scorer, Example example, String traceId,
+            String spanId) {
+        String runId = generateRunId("async_evaluate_", spanId);
+
+        ExampleEvaluationRun evaluationRun = new ExampleEvaluationRun();
+        evaluationRun.setId(UUID.randomUUID().toString());
+        evaluationRun.setProjectName(projectName);
+        evaluationRun.setEvalName(runId);
+        evaluationRun.setTraceId(traceId);
+        evaluationRun.setTraceSpanId(spanId);
+        evaluationRun.setExamples(List.of(example));
+
+        if (scorer instanceof CustomScorer) {
+            evaluationRun.setJudgmentScorers(List.of());
+            evaluationRun.setCustomScorers(List.of((com.judgmentlabs.judgeval.internal.api.models.BaseScorer) scorer));
+        } else {
+            evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig()));
+            evaluationRun.setCustomScorers(List.of());
+        }
+        evaluationRun.setCreatedAt(Instant.now().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT));
+
+        return evaluationRun;
+    }
+
+    /**
+     * Builds the trace evaluation run for the given trace and span, always
+     * using the scorer's config as a judgment scorer.
+     *
+     * @param scorer
+     *            the scorer to run
+     * @param traceId
+     *            the trace ID
+     * @param spanId
+     *            the span ID
+     * @return the populated trace evaluation run
+     */
+    private TraceEvaluationRun createTraceEvaluationRun(BaseScorer scorer, String traceId,
+            String spanId) {
+        String evalName = generateRunId("async_trace_evaluate_", spanId);
+
+        TraceEvaluationRun evaluationRun = new TraceEvaluationRun();
+        evaluationRun.setId(UUID.randomUUID().toString());
+        evaluationRun.setProjectName(projectName);
+        evaluationRun.setEvalName(evalName);
+        evaluationRun.setTraceAndSpanIds(List.of(List.of(traceId, spanId)));
+        evaluationRun.setJudgmentScorers(List.of(scorer.getScorerConfig()));
+        evaluationRun.setCustomScorers(List.of());
+        evaluationRun.setIsOffline(false);
+        evaluationRun.setIsBucketRun(false);
+        evaluationRun.setCreatedAt(Instant.now().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT));
+
+        return evaluationRun;
+    }
+
+    /**
+     * Submits the evaluation run to the API client's run-eval queue, logging
+     * instead of propagating any failure.
+     *
+     * @param evaluationRun
+     *            the run to enqueue
+     */
+    private void enqueueEvaluation(ExampleEvaluationRun evaluationRun) {
+        try {
+            apiClient.addToRunEvalQueue(evaluationRun);
+        } catch (Exception e) {
+            Logger.error("Failed to enqueue evaluation run: " + e.getMessage());
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java
new file mode 100644
index 0000000..8d84fe8
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/ISerializer.java
@@ -0,0 +1,30 @@
+package com.judgmentlabs.judgeval.v1.tracer;
+
+import java.lang.reflect.Type;
+
+/**
+ * Serializes objects to string representations.
+ */
+public interface ISerializer {
+    /**
+     * Serializes an object to a string.
+     *
+     * @param obj
+     *            the object to serialize
+     * @return the serialized string
+     */
+    String serialize(Object obj);
+
+    /**
+     * Serializes an object to a string using the specified type. The default
+     * implementation ignores the type and delegates to
+     * {@link #serialize(Object)}.
+     *
+     * @param obj
+     *            the object to serialize
+     * @param type
+     *            the type to use for serialization
+     * @return the serialized string
+     */
+    default String serialize(Object obj, Type type) {
+        return serialize(obj);
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java
new file mode 100644
index 0000000..301be20
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/Tracer.java
@@ -0,0 +1,211 @@
+package com.judgmentlabs.judgeval.v1.tracer;
+
+import java.lang.reflect.Type;
+import java.util.Objects;
+import java.util.Optional;
+
+import com.google.gson.Gson;
+import com.judgmentlabs.judgeval.Version;
+import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient;
+import com.judgmentlabs.judgeval.utils.Logger;
+
+import io.opentelemetry.api.GlobalOpenTelemetry;
+import io.opentelemetry.api.OpenTelemetry;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.resources.Resource;
+import io.opentelemetry.sdk.trace.SdkTracerProvider;
+import io.opentelemetry.sdk.trace.export.BatchSpanProcessor;
+import io.opentelemetry.sdk.trace.export.SpanExporter;
+
+/**
+ * Tracer that exports spans through an OpenTelemetry SDK tracer provider which
+ * it registers globally.
+ */
+public final class Tracer extends BaseTracer {
+    // Null until initialize() has registered the provider successfully.
+    private SdkTracerProvider tracerProvider;
+
+    /**
+     * Creates the tracer from a builder, defaulting the serializer to the
+     * built-in Gson serializer and initializing when the builder asks for it.
+     *
+     * @param builder
+     *            the builder carrying the configuration
+     * @throws NullPointerException
+     *             if the builder has no project name or client
+     */
+    private Tracer(Builder builder) {
+        super(
+                Objects.requireNonNull(builder.projectName, "projectName required"),
+                builder.enableEvaluation,
+                Objects.requireNonNull(builder.client, "client required"),
+                builder.serializer != null ? builder.serializer : new GsonSerializer());
+
+        if (builder.initialize) {
+            initialize();
+        }
+    }
+
+    /**
+     * Initializes the tracer by setting up the OpenTelemetry SDK with a span
+     * exporter,
+     * configuring the tracer provider with batch span processing, and registering
+     * it globally.
+     * Calling this again after a successful initialization is a no-op.
+     *
+     * @throws IllegalStateException
+     *             if a global OpenTelemetry instance is already registered
+     */
+    @Override
+    public void initialize() {
+        if (tracerProvider != null) {
+            // A second provider could not be registered and would only leak
+            // its batch processor.
+            Logger.info("Tracer already initialized; ignoring initialize()");
+            return;
+        }
+
+        SpanExporter spanExporter = getSpanExporter();
+
+        var resource = Resource.getDefault()
+                .merge(Resource.create(Attributes.builder()
+                        .put("service.name", projectName)
+                        .put("telemetry.sdk.name", TRACER_NAME)
+                        .put("telemetry.sdk.version", Version.getVersion())
+                        .build()));
+
+        SdkTracerProvider provider = SdkTracerProvider.builder()
+                .setResource(resource)
+                .addSpanProcessor(BatchSpanProcessor.builder(spanExporter)
+                        .build())
+                .build();
+
+        OpenTelemetry openTelemetry = OpenTelemetrySdk.builder()
+                .setTracerProvider(provider)
+                .build();
+
+        try {
+            GlobalOpenTelemetry.set(openTelemetry);
+        } catch (IllegalStateException e) {
+            // Registration failed: release the provider we just built, then
+            // surface the same exception as before.
+            provider.shutdown();
+            throw e;
+        }
+        this.tracerProvider = provider;
+    }
+
+    /**
+     * Forces the tracer to flush any pending spans within the specified timeout.
+     *
+     * @param timeoutMillis
+     *            the maximum time to wait in milliseconds
+     * @return true if the flush completed successfully within the timeout; false
+     *         if it did not or the tracer is not initialized
+     */
+    @Override
+    public boolean forceFlush(int timeoutMillis) {
+        if (tracerProvider == null) {
+            Logger.error("Cannot forceFlush: tracer not initialized");
+            return false;
+        }
+        return tracerProvider.forceFlush()
+                .join(timeoutMillis, java.util.concurrent.TimeUnit.MILLISECONDS)
+                .isSuccess();
+    }
+
+    /**
+     * Shuts down the tracer, flushing any remaining spans and releasing resources.
+     * Logs an error and returns when the tracer is not initialized.
+     *
+     * @param timeoutMillis
+     *            the maximum time to wait for shutdown in milliseconds
+     */
+    @Override
+    public void shutdown(int timeoutMillis) {
+        if (tracerProvider == null) {
+            Logger.error("Cannot shutdown: tracer not initialized");
+            return;
+        }
+        tracerProvider.shutdown()
+                .join(timeoutMillis, java.util.concurrent.TimeUnit.MILLISECONDS);
+    }
+
+    /**
+     * Creates a new builder for configuring a Tracer.
+     *
+     * @return a new builder instance
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Builder for configuring and creating Tracer instances. The client and the
+     * project name are mandatory: the Tracer constructor rejects a null value
+     * for either with a NullPointerException.
+     */
+    public static final class Builder {
+        private JudgmentSyncClient client;
+        private String projectName;
+        // Evaluation is on unless explicitly disabled.
+        private boolean enableEvaluation = true;
+        // Null means the Tracer constructor picks its built-in Gson serializer.
+        private ISerializer serializer;
+        // The tracer initializes itself during construction unless disabled.
+        private boolean initialize = true;
+
+        /**
+         * Sets the Judgment API client.
+         *
+         * @param client
+         *            the API client
+         * @return this builder
+         */
+        public Builder client(JudgmentSyncClient client) {
+            this.client = client;
+            return this;
+        }
+
+        /**
+         * Sets the project name for this tracer.
+         *
+         * @param projectName
+         *            the project name
+         * @return this builder
+         */
+        public Builder projectName(String projectName) {
+            this.projectName = projectName;
+            return this;
+        }
+
+        /**
+         * Sets whether evaluation is enabled. Defaults to true.
+         *
+         * @param enableEvaluation
+         *            true to enable evaluation
+         * @return this builder
+         */
+        public Builder enableEvaluation(boolean enableEvaluation) {
+            this.enableEvaluation = enableEvaluation;
+            return this;
+        }
+
+        /**
+         * Sets the custom serializer for span attributes.
+         *
+         * @param serializer
+         *            the serializer
+         * @return this builder
+         */
+        public Builder serializer(ISerializer serializer) {
+            this.serializer = serializer;
+            return this;
+        }
+
+        /**
+         * Sets whether to automatically initialize the tracer on build. Defaults
+         * to true.
+         *
+         * @param initialize
+         *            true to initialize on build
+         * @return this builder
+         */
+        public Builder initialize(boolean initialize) {
+            this.initialize = initialize;
+            return this;
+        }
+
+        /**
+         * Builds and returns a new Tracer instance.
+         *
+         * @return the configured Tracer
+         */
+        public Tracer build() {
+            return new Tracer(this);
+        }
+    }
+
+    /**
+     * Default serializer backed by Gson. Falls back to {@code toString()} when
+     * Gson throws.
+     */
+    private static class GsonSerializer implements ISerializer {
+        private final Gson gson = new Gson();
+
+        /**
+         * Serializes the object using its runtime class.
+         *
+         * @return the JSON string, or null for a null object
+         */
+        @Override
+        public String serialize(Object obj) {
+            if (obj == null) {
+                return null;
+            }
+            return serialize(obj, obj.getClass());
+        }
+
+        /**
+         * Serializes the object as the given type.
+         *
+         * @return the JSON string; when Gson throws, the object's
+         *         {@code toString()} (null for a null object)
+         */
+        @Override
+        public String serialize(Object obj, Type type) {
+            try {
+                return gson.toJson(obj, type);
+            } catch (Exception e) {
+                Logger.error("Failed to serialize object: " + e.getMessage());
+                return obj == null ? null : obj.toString();
+            }
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java
new file mode 100644
index 0000000..cffa765
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactory.java
@@ -0,0 +1,24 @@
+package com.judgmentlabs.judgeval.v1.tracer;
+
+import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient;
+
+/**
+ * Factory for creating tracer builders.
+ */
+public final class TracerFactory {
+    private final JudgmentSyncClient client;
+
+    public TracerFactory(JudgmentSyncClient client) {
+        this.client = client;
+    }
+
+    /**
+     * Creates a new tracer builder configured with this factory's client.
+     *
+     * @return a new tracer builder whose client is already set to this
+     *         factory's client
+     */
+    public Tracer.Builder create() {
+        return Tracer.builder()
+                .client(client);
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/JudgmentSpanExporter.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/JudgmentSpanExporter.java
new file mode 100644
index 0000000..d31674d
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/JudgmentSpanExporter.java
@@ -0,0 +1,179 @@
+package com.judgmentlabs.judgeval.v1.tracer.exporters;
+
+import java.util.Collection;
+import java.util.Optional;
+
+import com.judgmentlabs.judgeval.utils.Logger;
+
+import io.opentelemetry.exporter.otlp.http.trace.OtlpHttpSpanExporter;
+import io.opentelemetry.sdk.common.CompletableResultCode;
+import io.opentelemetry.sdk.trace.data.SpanData;
+import io.opentelemetry.sdk.trace.export.SpanExporter;
+
+/**
+ * SpanExporter implementation that sends spans to Judgment Labs with project
+ * identification.
+ * <p>

+ * This exporter wraps the OTLP HTTP exporter and adds Judgment Labs specific
+ * headers and project identification to all exported spans.
+ */
+public class JudgmentSpanExporter implements SpanExporter {
+    private final SpanExporter delegate;
+
+    /**
+     * Creates a new JudgmentSpanExporter with the specified configuration. The
+     * API key, organization ID and project ID are sent as the
+     * {@code Authorization}, {@code X-Organization-Id} and {@code X-Project-Id}
+     * headers.
+     *
+     * @param endpoint
+     *            the OTLP endpoint URL
+     * @param apiKey
+     *            the API key for authentication
+     * @param organizationId
+     *            the organization ID
+     * @param projectId
+     *            the project ID (must not be null or empty)
+     * @throws IllegalArgumentException
+     *             if projectId is null or empty
+     */
+    protected JudgmentSpanExporter(String endpoint, String apiKey, String organizationId, String projectId) {
+        // The null check makes the documented contract true: previously a null
+        // projectId failed with a NullPointerException instead.
+        if (projectId == null || projectId.isEmpty()) {
+            throw new IllegalArgumentException("projectId is required for JudgmentSpanExporter");
+        }
+        this.delegate = OtlpHttpSpanExporter.builder()
+                .setEndpoint(endpoint)
+                .addHeader("Authorization", "Bearer " + apiKey)
+                .addHeader("X-Organization-Id", organizationId)
+                .addHeader("X-Project-Id", projectId)
+                .build();
+    }
+
+    /**
+     * Creates a new builder for constructing JudgmentSpanExporter instances.
+     *
+     * @return a new Builder instance
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Exports the collection of spans to the Judgment Labs backend.
+     *
+     * @param spans
+     *            the collection of spans to export
+     * @return a CompletableResultCode representing the export operation status
+     */
+    @Override
+    public CompletableResultCode export(Collection<SpanData> spans) {
+        // Logged before the delegate runs, so the message must not claim success.
+        Logger.info("Exporting " + spans.size() + " spans");
+        return delegate.export(spans);
+    }
+
+    /**
+     * Flushes any pending span exports.
+     *
+     * @return a CompletableResultCode representing the flush operation status
+     */
+    @Override
+    public CompletableResultCode flush() {
+        return delegate.flush();
+    }
+
+    /**
+     * Shuts down this exporter and releases any resources.
+     *
+     * @return a CompletableResultCode representing the shutdown operation status
+     */
+    @Override
+    public CompletableResultCode shutdown() {
+        return delegate.shutdown();
+    }
+
+    /**
+     * Builder for creating JudgmentSpanExporter instances. The setters only
+     * store their argument; values are validated in {@code build()}.
+     */
+    public static final class Builder {
+        private String endpoint;
+        private String apiKey;
+        private String organizationId;
+        private String projectId;
+
+        // Private: instances are obtained through JudgmentSpanExporter.builder().
+        private Builder() {
+        }
+
+        /**
+         * Sets the OTLP endpoint URL.
+         *
+         * @param endpoint
+         *            the endpoint URL
+         * @return this builder for method chaining
+         */
+        public Builder endpoint(String endpoint) {
+            this.endpoint = endpoint;
+            return this;
+        }
+
+        /**
+         * Sets the API key for authentication.
+         *
+         * @param apiKey
+         *            the API key
+         * @return this builder for method chaining
+         */
+        public Builder apiKey(String apiKey) {
+            this.apiKey = apiKey;
+            return this;
+        }
+
+        /**
+         * Sets the organization ID.
+         *
+         * @param organizationId
+         *            the organization ID
+         * @return this builder for method chaining
+         */
+        public Builder organizationId(String organizationId) {
+            this.organizationId = organizationId;
+            return this;
+        }
+
+        /**
+         * Sets the project ID.
+         *
+         * @param projectId
+         *            the project ID
+         * @return this builder for method chaining
+         */
+        public Builder projectId(String projectId) {
+            this.projectId = projectId;
+            return this;
+        }
+
+        /**
+         * Builds a new JudgmentSpanExporter instance with the configured settings.
+         *
+         * Every value is trimmed before use. Fields are checked in the order
+         * endpoint, API key, organization ID, project ID.
+         *
+         * @return a new JudgmentSpanExporter instance
+         * @throws IllegalArgumentException
+         *             if any required field is null, empty or blank
+         */
+        public JudgmentSpanExporter build() {
+            return new JudgmentSpanExporter(
+                    requireText(endpoint, "Endpoint is required"),
+                    requireText(apiKey, "API key is required"),
+                    requireText(organizationId, "Organization ID is required"),
+                    requireText(projectId, "Project ID is required"));
+        }
+
+        /**
+         * Returns the trimmed value, rejecting null, empty and blank input.
+         *
+         * @param value
+         *            the configured value, may be null
+         * @param message
+         *            the exception message to use when the value is missing
+         * @return the trimmed, non-empty value
+         * @throws IllegalArgumentException
+         *             if the value is null or empty after trimming
+         */
+        private static String requireText(String value, String message) {
+            return Optional.ofNullable(value)
+                    .map(String::trim)
+                    .filter(text -> !text.isEmpty())
+                    .orElseThrow(() -> new IllegalArgumentException(message));
+        }
+    }
+}
diff --git a/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/NoOpSpanExporter.java b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/NoOpSpanExporter.java
new file mode 100644
index 0000000..6bb5244
--- /dev/null
+++ b/judgeval-java/src/main/java/com/judgmentlabs/judgeval/v1/tracer/exporters/NoOpSpanExporter.java
@@ -0,0 +1,45 @@
+package com.judgmentlabs.judgeval.v1.tracer.exporters;
+
+import java.util.Collection;
+
+import io.opentelemetry.sdk.common.CompletableResultCode;
+import io.opentelemetry.sdk.trace.data.SpanData;
+import io.opentelemetry.sdk.trace.export.SpanExporter;
+
+/**
+ * A no-op implementation of SpanExporter that discards all spans. Used as a
+ * fallback when project resolution fails or when spans should not be exported.
+ */
+public class NoOpSpanExporter implements SpanExporter {
+    /**
+     * Discards the collection of spans without exporting.
+     *
+     * @param spans
+     *            the collection of spans (ignored)
+     * @return a successful CompletableResultCode
+     */
+    @Override
+    public CompletableResultCode export(Collection<SpanData> spans) {
+        return CompletableResultCode.ofSuccess();
+    }
+
+    /**
+     * Performs a no-op flush operation.
+     *
+     * @return a successful CompletableResultCode
+     */
+    @Override
+    public CompletableResultCode flush() {
+        return CompletableResultCode.ofSuccess();
+    }
+
+    /**
+     * Performs a no-op shutdown operation.
+     *
+     * @return a successful CompletableResultCode
+     */
+    @Override
+    public CompletableResultCode shutdown() {
+        return CompletableResultCode.ofSuccess();
+    }
+}
diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgevalTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgevalTest.java
new file mode 100644
index 0000000..5277d27
--- /dev/null
+++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/JudgevalTest.java
@@ -0,0 +1,79 @@
+package com.judgmentlabs.judgeval.v1;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.junit.jupiter.api.Test;
+
+import com.judgmentlabs.judgeval.v1.evaluation.EvaluationFactory;
+import com.judgmentlabs.judgeval.v1.scorers.ScorersFactory;
+import com.judgmentlabs.judgeval.v1.tracer.TracerFactory;
+
+class JudgevalTest {
+    private static final String TEST_API_URL = "https://api.test.com";
+    private static final String TEST_API_KEY = "test-key";
+    private static final String TEST_ORG_ID = "test-org";
+
+    @Test
+    void builder_withAllParameters_buildsSuccessfully() {
+        Judgeval client = Judgeval.builder()
+                .apiKey(TEST_API_KEY)
+                .organizationId(TEST_ORG_ID)
+                .apiUrl(TEST_API_URL)
+                .build();
+
+        assertNotNull(client);
+    }
+
+    @Test
+    void builder_withNullApiKey_throwsException() {
+        assertThrows(NullPointerException.class, () -> {
+            Judgeval.builder()
+                    .apiKey(null)
+                    .organizationId(TEST_ORG_ID)
+                    .build();
+        });
+    }
+
+    @Test
+    void builder_withNullOrganizationId_throwsException() {
+        assertThrows(NullPointerException.class, () -> {
+            Judgeval.builder()
+                    .apiKey(TEST_API_KEY)
+                    .organizationId(null)
+                    .build();
+        });
+    }
+
+    @Test
+    void tracer_returnsTracerFactory() {
+        Judgeval client = Judgeval.builder()
+                .apiKey(TEST_API_KEY)
+                .organizationId(TEST_ORG_ID)
+                .build();
+
+        TracerFactory factory = client.tracer();
+        assertNotNull(factory);
+    }
+
+    @Test
+    void scorers_returnsScorersFactory() {
+        Judgeval client = Judgeval.builder()
+                .apiKey(TEST_API_KEY)
+                .organizationId(TEST_ORG_ID)
+                .build();
+
+        ScorersFactory factory = client.scorers();
+        assertNotNull(factory);
+    }
+
+    @Test
+    void evaluation_returnsEvaluationFactory() {
+        Judgeval client = Judgeval.builder()
+                .apiKey(TEST_API_KEY)
+                .organizationId(TEST_ORG_ID)
+                .build();
+
+        EvaluationFactory factory = client.evaluation();
+        assertNotNull(factory);
+    }
+}
diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/data/ExampleTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/data/ExampleTest.java
new file mode 100644
index 0000000..204fe53
--- /dev/null
+++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/data/ExampleTest.java
@@ -0,0 +1,51 @@
+package com.judgmentlabs.judgeval.v1.data;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.junit.jupiter.api.Test;
+
+class ExampleTest {
+
+    @Test
+    void constructor_generatesExampleId() {
+        Example example = new Example();
+        assertNotNull(example.getExampleId());
+    }
+
+    @Test
+    void constructor_setsCreatedAt() {
+        Example example = new Example();
+        assertNotNull(example.getCreatedAt());
+    }
+
+    @Test
+    void constructor_setsNameToNull() {
+        Example example = new Example();
+        assertNull(example.getName());
+    }
+
+    @Test
+    void builder_createsExample() {
+        Example example = Example.builder().build();
+        assertNotNull(example);
+    }
+
+    @Test
+    void builder_withProperty_setsProperty() {
+        Example example = Example.builder()
+                .property("key", "value")
+                .build();
+
+        assertNotNull(example);
+        assertEquals("value", example.getAdditionalProperties().get("key"));
+    }
+
+    @Test
+    void builder_withName_setsName() {
+        Example example = Example.builder()
+                .name("test-example")
+                .build();
+
+        assertEquals("test-example", example.getName());
+    }
+}
diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java
new file mode 100644
index 0000000..bde3d5a
--- /dev/null
+++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/evaluation/EvaluationFactoryTest.java
@@ -0,0 +1,35 @@
+package com.judgmentlabs.judgeval.v1.evaluation;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient;
+
+class EvaluationFactoryTest {
+    private static final String TEST_API_URL = "https://api.test.com";
+    private static final String TEST_API_KEY = "test-key";
+    private static final String TEST_ORG_ID = "test-org";
+
+    private EvaluationFactory factory;
+
+    @BeforeEach
+    void setUp() {
+        JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID);
+        factory = new EvaluationFactory(client);
+    }
+
+    @Test
+    void create_returnsConfiguredBuilder() {
+        Evaluation.Builder builder = factory.create();
+        assertNotNull(builder);
+    }
+
+    @Test
+    void create_builderBuildsEvaluation() {
+        Evaluation.Builder builder = factory.create();
+        Evaluation evaluation = builder.build();
+        assertNotNull(evaluation);
+    }
+}
diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java
new file mode 100644
index 0000000..625ce85
--- /dev/null
+++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/ScorersFactoryTest.java
@@ -0,0 +1,49 @@ +package com.judgmentlabs.judgeval.v1.scorers; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.v1.scorers.built_in.BuiltInScorersFactory; +import com.judgmentlabs.judgeval.v1.scorers.custom_scorer.CustomScorerFactory; +import com.judgmentlabs.judgeval.v1.scorers.prompt_scorer.PromptScorerFactory; + +class ScorersFactoryTest { + private static final String TEST_API_URL = "https://api.test.com"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + + private ScorersFactory factory; + + @BeforeEach + void setUp() { + JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); + factory = new ScorersFactory(client); + } + + @Test + void promptScorer_returnsFactory() { + PromptScorerFactory promptScorerFactory = factory.promptScorer(); + assertNotNull(promptScorerFactory); + } + + @Test + void tracePromptScorer_returnsFactory() { + PromptScorerFactory tracePromptScorerFactory = factory.tracePromptScorer(); + assertNotNull(tracePromptScorerFactory); + } + + @Test + void customScorer_returnsFactory() { + CustomScorerFactory customScorerFactory = factory.customScorer(); + assertNotNull(customScorerFactory); + } + + @Test + void builtIn_returnsFactory() { + BuiltInScorersFactory builtInFactory = factory.builtIn(); + assertNotNull(builtInFactory); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java new file mode 100644 index 0000000..fe8d71d --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/scorers/built_in/BuiltInScorersFactoryTest.java @@ -0,0 +1,33 @@ +package 
com.judgmentlabs.judgeval.v1.scorers.built_in; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class BuiltInScorersFactoryTest { + private BuiltInScorersFactory factory; + + @BeforeEach + void setUp() { + factory = new BuiltInScorersFactory(); + } + + @Test + void answerCorrectness_returnsBuilder() { + AnswerCorrectnessScorer.Builder builder = factory.answerCorrectness(); + assertNotNull(builder); + } + + @Test + void answerRelevancy_returnsBuilder() { + AnswerRelevancyScorer.Builder builder = factory.answerRelevancy(); + assertNotNull(builder); + } + + @Test + void faithfulness_returnsBuilder() { + FaithfulnessScorer.Builder builder = factory.faithfulness(); + assertNotNull(builder); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java new file mode 100644 index 0000000..86e01fd --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/BaseTracerTest.java @@ -0,0 +1,168 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.when; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; +import 
com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; +import com.judgmentlabs.judgeval.v1.tracer.exporters.JudgmentSpanExporter; +import com.judgmentlabs.judgeval.v1.tracer.exporters.NoOpSpanExporter; + +import io.opentelemetry.sdk.trace.export.SpanExporter; + +/** Unit tests for BaseTracer, driven through the TestableBaseTracer stub subclass with Mockito mocks for the client and the serializer. */ @ExtendWith(MockitoExtension.class) +class BaseTracerTest { + private static final String TEST_PROJECT_NAME = "test-project"; + private static final String TEST_PROJECT_ID = "test-project-id-123"; + @Mock + private JudgmentSyncClient mockClient; + + @Mock + private ISerializer mockSerializer; + + private TestableBaseTracer tracer; + + /** Leniently stubs both mocks (projectsResolve returns a response carrying TEST_PROJECT_ID) and builds the tracer under test with enableEvaluation set to true. */ @BeforeEach + void setUp() throws Exception { + ResolveProjectNameResponse response = new ResolveProjectNameResponse(); + response.setProjectId(TEST_PROJECT_ID); + + lenient().when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenReturn(response); + lenient().when(mockClient.getApiUrl()).thenReturn("https://api.example.com"); + lenient().when(mockClient.getApiKey()).thenReturn("test-api-key"); + lenient().when(mockClient.getOrganizationId()).thenReturn("test-org-id"); + + lenient().when(mockSerializer.serialize(any())).thenReturn("serialized"); + lenient().when(mockSerializer.serialize(any(), any())).thenReturn("serialized"); + + tracer = new TestableBaseTracer( + TEST_PROJECT_NAME, + true, + mockClient, + mockSerializer); + } + + /** The tracer built in setUp reports the given project name, the evaluation flag, and the project id returned by the stubbed resolve call. */ @Test + void constructor_withValidParameters_resolvesProject() { + assertNotNull(tracer); + assertEquals(TEST_PROJECT_NAME, tracer.getProjectName()); + assertTrue(tracer.isEnableEvaluation()); + assertTrue(tracer.getProjectId().isPresent()); + assertEquals(TEST_PROJECT_ID, tracer.getProjectId().get()); + } + + /** A null project name is expected to make the constructor throw NullPointerException. */ @Test + void constructor_withNullProjectName_throwsException() { + assertThrows(NullPointerException.class, () -> { + new TestableBaseTracer( + null, + true, + mockClient, + mockSerializer); + }); + } + + /** A null client is expected to make the constructor throw NullPointerException. */ @Test + void constructor_withNullClient_throwsException() { +
assertThrows(NullPointerException.class, () -> { + new TestableBaseTracer( + TEST_PROJECT_NAME, + true, + null, + mockSerializer); + }); + } + + /** A null serializer is expected to make the constructor throw NullPointerException. */ @Test + void constructor_withNullSerializer_throwsException() { + assertThrows(NullPointerException.class, () -> { + new TestableBaseTracer( + TEST_PROJECT_NAME, + true, + mockClient, + null); + }); + } + + /** When projectsResolve throws, construction is still expected to succeed and getProjectId() to be empty. */ @Test + void constructor_withFailedProjectResolution_hasEmptyProjectId() throws Exception { + when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenThrow(new RuntimeException("Project not found")); + + TestableBaseTracer failedTracer = new TestableBaseTracer( + TEST_PROJECT_NAME, + true, + mockClient, + mockSerializer); + + assertFalse(failedTracer.getProjectId().isPresent()); + } + + /** With the project id resolved in setUp, getSpanExporter() is expected to return a JudgmentSpanExporter. */ @Test + void getSpanExporter_withValidProjectId_returnsJudgmentSpanExporter() { + SpanExporter exporter = tracer.getSpanExporter(); + assertNotNull(exporter); + assertTrue(exporter instanceof JudgmentSpanExporter); + } + + /** When projectsResolve returns null, getSpanExporter() is expected to fall back to a NoOpSpanExporter. */ @Test + void getSpanExporter_withoutProjectId_returnsNoOpSpanExporter() throws Exception { + when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenReturn(null); + + TestableBaseTracer failedTracer = new TestableBaseTracer( + TEST_PROJECT_NAME, + true, + mockClient, + mockSerializer); + + SpanExporter exporter = failedTracer.getSpanExporter(); + assertNotNull(exporter); + assertTrue(exporter instanceof NoOpSpanExporter); + } + + /** getTracer() is expected to return a non-null OpenTelemetry Tracer. */ @Test + void getTracer_returnsTracer() { + io.opentelemetry.api.trace.Tracer otelTracer = tracer.getTracer(); + assertNotNull(otelTracer); + } + + /** Has no explicit assertion: the test passes as long as setAttributes(null) returns without throwing. */ @Test + void setAttributes_withNull_doesNotThrow() { + tracer.setAttributes(null); + } + + /** Concrete BaseTracer used only by these tests: initialize and shutdown have empty bodies and forceFlush returns false. */ private static class TestableBaseTracer extends BaseTracer { + protected TestableBaseTracer(String projectName, boolean enableEvaluation, JudgmentSyncClient apiClient, + ISerializer serializer) { + super(projectName, enableEvaluation, apiClient, serializer); + } + + @Override + public void initialize() { + } + +
@Override + public boolean forceFlush(int timeoutMillis) { + return false; + } + + @Override + public void shutdown(int timeoutMillis) { + } + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java new file mode 100644 index 0000000..f3637fe --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerFactoryTest.java @@ -0,0 +1,28 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; + +/** Tests for TracerFactory, using a JudgmentSyncClient constructed directly (not mocked) from placeholder URL, key, and organization values. */ class TracerFactoryTest { + private static final String TEST_API_URL = "https://api.test.com"; + private static final String TEST_API_KEY = "test-key"; + private static final String TEST_ORG_ID = "test-org"; + + private TracerFactory factory; + + /** Builds a fresh factory around a new client before every test. */ @BeforeEach + void setUp() { + JudgmentSyncClient client = new JudgmentSyncClient(TEST_API_URL, TEST_API_KEY, TEST_ORG_ID); + factory = new TracerFactory(client); + } + + /** Checks only that create() returns a non-null Tracer.Builder; the builder's configuration is not inspected. */ @Test + void create_returnsConfiguredBuilder() { + Tracer.Builder builder = factory.create(); + assertNotNull(builder); + } +} diff --git a/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java new file mode 100644 index 0000000..3a57598 --- /dev/null +++ b/judgeval-java/src/test/java/com/judgmentlabs/judgeval/v1/tracer/TracerTest.java @@ -0,0 +1,60 @@ +package com.judgmentlabs.judgeval.v1.tracer; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.lenient; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import
org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import com.judgmentlabs.judgeval.internal.api.JudgmentSyncClient; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameRequest; +import com.judgmentlabs.judgeval.internal.api.models.ResolveProjectNameResponse; + +/** Tests for the Tracer.Builder entry point, using a Mockito mock of JudgmentSyncClient. */ @ExtendWith(MockitoExtension.class) +class TracerTest { + private static final String TEST_PROJECT_NAME = "test-project"; + private static final String TEST_PROJECT_ID = "test-project-id-123"; + + @Mock + private JudgmentSyncClient mockClient; + + /** Leniently stubs projectsResolve to return a response carrying TEST_PROJECT_ID; lenient so tests that never trigger the call are not flagged for an unused stub. */ @BeforeEach + void setUp() throws Exception { + ResolveProjectNameResponse response = new ResolveProjectNameResponse(); + response.setProjectId(TEST_PROJECT_ID); + + lenient().when(mockClient.projectsResolve(any(ResolveProjectNameRequest.class))) + .thenReturn(response); + } + + /** Tracer.builder() is expected to return a non-null builder. */ @Test + void builder_returnsBuilder() { + Tracer.Builder builder = Tracer.builder(); + assertNotNull(builder); + } + + /** Building with a null project name is expected to throw NullPointerException somewhere in the builder chain. */ @Test + void builder_withNullProjectName_throwsException() { + assertThrows(NullPointerException.class, () -> { + Tracer.builder() + .client(mockClient) + .projectName(null) + .build(); + }); + } + + /** Building with a null client is expected to throw NullPointerException somewhere in the builder chain. */ @Test + void builder_withNullClient_throwsException() { + assertThrows(NullPointerException.class, () -> { + Tracer.builder() + .projectName(TEST_PROJECT_NAME) + .client(null) + .build(); + }); + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..fc25fe5 --- /dev/null +++ b/package.json @@ -0,0 +1,33 @@ +{ + "name": "judgeval-java", + "version": "0.0.0", + "private": true, + "scripts": { + "format": "npm run format:core && npm run format:openai", + "format:core": "mvn -B -f judgeval-java/pom.xml spotless:apply", + "format:openai": "mvn -B -f instrumentation/judgeval-instrumentation-openai/pom.xml spotless:apply", + "format:check": "npm run format:check:core && npm run format:check:openai", + "format:check:core": "cd judgeval-java && mvn -B
spotless:check", + "format:check:openai": "cd instrumentation/judgeval-instrumentation-openai && mvn -B spotless:check", + "check": "cd judgeval-java && mvn -B compile checkstyle:check spotless:check", + "lint": "cd judgeval-java && mvn -B checkstyle:check", + "test": "cd judgeval-java && mvn test", + "clean": "npm run clean:core && npm run clean:openai", + "clean:core": "cd judgeval-java && mvn clean", + "clean:openai": "cd instrumentation/judgeval-instrumentation-openai && mvn clean", + "build": "cd judgeval-java && mvn -B clean compile", + "install:all": "npm run install:core && npm run install:openai", + "install:core": "cd judgeval-java && mvn -B -Dgpg.skip=true clean install", + "install:openai": "cd instrumentation/judgeval-instrumentation-openai && mvn -B -Dgpg.skip=true clean install", + "generate:client": "npm run generate:client:raw && npm run format", + "generate:client:raw": "./scripts/generate-client.sh", + "ci": "cd judgeval-java && mvn -B clean compile test checkstyle:check spotless:check", + "status": "npm run status:core && npm run status:openai", + "status:core": "cd judgeval-java && echo \"[status] judgeval-java\" && G=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.groupId) && A=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.artifactId) && V=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version) && echo \"GAV: $G:$A:$V\" && (ls -1 target/*.jar 2>/dev/null || echo \"No jar built\")", + "status:openai": "cd instrumentation/judgeval-instrumentation-openai && echo \"[status] instrumentation/judgeval-instrumentation-openai\" && G=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.groupId) && A=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.artifactId) && V=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version) && echo \"GAV: $G:$A:$V\" && (ls -1 target/*.jar 2>/dev/null || echo \"No jar built\")", + "example": "dotenv -e .env -- ./scripts/run-example.sh" + }, + "devDependencies": { + 
"dotenv-cli": "^7.4.2" + } +} diff --git a/scripts/generate_client.py b/scripts/generate_client.py index 2a3cb68..682de7a 100755 --- a/scripts/generate_client.py +++ b/scripts/generate_client.py @@ -463,6 +463,18 @@ def generate_client_class( " this.mapper = new ObjectMapper();", " }", "", + " public String getApiUrl() {", + " return baseUrl;", + " }", + "", + " public String getApiKey() {", + " return apiKey;", + " }", + "", + " public String getOrganizationId() {", + " return organizationId;", + " }", + "", " private String buildUrl(String path, Map queryParams) {", " StringBuilder url = new StringBuilder(baseUrl).append(path);", " if (!queryParams.isEmpty()) {", diff --git a/scripts/run-example.sh b/scripts/run-example.sh new file mode 100755 index 0000000..2f4d17f --- /dev/null +++ b/scripts/run-example.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ -z "$1" ]; then + echo "Usage: npm run run [MAIN=ClassName]" + exit 1 +fi + +EXAMPLE=$1 +MAIN_CLASS=${MAIN:-$(ls examples/src/main/java/examples/${EXAMPLE}/*.java 2>/dev/null | head -n1 | xargs -n1 basename 2>/dev/null | sed 's/.java$//')} + +if [ -z "$MAIN_CLASS" ]; then + echo "Error: Could not find example in examples/src/main/java/examples/${EXAMPLE}/" + exit 1 +fi + +mvn -q -f examples/pom.xml -DskipTests -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=examples.${EXAMPLE}.${MAIN_CLASS} clean compile exec:java +