41 changes: 40 additions & 1 deletion ldai/client.go
@@ -102,7 +102,11 @@ func (c *Client) Config(
builder := NewConfig().
WithModelName(parsed.Model.Name).
WithProviderName(parsed.Provider.Name).
WithEnabled(parsed.Meta.Enabled)
WithEnabled(parsed.Meta.Enabled).
WithMode(parsed.Mode).
WithEvaluationMetricKey(parsed.EvaluationMetricKey).
WithEvaluationMetricKeys(parsed.EvaluationMetricKeys).
WithJudgeConfiguration(parsed.JudgeConfiguration)

for k, v := range parsed.Model.Parameters {
builder.WithModelParam(k, v)
@@ -174,3 +178,38 @@ func interpolateTemplate(template string, variables map[string]interface{}) (str
}
return m.RenderString(variables)
}

// JudgeConfig evaluates an AI Config, tracking it as a judge function. See Config for details.
//
// This method extends the provided variables with reserved judge variables:
// - "message_history": "{{message_history}}"
// - "response_to_evaluate": "{{response_to_evaluate}}"
//
// These literal placeholder strings preserve the Mustache templates through the first interpolation
// (during config fetch), allowing Judge.Evaluate() to perform a second interpolation with actual values.
func (c *Client) JudgeConfig(
key string,
context ldcontext.Context,
defaultValue Config,
variables map[string]interface{},
) (Config, *Tracker) {
_ = c.sdk.TrackMetric("$ld:ai:judge:function:single", context, 1, ldvalue.String(key))

// Extend variables with reserved judge placeholders
extendedVariables := make(map[string]interface{})
for k, v := range variables {
// Warn if user tries to override reserved variables
if k == "message_history" || k == "response_to_evaluate" {
c.logger.Warnf("AI Config '%s': variable '%s' is reserved by judge and will be ignored", key, k)
continue
}
extendedVariables[k] = v
}

// Inject reserved variables as literal placeholder strings
// These will be preserved through the first interpolation and resolved during Judge.Evaluate()
extendedVariables["message_history"] = "{{message_history}}"
extendedVariables["response_to_evaluate"] = "{{response_to_evaluate}}"

return c.Config(key, context, defaultValue, extendedVariables)
}
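
For reference, a minimal sketch of the intended two-pass flow from inside the ldai package. Judge.Evaluate is not part of this diff, so the second pass is shown directly with interpolateTemplate, assuming it returns (string, error) as suggested by the hunk above; all variable values are illustrative.

// Sketch only: Judge.Evaluate is not in this diff, so the second
// interpolation pass is shown inline with interpolateTemplate.
func exampleJudgeFlow(client *Client, ctx ldcontext.Context) error {
	cfg, _ := client.JudgeConfig("my-judge", ctx, Disabled(), map[string]interface{}{
		"tone": "strict", // ordinary variables render on the first pass
	})

	// After the first pass, judge messages still contain the literal
	// placeholders, e.g. "Input: {{message_history}}\nOutput: {{response_to_evaluate}}".
	for _, msg := range cfg.Messages() {
		rendered, err := interpolateTemplate(msg.Content, map[string]interface{}{
			"message_history":      "user: hello", // hypothetical transcript
			"response_to_evaluate": "hi there",    // hypothetical model output
		})
		if err != nil {
			return err
		}
		_ = rendered // would be sent to the judge model for scoring
	}
	return nil
}
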
JudgeConfig double-tracks config and judge metric events

Medium Severity

JudgeConfig emits $ld:ai:judge:function:single at line 196, then delegates to c.Config which independently emits $ld:ai:config:function:single at line 73. Every judge evaluation is therefore double-counted — once as a judge function call and once as a regular config function call. This inflates the config function metric on the monitoring dashboard, making it appear there are more regular config evaluations than actually occurred.

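One way the double count could be avoided, sketched under the assumption that Config's body can be factored into an untracked helper; configInternal and judgeVariables below are hypothetical names, and the actual fix may differ. Each public entry point then emits its function metric exactly once.

// Hypothetical refactor: both entry points delegate to an untracked
// helper, so each call emits exactly one function metric.
func (c *Client) Config(key string, context ldcontext.Context, defaultValue Config,
	variables map[string]interface{}) (Config, *Tracker) {
	_ = c.sdk.TrackMetric("$ld:ai:config:function:single", context, 1, ldvalue.String(key))
	return c.configInternal(key, context, defaultValue, variables)
}

func (c *Client) JudgeConfig(key string, context ldcontext.Context, defaultValue Config,
	variables map[string]interface{}) (Config, *Tracker) {
	_ = c.sdk.TrackMetric("$ld:ai:judge:function:single", context, 1, ldvalue.String(key))
	// judgeVariables would perform the reserved-placeholder injection shown above.
	extendedVariables := judgeVariables(key, variables, c.logger)
	return c.configInternal(key, context, defaultValue, extendedVariables)
}
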

164 changes: 164 additions & 0 deletions ldai/client_test.go
@@ -579,3 +579,167 @@ func TestInterpolation(t *testing.T) {
assert.Equal(t, "user_kind=<>,cat_kind=<>", result)
})
}

func TestParseJudgeSpecificFields(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"judgeConfiguration": {
"judges": [
{"key": "judge1", "samplingRate": 0.5},
{"key": "judge2", "samplingRate": 1.0}
]
},
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
assert.Equal(t, "toxicity", cfg.EvaluationMetricKey())

judgeConfig := cfg.JudgeConfiguration()
require.NotNil(t, judgeConfig)
require.Len(t, judgeConfig.Judges, 2)
assert.Equal(t, "judge1", judgeConfig.Judges[0].Key)
assert.Equal(t, 0.5, judgeConfig.Judges[0].SamplingRate)
assert.Equal(t, "judge2", judgeConfig.Judges[1].Key)
assert.Equal(t, 1.0, judgeConfig.Judges[1].SamplingRate)
}

func TestParseEvaluationMetricKeys(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKeys": ["relevance", "accuracy"],
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
assert.Equal(t, "", cfg.EvaluationMetricKey())
assert.Equal(t, []string{"relevance", "accuracy"}, cfg.EvaluationMetricKeys())
}

func TestParseEvaluationMetricKeyPriority(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"evaluationMetricKeys": ["relevance", "accuracy"],
"messages": [
{"content": "test", "role": "system"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

assert.Equal(t, "judge", cfg.Mode())
// Both fields should be parsed
assert.Equal(t, "toxicity", cfg.EvaluationMetricKey())
assert.Equal(t, []string{"relevance", "accuracy"}, cfg.EvaluationMetricKeys())
}

func TestJudgeConfigurationImmutable(t *testing.T) {
// Test that mutations to JudgeConfiguration don't affect the Config
judgeConfig := &datamodel.JudgeConfiguration{
Judges: []datamodel.Judge{
{Key: "judge1", SamplingRate: 0.5},
{Key: "judge2", SamplingRate: 1.0},
},
}

builder := NewConfig().
Enable().
WithJudgeConfiguration(judgeConfig)
cfg := builder.Build()

// Mutate the original
judgeConfig.Judges[0].Key = "mutated"
judgeConfig.Judges = append(judgeConfig.Judges, datamodel.Judge{Key: "judge3", SamplingRate: 0.3})

// Config should not be affected
retrieved := cfg.JudgeConfiguration()
require.NotNil(t, retrieved)
require.Len(t, retrieved.Judges, 2)
assert.Equal(t, "judge1", retrieved.Judges[0].Key) // Should still be original value
assert.Equal(t, "judge2", retrieved.Judges[1].Key)

// Mutate the retrieved config
retrieved.Judges[0].Key = "mutated_again"
retrieved.Judges = append(retrieved.Judges, datamodel.Judge{Key: "judge4", SamplingRate: 0.4})

// Config should still not be affected
retrieved2 := cfg.JudgeConfiguration()
require.NotNil(t, retrieved2)
require.Len(t, retrieved2.Judges, 2)
assert.Equal(t, "judge1", retrieved2.Judges[0].Key) // Should still be original value
assert.Equal(t, "judge2", retrieved2.Judges[1].Key)
}

// TestJudgeConfig_PreservesReservedPlaceholders verifies that JudgeConfig injects reserved variables
// so that {{message_history}} and {{response_to_evaluate}} are preserved for the second interpolation
// pass during Judge.Evaluate(). Without this, Config's first Mustache pass would render them as empty.
func TestJudgeConfig_PreservesReservedPlaceholders(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"mode": "judge",
"evaluationMetricKey": "toxicity",
"messages": [
{"content": "You are a judge.", "role": "system"},
{"content": "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", "role": "user"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.JudgeConfig("judge-key", ldcontext.New("user"), Disabled(), nil)

msgs := cfg.Messages()
require.Len(t, msgs, 2)
assert.Equal(t, "You are a judge.", msgs[0].Content)
assert.Contains(t, msgs[1].Content, "{{message_history}}", "JudgeConfig must preserve placeholder for second interpolation")
assert.Contains(t, msgs[1].Content, "{{response_to_evaluate}}", "JudgeConfig must preserve placeholder for second interpolation")
assert.Equal(t, "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", msgs[1].Content)
}

// TestConfig_WithoutReservedVarsWipesJudgePlaceholders documents that Config (without reserved vars)
// renders {{message_history}} and {{response_to_evaluate}} as empty when used for judge templates.
func TestConfig_WithoutReservedVarsWipesJudgePlaceholders(t *testing.T) {
json := []byte(`{
"_ldMeta": {"variationKey": "1", "enabled": true},
"messages": [
{"content": "Input: {{message_history}}\nOutput: {{response_to_evaluate}}", "role": "user"}
]
}`)

client, err := NewClient(newMockSDK(json, nil))
require.NoError(t, err)
require.NotNil(t, client)

cfg, _ := client.Config("key", ldcontext.New("user"), Disabled(), nil)

msgs := cfg.Messages()
require.Len(t, msgs, 1)
assert.Equal(t, "Input: \nOutput: ", msgs[0].Content, "Config without reserved vars renders placeholders as empty")
}
86 changes: 80 additions & 6 deletions ldai/config.go
@@ -60,6 +60,33 @@ func (c *Config) CustomModelParam(key string) (ldvalue.Value, bool) {
return val, ok
}

// Mode returns the AI Config mode (e.g., "completion", "agent", "judge").
func (c *Config) Mode() string {
return c.c.Mode
}

// EvaluationMetricKey returns the evaluation metric key for judge mode configs.
func (c *Config) EvaluationMetricKey() string {
return c.c.EvaluationMetricKey
}

// EvaluationMetricKeys returns the array of evaluation metric keys.
//
// Deprecated: Use EvaluationMetricKey instead.
func (c *Config) EvaluationMetricKeys() []string {
return slices.Clone(c.c.EvaluationMetricKeys)
}

// JudgeConfiguration returns the judge configuration attached to this config, if any.
// Returns a defensive copy to prevent mutations.
func (c *Config) JudgeConfiguration() *datamodel.JudgeConfiguration {
if c.c.JudgeConfiguration == nil {
return nil
}
return &datamodel.JudgeConfiguration{
Judges: slices.Clone(c.c.JudgeConfiguration.Judges),
}
}

// AsLdValue is used internally.
func (c *Config) AsLdValue() ldvalue.Value {
return ldvalue.FromJSONMarshal(c.c)
@@ -68,12 +95,16 @@ func (c *Config) AsLdValue() ldvalue.Value {
// ConfigBuilder is used to define a default AI Config, returned when LaunchDarkly is unreachable or there
// is an error evaluating the Config.
type ConfigBuilder struct {
messages []datamodel.Message
enabled bool
providerName string
modelName string
modelParams map[string]ldvalue.Value
modelCustomParams map[string]ldvalue.Value
messages []datamodel.Message
enabled bool
providerName string
modelName string
modelParams map[string]ldvalue.Value
modelCustomParams map[string]ldvalue.Value
mode string
evaluationMetricKey string
evaluationMetricKeys []string
judgeConfiguration *datamodel.JudgeConfiguration
}

// NewConfig returns a new ConfigBuilder. By default, the Config is disabled.
@@ -141,8 +172,47 @@ func (cb *ConfigBuilder) WithCustomModelParam(key string, value ldvalue.Value) *
return cb
}

// WithMode sets the AI Config mode (e.g., "completion", "agent", "judge").
func (cb *ConfigBuilder) WithMode(mode string) *ConfigBuilder {
cb.mode = mode
return cb
}

// WithEvaluationMetricKey sets the evaluation metric key for judge mode configs.
func (cb *ConfigBuilder) WithEvaluationMetricKey(key string) *ConfigBuilder {
cb.evaluationMetricKey = key
return cb
}

// WithEvaluationMetricKeys sets the array of evaluation metric keys.
//
// Deprecated: Use WithEvaluationMetricKey instead.
func (cb *ConfigBuilder) WithEvaluationMetricKeys(keys []string) *ConfigBuilder {
cb.evaluationMetricKeys = slices.Clone(keys)
return cb
}

// WithJudgeConfiguration sets the judge configuration for this config.
// The provided judgeConfig is defensively copied.
func (cb *ConfigBuilder) WithJudgeConfiguration(judgeConfig *datamodel.JudgeConfiguration) *ConfigBuilder {
if judgeConfig == nil {
cb.judgeConfiguration = nil
return cb
}
cb.judgeConfiguration = &datamodel.JudgeConfiguration{
Judges: slices.Clone(judgeConfig.Judges),
}
return cb
}

// Build creates a Config from the current builder state.
func (cb *ConfigBuilder) Build() Config {
var judgeConfig *datamodel.JudgeConfiguration
if cb.judgeConfiguration != nil {
judgeConfig = &datamodel.JudgeConfiguration{
Judges: slices.Clone(cb.judgeConfiguration.Judges),
}
}

return Config{
c: datamodel.Config{
Messages: slices.Clone(cb.messages),
@@ -157,6 +227,10 @@ func (cb *ConfigBuilder) Build() Config {
Provider: datamodel.Provider{
Name: cb.providerName,
},
Mode: cb.mode,
EvaluationMetricKey: cb.evaluationMetricKey,
EvaluationMetricKeys: slices.Clone(cb.evaluationMetricKeys),
JudgeConfiguration: judgeConfig,
},
}
}
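
A short usage sketch of the new builder fields. Every method used here appears in this diff; the key names and values are examples only.

// Illustrative fallback for a judge-mode config, built with the
// methods added in this diff.
fallback := NewConfig().
	Enable().
	WithMode("judge").
	WithEvaluationMetricKey("toxicity").
	WithJudgeConfiguration(&datamodel.JudgeConfiguration{
		Judges: []datamodel.Judge{{Key: "judge1", SamplingRate: 0.5}},
	}).
	Build()

// The fallback would then be passed as defaultValue, e.g.:
// cfg, tracker := client.JudgeConfig("my-judge", ctx, fallback, nil)
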
52 changes: 52 additions & 0 deletions ldai/datamodel/datamodel.go
@@ -68,4 +68,56 @@ type Config struct {

// Provider is the provider.
Provider Provider `json:"provider,omitempty"`

// Mode is the AI Config mode (e.g., "completion", "agent", "judge").
Mode string `json:"mode,omitempty"`

// EvaluationMetricKey is the evaluation metric key for judge mode configs.
EvaluationMetricKey string `json:"evaluationMetricKey,omitempty"`

// EvaluationMetricKeys is an array of evaluation metric keys.
//
// Deprecated: Use EvaluationMetricKey instead.
EvaluationMetricKeys []string `json:"evaluationMetricKeys,omitempty"`

// JudgeConfiguration specifies judges attached to this config.
JudgeConfiguration *JudgeConfiguration `json:"judgeConfiguration,omitempty"`
}

// JudgeConfiguration defines the configuration for judges attached to a config.
type JudgeConfiguration struct {
// Judges is a list of judges to evaluate this config's outputs.
Judges []Judge `json:"judges,omitempty"`
}

// Judge defines a single judge reference with key and sampling rate.
type Judge struct {
// Key is the judge config key.
Key string `json:"key"`

// SamplingRate is the probability (0.0-1.0) that the judge evaluates a given output.
SamplingRate float64 `json:"samplingRate"`
}
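
A minimal sketch of how a caller might honor SamplingRate as an evaluation probability; the SDK's actual sampling logic is not part of this diff.

// Sketch: treat SamplingRate as the chance a judge runs on a given
// output. Requires "math/rand". Not the SDK's implementation.
func shouldEvaluate(j Judge) bool {
	return rand.Float64() < j.SamplingRate // 1.0 always evaluates, 0.0 never
}
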

// EvalScore represents a single evaluation metric result.
type EvalScore struct {
// Score is the evaluation score between 0.0 and 1.0.
Score float64 `json:"score"`

// Reasoning is the explanation for the score.
Reasoning string `json:"reasoning"`
}

// JudgeResponse represents the response from a judge evaluation.
type JudgeResponse struct {
// Evals contains the evaluation results keyed by metric name.
Evals map[string]EvalScore `json:"evals"`

// Success indicates whether the evaluation completed successfully.
Success bool `json:"success"`

// JudgeConfigKey is the key of the judge config that produced this response.
JudgeConfigKey string `json:"judgeConfigKey,omitempty"`

// Error contains the error message if the evaluation failed.
Error string `json:"error,omitempty"`
}
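
Given the struct tags above, a judge's JSON output decodes directly with encoding/json; the payload below is illustrative only.

// Sketch: decoding a judge response using the tags defined above.
raw := []byte(`{
	"evals": {"toxicity": {"score": 0.9, "reasoning": "no harmful content"}},
	"success": true,
	"judgeConfigKey": "judge1"
}`)
var resp JudgeResponse
if err := json.Unmarshal(raw, &resp); err != nil {
	// handle malformed judge output
}
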