10 changes: 3 additions & 7 deletions benchmarks/commit0/run_infer.py
@@ -23,13 +23,14 @@
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
EvalMetadata,
EvalOutput,
)
from benchmarks.utils.version import SDK_SHORT_SHA
from openhands.sdk import LLM, Agent, Conversation, get_logger
from openhands.sdk import Agent, Conversation, get_logger
from openhands.sdk.workspace import RemoteWorkspace
from openhands.tools.preset.default import get_default_tools
from openhands.workspace import APIRemoteWorkspace, DockerDevWorkspace
@@ -600,12 +601,7 @@ def main() -> None:
if args.max_attempts < 1:
raise ValueError(f"max_attempts must be >= 1, got {args.max_attempts}")

llm_config_path = args.llm_config_path
if not os.path.isfile(llm_config_path):
raise ValueError(f"LLM config file {llm_config_path} does not exist")
with open(llm_config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.llm_config_path)
logger.info("Using LLM config: %s", llm.model_dump_json(indent=2))

dataset_description = (
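Each run_infer.py in this diff applies the same one-line replacement, delegating to the helper added in benchmarks/utils/llm_config.py later in this diff. A minimal sketch of the behavior the entrypoints now rely on, assuming a hypothetical config file and key value: the api_key can be left out of the JSON entirely and injected through the LLM_API_KEY environment variable instead.

    import json
    import os
    import tempfile

    from benchmarks.utils.llm_config import load_llm_config

    # Hypothetical config with no api_key; the credential arrives via the environment.
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump({"model": "test-model", "base_url": "https://api.example.com"}, f)

    os.environ["LLM_API_KEY"] = "env-api-key"  # e.g. injected as a CI/cloud secret
    llm = load_llm_config(f.name)
    # llm.api_key now carries "env-api-key" rather than anything from the file.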
9 changes: 2 additions & 7 deletions benchmarks/gaia/run_infer.py
@@ -24,10 +24,10 @@
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput
from benchmarks.utils.version import SDK_SHORT_SHA
from openhands.sdk import (
LLM,
Agent,
Conversation,
Event,
@@ -557,12 +557,7 @@ def main() -> None:
raise ValueError(f"max_attempts must be >= 1, got {args.max_attempts}")

# Load LLM config
llm_config_path = args.llm_config_path
if not os.path.isfile(llm_config_path):
raise ValueError(f"LLM config file {llm_config_path} does not exist")
with open(llm_config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.llm_config_path)
logger.info("Using LLM config: %s", llm.model_dump_json(indent=2))

# Construct dataset description
10 changes: 3 additions & 7 deletions benchmarks/multiswebench/run_infer.py
@@ -25,13 +25,14 @@
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
EvalMetadata,
EvalOutput,
)
from benchmarks.utils.version import SDK_SHORT_SHA
from openhands.sdk import LLM, Agent, Conversation, get_logger
from openhands.sdk import Agent, Conversation, get_logger
from openhands.sdk.workspace import RemoteWorkspace
from openhands.tools.preset.default import get_default_tools
from openhands.workspace import APIRemoteWorkspace, DockerWorkspace
@@ -435,12 +436,7 @@ def main() -> None:
if args.max_attempts < 1:
raise ValueError(f"max_attempts must be >= 1, got {args.max_attempts}")

llm_config_path = args.llm_config_path
if not os.path.isfile(llm_config_path):
raise ValueError(f"LLM config file {llm_config_path} does not exist")
with open(llm_config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.llm_config_path)
logger.info("Using LLM config: %s", llm.model_dump_json(indent=2))

dataset_description = (
10 changes: 3 additions & 7 deletions benchmarks/openagentsafety/run_infer.py
@@ -19,8 +19,9 @@
from benchmarks.utils.dataset import get_dataset
from benchmarks.utils.evaluation import Evaluation
from benchmarks.utils.evaluation_utils import construct_eval_output_dir
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput
from openhands.sdk import LLM, Agent, Conversation, get_logger
from openhands.sdk import Agent, Conversation, get_logger
from openhands.sdk.workspace import RemoteWorkspace
from openhands.tools.preset.default import get_default_tools
from openhands.workspace import DockerWorkspace
@@ -540,12 +541,7 @@ def main() -> None:
raise ValueError(f"max_attempts must be >= 1, got {args.max_attempts}")

# Load LLM config
llm_config_path = args.llm_config_path
if not os.path.isfile(llm_config_path):
raise ValueError(f"LLM config file {llm_config_path} does not exist")
with open(llm_config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.llm_config_path)
logger.info("Using LLM config: %s", llm.model_dump_json(indent=2))

# Construct output directory
6 changes: 2 additions & 4 deletions benchmarks/scripts/validate_cfg.py
@@ -1,16 +1,14 @@
import argparse

from openhands.sdk import LLM
from benchmarks.utils.llm_config import load_llm_config


def main():
parser = argparse.ArgumentParser(description="Validate LLM configuration")
parser.add_argument("config_path", type=str, help="Path to JSON LLM configuration")
args = parser.parse_args()

with open(args.config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.config_path)

print("LLM configuration is valid:")
print(llm.model_dump_json(indent=2))
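The validator now picks up the same environment override. A sketch of invoking it against a config that omits api_key; the field names mirror the fixture in tests/test_llm_config.py, and the subprocess call assumes the script's existing __main__ entrypoint, which sits outside this hunk.

    import json
    import os
    import subprocess
    import tempfile

    # Hypothetical keyless config; LLM_API_KEY supplies the credential at runtime.
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump({"model": "test-model", "base_url": "https://api.example.com"}, f)

    subprocess.run(
        ["python", "benchmarks/scripts/validate_cfg.py", f.name],
        env={**os.environ, "LLM_API_KEY": "env-api-key"},
        check=True,
    )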
10 changes: 3 additions & 7 deletions benchmarks/swebench/run_infer.py
@@ -22,13 +22,14 @@
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
EvalMetadata,
EvalOutput,
)
from benchmarks.utils.version import SDK_SHORT_SHA
from openhands.sdk import LLM, Agent, Conversation, get_logger
from openhands.sdk import Agent, Conversation, get_logger
from openhands.sdk.workspace import RemoteWorkspace
from openhands.tools.preset.default import get_default_tools
from openhands.workspace import APIRemoteWorkspace, DockerWorkspace
@@ -326,12 +327,7 @@ def main() -> None:
if args.max_attempts < 1:
raise ValueError(f"max_attempts must be >= 1, got {args.max_attempts}")

llm_config_path = args.llm_config_path
if not os.path.isfile(llm_config_path):
raise ValueError(f"LLM config file {llm_config_path} does not exist")
with open(llm_config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.llm_config_path)
logger.info("Using LLM config: %s", llm.model_dump_json(indent=2))

dataset_description = (
9 changes: 2 additions & 7 deletions benchmarks/swebenchmultimodal/run_infer.py
@@ -21,14 +21,14 @@
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
EvalMetadata,
EvalOutput,
)
from benchmarks.utils.version import SDK_SHORT_SHA
from openhands.sdk import (
LLM,
Agent,
Conversation,
ImageContent,
@@ -391,12 +391,7 @@ def main() -> None:
if args.max_attempts < 1:
raise ValueError(f"max_attempts must be >= 1, got {args.max_attempts}")

llm_config_path = args.llm_config_path
if not os.path.isfile(llm_config_path):
raise ValueError(f"LLM config file {llm_config_path} does not exist")
with open(llm_config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.llm_config_path)
logger.info("Using LLM config: %s", llm.model_dump_json(indent=2))

dataset_description = (
10 changes: 3 additions & 7 deletions benchmarks/swtbench/run_infer.py
@@ -15,14 +15,15 @@
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
EvalMetadata,
EvalOutput,
)
from benchmarks.utils.version import SDK_SHORT_SHA
from openhands.agent_server.docker.build import _base_slug
from openhands.sdk import LLM, Agent, Conversation, __version__, get_logger
from openhands.sdk import Agent, Conversation, __version__, get_logger
from openhands.sdk.workspace import RemoteWorkspace
from openhands.tools.preset.default import get_default_tools
from openhands.workspace import APIRemoteWorkspace, DockerDevWorkspace, DockerWorkspace
@@ -355,12 +356,7 @@ def main() -> None:
if args.max_attempts < 1:
raise ValueError(f"max_attempts must be >= 1, got {args.max_attempts}")

llm_config_path = args.llm_config_path
if not os.path.isfile(llm_config_path):
raise ValueError(f"LLM config file {llm_config_path} does not exist")
with open(llm_config_path, "r") as f:
llm_config = f.read()
llm = LLM.model_validate_json(llm_config)
llm = load_llm_config(args.llm_config_path)
logger.info("Using LLM config: %s", llm.model_dump_json(indent=2))

dataset_description = (
41 changes: 41 additions & 0 deletions benchmarks/utils/llm_config.py
@@ -0,0 +1,41 @@
"""
Utilities for loading LLM configuration with environment variable overrides.
"""

import json
import os

from openhands.sdk import LLM


LLM_API_KEY_ENV_VAR = "LLM_API_KEY"


def load_llm_config(config_path: str) -> LLM:
"""Load LLM configuration from a JSON file with environment variable override.

If the LLM_API_KEY environment variable is set, it will override the api_key
value in the JSON configuration file. This allows cloud environments to inject
the API key via secrets without modifying the config file.

Args:
config_path: Path to the JSON LLM configuration file.

Returns:
LLM instance with the loaded configuration.

Raises:
ValueError: If the config file does not exist.
"""
if not os.path.isfile(config_path):
raise ValueError(f"LLM config file {config_path} does not exist")

with open(config_path, "r") as f:
config_data = json.load(f)

# Override api_key with environment variable if set
env_api_key = os.getenv(LLM_API_KEY_ENV_VAR)
if env_api_key:
config_data["api_key"] = env_api_key

return LLM.model_validate(config_data)
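A quick illustration of the precedence rules described in the docstring above; the config path and key values are hypothetical, and the behavior mirrors the tests added below.

    import json
    import os
    import tempfile

    from benchmarks.utils.llm_config import LLM_API_KEY_ENV_VAR, load_llm_config

    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump({"model": "test-model", "api_key": "config-api-key"}, f)

    os.environ.pop(LLM_API_KEY_ENV_VAR, None)
    llm = load_llm_config(f.name)      # no env var: api_key comes from the file

    os.environ[LLM_API_KEY_ENV_VAR] = "env-override-api-key"
    llm = load_llm_config(f.name)      # env var set: it overrides the file value

    os.environ[LLM_API_KEY_ENV_VAR] = ""
    llm = load_llm_config(f.name)      # empty string is falsy: file value is kept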
109 changes: 109 additions & 0 deletions tests/test_llm_config.py
@@ -0,0 +1,109 @@
"""Tests for LLM configuration loading with environment variable override."""

import json
import os
import tempfile
from unittest.mock import patch

import pytest
from pydantic import SecretStr

from benchmarks.utils.llm_config import LLM_API_KEY_ENV_VAR, load_llm_config


def get_api_key_value(api_key: str | SecretStr | None) -> str | None:
"""Extract the actual value from api_key which can be str, SecretStr, or None."""
if api_key is None:
return None
if isinstance(api_key, SecretStr):
return api_key.get_secret_value()
return api_key


@pytest.fixture
def sample_config():
"""Create a sample LLM config dict."""
return {
"model": "test-model",
"base_url": "https://api.example.com",
"api_key": "config-api-key",
}


@pytest.fixture
def config_file(sample_config):
"""Create a temporary config file."""
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(sample_config, f)
f.flush()
yield f.name
os.unlink(f.name)


def test_load_llm_config_from_file(config_file, sample_config):
"""Test loading LLM config from file without env var override."""
with patch.dict(os.environ, {}, clear=True):
# Ensure LLM_API_KEY is not set
os.environ.pop(LLM_API_KEY_ENV_VAR, None)

llm = load_llm_config(config_file)

assert llm.model == sample_config["model"]
assert llm.base_url == sample_config["base_url"]
assert get_api_key_value(llm.api_key) == sample_config["api_key"]


def test_load_llm_config_with_env_override(config_file, sample_config):
"""Test that LLM_API_KEY env var overrides the config file api_key."""
env_api_key = "env-override-api-key"

with patch.dict(os.environ, {LLM_API_KEY_ENV_VAR: env_api_key}):
llm = load_llm_config(config_file)

assert llm.model == sample_config["model"]
assert llm.base_url == sample_config["base_url"]
# api_key should be overridden by env var
assert get_api_key_value(llm.api_key) == env_api_key


def test_load_llm_config_env_override_empty_string(config_file, sample_config):
"""Test that empty string env var does not override config."""
with patch.dict(os.environ, {LLM_API_KEY_ENV_VAR: ""}):
llm = load_llm_config(config_file)

# Empty string is falsy, so config value should be used
assert get_api_key_value(llm.api_key) == sample_config["api_key"]


def test_load_llm_config_file_not_found():
"""Test that ValueError is raised when config file doesn't exist."""
with pytest.raises(ValueError, match="does not exist"):
load_llm_config("/nonexistent/path/config.json")


def test_load_llm_config_without_api_key_in_file():
"""Test loading config without api_key in file, with env var set."""
config_without_key = {
"model": "test-model",
"base_url": "https://api.example.com",
}

with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump(config_without_key, f)
f.flush()
config_path = f.name

try:
env_api_key = "env-api-key"
with patch.dict(os.environ, {LLM_API_KEY_ENV_VAR: env_api_key}):
llm = load_llm_config(config_path)

assert llm.model == config_without_key["model"]
assert get_api_key_value(llm.api_key) == env_api_key
finally:
os.unlink(config_path)


def test_llm_api_key_env_var_constant():
"""Test that the env var constant is correctly defined."""
assert LLM_API_KEY_ENV_VAR == "LLM_API_KEY"