43 changes: 31 additions & 12 deletions vlmrun/cli/_cli/chat.py
@@ -273,6 +273,28 @@ def upload_files(
return file_responses


def _completion_create_kwargs(
model: str,
messages: List[Dict[str, Any]],
stream: bool,
session_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Build kwargs for agent.completions.create() (CLI-only).

session_id is a VLM Run-specific parameter. The OpenAI client does not accept
it as a top-level argument; we pass it via extra_body so the backend gets it
without changing the OpenAI client.
"""
kwargs: Dict[str, Any] = {
"model": model,
"messages": messages,
"stream": stream,
}
if session_id is not None:
kwargs["extra_body"] = {"session_id": session_id}
return kwargs


def build_messages(
prompt: str, file_responses: Optional[List[FileResponse]] = None
) -> List[Dict[str, Any]]:
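A minimal usage sketch of the new helper (the model name and session id below are placeholders, and client is assumed to be an initialized VLMRun instance): session_id ends up under extra_body, which the OpenAI-compatible client forwards in the request body without any client changes.

kwargs = _completion_create_kwargs(
    model="vlmrun-agent",  # placeholder model id
    messages=[{"role": "user", "content": "Summarize the attached file."}],
    stream=False,
    session_id="sess-1234",  # hypothetical session id
)
# kwargs == {"model": "vlmrun-agent", "messages": [...], "stream": False,
#            "extra_body": {"session_id": "sess-1234"}}
response = client.agent.completions.create(**kwargs)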
@@ -613,19 +635,17 @@ def chat(
handle_api_errors(),
):
response = client.agent.completions.create(
-model=model,
-messages=messages,
-stream=False,
-session_id=session_id,
+**_completion_create_kwargs(
+    model, messages, False, session_id
+)
)
else:
# JSON output: no status messages, just make the API call
with handle_api_errors():
response = client.agent.completions.create(
-model=model,
-messages=messages,
-stream=False,
-session_id=session_id,
+**_completion_create_kwargs(
+    model, messages, False, session_id
+)
)

latency_s = time.time() - start_time
@@ -680,10 +700,9 @@ def chat(
handle_api_errors(),
):
stream = client.agent.completions.create(
-model=model,
-messages=messages,
-stream=True,
-session_id=session_id,
+**_completion_create_kwargs(
+    model, messages, True, session_id
+)
)

# Collect streaming content and usage data
8 changes: 6 additions & 2 deletions vlmrun/cli/cli.py
@@ -20,7 +20,7 @@
from vlmrun.cli._cli.config import app as config_app, get_config
from vlmrun.cli._cli.chat import chat as chat_command
from vlmrun.cli._cli.models import app as models_app
-from vlmrun.constants import DEFAULT_BASE_URL
+from vlmrun.constants import DEFAULT_AGENT_BASE_URL, DEFAULT_BASE_URL

app = typer.Typer(
name="vlmrun",
@@ -119,7 +119,11 @@ def main(

if ctx.invoked_subcommand is not None:
check_credentials(ctx, api_key, base_url)
-ctx.obj = VLMRun(api_key=api_key, base_url=base_url)
+# Chat subcommand uses the Agent API; use agent base URL when none set
+client_base_url = base_url
+if ctx.invoked_subcommand == "chat" and client_base_url is None:
+    client_base_url = DEFAULT_AGENT_BASE_URL
+ctx.obj = VLMRun(api_key=api_key, base_url=client_base_url)
Comment on lines +122 to +126


Severity: high

The current logic for determining client_base_url doesn't account for the base_url that might be set in the user's config file. The base_url variable from typer.Option will be None if not provided via the CLI or an environment variable. This causes the logic to fall back to DEFAULT_AGENT_BASE_URL for the chat command, even if a custom URL is specified in the configuration file.

This also relates to a pre-existing issue where the api_key from the config file isn't passed to the VLMRun client. I suggest resolving both api_key and base_url from the config file before initializing the client to ensure consistent and predictable behavior.

Suggested change
-# Chat subcommand uses the Agent API; use agent base URL when none set
-client_base_url = base_url
-if ctx.invoked_subcommand == "chat" and client_base_url is None:
-    client_base_url = DEFAULT_AGENT_BASE_URL
-ctx.obj = VLMRun(api_key=api_key, base_url=client_base_url)
+# Chat subcommand uses the Agent API; use agent base URL when none set
+config = get_config()
+resolved_api_key = api_key or config.api_key
+client_base_url = base_url or config.base_url
+if ctx.invoked_subcommand == "chat" and client_base_url is None:
+    client_base_url = DEFAULT_AGENT_BASE_URL
+ctx.obj = VLMRun(api_key=resolved_api_key, base_url=client_base_url)
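For reference, a small sketch of the precedence this suggestion implies (explicit CLI/env value first, then the config file, then the agent default only for the chat subcommand); the resolve_base_url helper is hypothetical and not part of the PR:

from typing import Optional

from vlmrun.constants import DEFAULT_AGENT_BASE_URL


def resolve_base_url(
    cli_base_url: Optional[str],     # value from the CLI option or env var, if any
    config_base_url: Optional[str],  # value from the user's config file, if any
    invoked_subcommand: Optional[str],
) -> Optional[str]:
    # An explicit CLI/env value wins, then the config file value.
    resolved = cli_base_url or config_base_url
    # Only the chat subcommand falls back to the Agent API base URL.
    if invoked_subcommand == "chat" and resolved is None:
        return DEFAULT_AGENT_BASE_URL
    return resolved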



# Add subcommands
2 changes: 2 additions & 0 deletions vlmrun/constants.py
@@ -2,6 +2,8 @@
import os

DEFAULT_BASE_URL = "https://api.vlm.run/v1"
# Agent API base URL (used by the chat subcommand for OpenAI-compatible completions)
DEFAULT_AGENT_BASE_URL = "https://agent.vlm.run/v1"

# Cache directories - use VLMRUN_CACHE_DIR env var if set, otherwise default to ~/.vlmrun/cache
VLMRUN_HOME = Path.home() / ".vlmrun"
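As a usage note, a minimal sketch of pointing a standalone client at the new constant; the from vlmrun.client import VLMRun path and the API key are assumptions, and the chat subcommand performs this fallback automatically when no base URL is configured.

from vlmrun.client import VLMRun  # import path is an assumption
from vlmrun.constants import DEFAULT_AGENT_BASE_URL

# Explicitly target the Agent API used by the chat subcommand for
# OpenAI-compatible completions.
client = VLMRun(api_key="YOUR_API_KEY", base_url=DEFAULT_AGENT_BASE_URL)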