Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
44a124e
fix: improve path handling in sync client pull operation
AlePouroullis May 15, 2025
83fb88d
Improve and make path handling more consistent in call and log overloads
AlePouroullis May 15, 2025
fb292db
refactor(tests): update normalize_path test to cover strip_extension …
AlePouroullis May 15, 2025
c0a200a
fix: improve path validation for SDK calls
AlePouroullis May 16, 2025
32f0b1b
feat: add path utils for centralized path validation
AlePouroullis May 16, 2025
9b7c809
Further refined path handling and added more tests
AlePouroullis May 16, 2025
e5314f2
refactor: use pytest tmp_path fixture to isolate test file operations
AlePouroullis May 16, 2025
4965c7b
test: fix type errors in local file operations test by using proper C…
AlePouroullis May 16, 2025
cdf449f
docs(cli): clarify SyncClient log level control and OpenTelemetry iso…
AlePouroullis May 16, 2025
25fce50
refactor: simplified path processing to use pathlib where possible
AlePouroullis May 16, 2025
d21020a
docs: improve comment clarity
AlePouroullis May 16, 2025
2504e51
test: improve path normalization tests with parametrize and edge cases
AlePouroullis May 16, 2025
a9de255
docs: clarify pull_file docstring with failure example
AlePouroullis May 16, 2025
08e9a4f
docs: expand normalize_path docstring with usage and examples
AlePouroullis May 16, 2025
5f481f0
Merge branch 'master' of github.com:humanloop/humanloop-python into f…
AlePouroullis May 16, 2025
44a95b1
refactor: SyncClient -> FileSyncer
AlePouroullis May 16, 2025
9a703b9
fix(sync): Convert base_dir to string in FileSyncer fixture
AlePouroullis May 16, 2025
6b2e5a0
fix(dosc): correct logger namespace reference
AlePouroullis May 16, 2025
0a2dd2e
docs: Improve error messages and comments in FileSyncer
AlePouroullis May 16, 2025
c5bec0a
refactor(test): use pytest.mark.parametrize for path validation tests
AlePouroullis May 16, 2025
14294eb
fix(test): use proper typing.Callable for path generator
AlePouroullis May 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .fernignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

src/humanloop/evals
src/humanloop/prompt_utils.py
src/humanloop/path_utils.py
src/humanloop/client.py
src/humanloop/overload.py
src/humanloop/context.py
Expand Down
129 changes: 109 additions & 20 deletions poetry.lock

Large diffs are not rendered by default.

12 changes: 9 additions & 3 deletions src/humanloop/cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from dotenv import load_dotenv

from humanloop import Humanloop
from humanloop.sync.sync_client import SyncClient
from humanloop.sync.file_syncer import FileSyncer

# Set up logging
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -154,6 +154,7 @@ def cli(): # Does nothing because used as a group for other subcommands (pull,
"-p",
help="Path in the Humanloop workspace to pull from (file or directory). You can pull an entire directory (e.g. 'my/directory') "
"or a specific file (e.g. 'my/directory/my_prompt.prompt'). When pulling a directory, all files within that directory and its subdirectories will be included. "
"Paths should not contain leading or trailing slashes. "
"If not specified, pulls from the root of the remote workspace.",
default=None,
)
Expand Down Expand Up @@ -218,7 +219,12 @@ def pull(

Currently only supports syncing Prompt and Agent files. Other file types will be skipped."""
client = get_client(api_key, env_file, base_url)
sync_client = SyncClient(
# Although pull() is available on the Humanloop client, we instantiate FileSyncer separately to control its log level.
# This allows CLI users to toggle between detailed logging (--verbose) and minimal output without affecting the
# main Humanloop client logger. The FileSyncer uses its own logger namespace (humanloop.sdk.file_syncer), making this
# modification isolated from the client's OpenTelemetry setup. This client instance is short-lived and only
# exists for the duration of the CLI command execution.
file_syncer = FileSyncer(
client, base_dir=local_files_directory, log_level=logging.DEBUG if verbose else logging.WARNING
)

Expand All @@ -227,7 +233,7 @@ def pull(
click.echo(click.style(f"Environment: {environment or '(default)'}", fg=INFO_COLOR))

start_time = time.time()
successful_files, failed_files = sync_client.pull(path, environment)
successful_files, failed_files = file_syncer.pull(path, environment)
duration_ms = int((time.time() - start_time) * 1000)

# Determine if the operation was successful based on failed_files
Expand Down
10 changes: 5 additions & 5 deletions src/humanloop/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from humanloop.overload import overload_client
from humanloop.prompt_utils import populate_template
from humanloop.prompts.client import PromptsClient
from humanloop.sync.sync_client import DEFAULT_CACHE_SIZE, SyncClient
from humanloop.sync.file_syncer import DEFAULT_CACHE_SIZE, FileSyncer

logger = logging.getLogger("humanloop.sdk")

Expand Down Expand Up @@ -158,7 +158,7 @@ def __init__(
)

# Check if cache_size is non-default but use_local_files is False
self._sync_client = SyncClient(client=self, base_dir=local_files_directory, cache_size=cache_size)
self._file_syncer = FileSyncer(client=self, base_dir=local_files_directory, cache_size=cache_size)
eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper)
eval_client.client = self
self.evaluations = eval_client
Expand All @@ -168,10 +168,10 @@ def __init__(
# and the @flow decorator providing the trace_id
# Additionally, call and log methods are overloaded in the prompts and agents client to support the use of local files
self.prompts = overload_client(
client=self.prompts, sync_client=self._sync_client, use_local_files=self.use_local_files
client=self.prompts, file_syncer=self._file_syncer, use_local_files=self.use_local_files
)
self.agents = overload_client(
client=self.agents, sync_client=self._sync_client, use_local_files=self.use_local_files
client=self.agents, file_syncer=self._file_syncer, use_local_files=self.use_local_files
)
self.flows = overload_client(client=self.flows)
self.tools = overload_client(client=self.tools)
Expand Down Expand Up @@ -439,7 +439,7 @@ def pull(self, path: Optional[str] = None, environment: Optional[str] = None) ->
or filesystem issues)
:raises HumanloopRuntimeError: If there's an error communicating with the API
"""
return self._sync_client.pull(environment=environment, path=path)
return self._file_syncer.pull(environment=environment, path=path)


class AsyncHumanloop(AsyncBaseHumanloop):
Expand Down
121 changes: 86 additions & 35 deletions src/humanloop/overload.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import inspect
import logging
import types
from pathlib import Path
from typing import Any, Callable, Dict, Optional, TypeVar, Union

from humanloop.agents.client import AgentsClient
Expand All @@ -14,7 +15,7 @@
from humanloop.evaluators.client import EvaluatorsClient
from humanloop.flows.client import FlowsClient
from humanloop.prompts.client import PromptsClient
from humanloop.sync.sync_client import SyncClient
from humanloop.sync.file_syncer import FileSyncer
from humanloop.tools.client import ToolsClient
from humanloop.types import FileType
from humanloop.types.agent_call_response import AgentCallResponse
Expand Down Expand Up @@ -65,7 +66,7 @@ def _get_file_type_from_client(

def _handle_tracing_context(kwargs: Dict[str, Any], client: T) -> Dict[str, Any]:
"""Handle tracing context for both log and call methods."""
trace_id = get_trace_id()
trace_id = get_trace_id()
if trace_id is not None:
if "flow" in str(type(client).__name__).lower():
context = get_decorator_context()
Expand All @@ -91,45 +92,81 @@ def _handle_tracing_context(kwargs: Dict[str, Any], client: T) -> Dict[str, Any]
def _handle_local_files(
kwargs: Dict[str, Any],
client: T,
sync_client: Optional[SyncClient],
use_local_files: bool,
file_syncer: FileSyncer,
) -> Dict[str, Any]:
"""Handle local file loading if enabled."""
if not use_local_files or "path" not in kwargs or sync_client is None:
return kwargs
"""Load prompt/agent file content from local filesystem into API request.

Retrieves the file content at the specified path and adds it to kwargs
under the appropriate field ('prompt' or 'agent'), allowing local files
to be used in API calls instead of fetching from Humanloop API.

Args:
kwargs: API call arguments
client: Client instance making the call
file_syncer: FileSyncer handling local file operations

Returns:
Updated kwargs with file content in prompt/agent field

Raises:
HumanloopRuntimeError: On validation or file loading failures.
For example, an invalid path format (absolute paths, leading/trailing slashes, etc.) or a file not being found.
"""
if "id" in kwargs:
raise HumanloopRuntimeError("Can only specify one of `id` or `path`")

path = kwargs["path"]

# First check for path format issues (absolute paths or leading/trailing slashes)
normalized_path = path.strip("/")
if Path(path).is_absolute() or path != normalized_path:
raise HumanloopRuntimeError(
f"Path '{path}' format is invalid. "
f"Paths must follow the standard API format 'path/to/resource' without leading or trailing slashes. "
f"Please use '{normalized_path}' instead."
)

# Then check for file extensions
if file_syncer.is_file(path):
# Extract the path without extension to suggest correct format in the error message
path_without_extension = str(Path(path).with_suffix(""))

# Always raise error when file extension is detected (based on the outer if condition)
raise HumanloopRuntimeError(
f"Path '{path}' includes a file extension which is not supported in API calls. "
f"When referencing files via the `path` parameter, use the path without extensions: '{path_without_extension}'. "
f"Note: File extensions are only used when pulling specific files via the CLI."
)

# Check if version_id or environment is specified
use_remote = any(["version_id" in kwargs, "environment" in kwargs])
normalized_path = sync_client._normalize_path(kwargs["path"])

if use_remote:
raise HumanloopRuntimeError(
f"Cannot use local file for `{normalized_path}` as version_id or environment was specified. "
f"Cannot use local file for `{path}` as version_id or environment was specified. "
"Please either remove version_id/environment to use local files, or set use_local_files=False to use remote files."
)

file_type = _get_file_type_from_client(client)
if file_type not in SyncClient.SERIALIZABLE_FILE_TYPES:
raise HumanloopRuntimeError(f"Local files are not supported for `{file_type}` files.")
if file_type not in FileSyncer.SERIALIZABLE_FILE_TYPES:
raise HumanloopRuntimeError(f"Local files are not supported for `{file_type.capitalize()}` files: '{path}'.")

# If file_type is already specified in kwargs, it means user provided a PromptKernelRequestParams object
# If file_type is already specified in kwargs (`prompt` or `agent`), it means user provided a Prompt- or AgentKernelRequestParams object
# In this case, we should prioritize the user-provided value over the local file content.
if file_type in kwargs and not isinstance(kwargs[file_type], str):
logger.warning(
f"Ignoring local file for `{normalized_path}` as {file_type} parameters were directly provided. "
f"Ignoring local file for `{path}` as {file_type} parameters were directly provided. "
"Using provided parameters instead."
)
return kwargs

try:
file_content = sync_client.get_file_content(normalized_path, file_type) # type: ignore[arg-type] # file_type was checked above
file_content = file_syncer.get_file_content(path, file_type) # type: ignore[arg-type] # file_type was checked above
kwargs[file_type] = file_content
except HumanloopRuntimeError as e:
raise HumanloopRuntimeError(f"Failed to use local file for `{normalized_path}`: {str(e)}")

return kwargs
return kwargs
except HumanloopRuntimeError as e:
raise HumanloopRuntimeError(f"Failed to use local file for `{path}`: {str(e)}")


def _handle_evaluation_context(kwargs: Dict[str, Any]) -> tuple[Dict[str, Any], Optional[Callable[[str], None]]]:
Expand All @@ -140,7 +177,7 @@ def _handle_evaluation_context(kwargs: Dict[str, Any]) -> tuple[Dict[str, Any],
return kwargs, None


def _overload_log(self: T, sync_client: Optional[SyncClient], use_local_files: bool, **kwargs) -> LogResponseType:
def _overload_log(self: T, file_syncer: Optional[FileSyncer], use_local_files: bool, **kwargs) -> LogResponseType:
try:
# Special handling for flows - prevent direct log usage
if type(self) is FlowsClient and get_trace_id() is not None:
Expand All @@ -154,12 +191,20 @@ def _overload_log(self: T, sync_client: Optional[SyncClient], use_local_files: b

kwargs = _handle_tracing_context(kwargs, self)

# Handle local files for Prompts and Agents clients
if _get_file_type_from_client(self) in ["prompt", "agent"]:
if sync_client is None:
logger.error("sync_client is None but client has log method and use_local_files=%s", use_local_files)
raise HumanloopRuntimeError("sync_client is required for clients that support local file operations")
kwargs = _handle_local_files(kwargs, self, sync_client, use_local_files)
# Handle loading files from local filesystem when using Prompt and Agent clients
# This enables users to define prompts/agents in local files rather than fetching from the Humanloop API
if use_local_files and _get_file_type_from_client(self) in FileSyncer.SERIALIZABLE_FILE_TYPES:
# Developer note: file_syncer should always be provided during SDK initialization when
# use_local_files=True. If we hit this error, there's likely an initialization issue
# in Humanloop.__init__ where the file_syncer wasn't properly created or passed to the
# overload_client function.
if file_syncer is None:
logger.error("file_syncer is None but client has log method and use_local_files=%s", use_local_files)
raise HumanloopRuntimeError(
"SDK initialization error: file_syncer is missing but required for local file operations. "
"This is likely a bug in the SDK initialization - please report this issue to the Humanloop team."
)
kwargs = _handle_local_files(kwargs, self, file_syncer)

kwargs, eval_callback = _handle_evaluation_context(kwargs)
response = self._log(**kwargs) # type: ignore[union-attr] # Use stored original method
Expand All @@ -174,10 +219,16 @@ def _overload_log(self: T, sync_client: Optional[SyncClient], use_local_files: b
raise HumanloopRuntimeError from e


def _overload_call(self: T, sync_client: Optional[SyncClient], use_local_files: bool, **kwargs) -> CallResponseType:
def _overload_call(self: T, file_syncer: Optional[FileSyncer], use_local_files: bool, **kwargs) -> CallResponseType:
try:
kwargs = _handle_tracing_context(kwargs, self)
kwargs = _handle_local_files(kwargs, self, sync_client, use_local_files)
# If `use_local_files` flag is True, we should use local file content for `call` operations on Prompt and Agent clients.
if use_local_files and _get_file_type_from_client(self) in FileSyncer.SERIALIZABLE_FILE_TYPES:
# Same file_syncer requirement as in _overload_log - see developer note there
if file_syncer is None:
logger.error("file_syncer is None but client has call method and use_local_files=%s", use_local_files)
raise HumanloopRuntimeError("file_syncer is required for clients that support call operations")
kwargs = _handle_local_files(kwargs, self, file_syncer)
return self._call(**kwargs) # type: ignore[union-attr] # Use stored original method
except HumanloopRuntimeError:
# Re-raise HumanloopRuntimeError without wrapping to preserve the message
Expand All @@ -189,7 +240,7 @@ def _overload_call(self: T, sync_client: Optional[SyncClient], use_local_files:

def overload_client(
client: T,
sync_client: Optional[SyncClient] = None,
file_syncer: Optional[FileSyncer] = None,
use_local_files: bool = False,
) -> T:
"""Overloads client methods to add tracing, local file handling, and evaluation context."""
Expand All @@ -198,25 +249,25 @@ def overload_client(
# Store original method with type ignore
client._log = client.log # type: ignore

# Create a closure to capture sync_client and use_local_files
# Create a closure to capture file_syncer and use_local_files
def log_wrapper(self: T, **kwargs) -> LogResponseType:
return _overload_log(self, sync_client, use_local_files, **kwargs)
return _overload_log(self, file_syncer, use_local_files, **kwargs)

# Replace the log method with type ignore
client.log = types.MethodType(log_wrapper, client) # type: ignore

# Overload call method for Prompt and Agent clients
if _get_file_type_from_client(client) in ["prompt", "agent"]:
if sync_client is None and use_local_files:
logger.error("sync_client is None but client has call method and use_local_files=%s", use_local_files)
raise HumanloopRuntimeError("sync_client is required for clients that support call operations")
if _get_file_type_from_client(client) in FileSyncer.SERIALIZABLE_FILE_TYPES:
if file_syncer is None and use_local_files:
logger.error("file_syncer is None but client has call method and use_local_files=%s", use_local_files)
raise HumanloopRuntimeError("file_syncer is required for clients that support call operations")
if hasattr(client, "call") and not hasattr(client, "_call"):
# Store original method with type ignore
client._call = client.call # type: ignore

# Create a closure to capture sync_client and use_local_files
# Create a closure to capture file_syncer and use_local_files
def call_wrapper(self: T, **kwargs) -> CallResponseType:
return _overload_call(self, sync_client, use_local_files, **kwargs)
return _overload_call(self, file_syncer, use_local_files, **kwargs)

# Replace the call method with type ignore
client.call = types.MethodType(call_wrapper, client) # type: ignore
Expand Down
54 changes: 54 additions & 0 deletions src/humanloop/path_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from pathlib import Path, PurePosixPath


def normalize_path(path: str, strip_extension: bool = False) -> str:
    r"""Normalize a path to the standard Humanloop API format.

    This function is primarily used when interacting with the Humanloop API to ensure paths
    follow the standard format: 'path/to/resource' without leading/trailing slashes.
    It's used when pulling files from Humanloop to local filesystem (see FileSyncer.pull)

    The function:
    - Converts Windows backslashes to forward slashes
    - Normalizes consecutive slashes
    - Optionally strips the final file extension (e.g. .prompt, .agent)
    - Removes leading/trailing slashes to match API conventions

    Leading/trailing slashes are stripped because the Humanloop API expects paths in the
    format 'path/to/resource' without them. This is consistent with how the API stores
    and references files, and ensures paths work correctly in both API calls and local
    filesystem operations.

    The result is platform-independent: it always uses forward slashes, regardless of
    the operating system the SDK is running on.

    Args:
        path: The path to normalize. Can be a Windows or Unix-style path.
        strip_extension: If True, removes the last suffix of the final path component
            (e.g. .prompt, .agent). Only the last suffix is removed, so 'a/b.v1.prompt'
            becomes 'a/b.v1'.

    Returns:
        Normalized path string in the format 'path/to/resource'

    Raises:
        ValueError: If strip_extension is True and the path has no final component
            to strip a suffix from (e.g. '' or '/').

    Examples:
        >>> normalize_path("path/to/file.prompt")
        'path/to/file.prompt'
        >>> normalize_path("path/to/file.prompt", strip_extension=True)
        'path/to/file'
        >>> normalize_path("\\windows\\style\\path.prompt")
        'windows/style/path.prompt'
        >>> normalize_path("/leading/slash/path/")
        'leading/slash/path'
        >>> normalize_path("multiple//slashes//path")
        'multiple/slashes/path'
    """
    # Convert Windows separators up front: PurePosixPath treats a backslash as an
    # ordinary filename character, not as a separator.
    path = path.replace("\\", "/")

    # Use PurePosixPath rather than the platform-dependent Path. Both collapse
    # consecutive slashes, but Path renders with backslashes on Windows, which
    # would leak OS-specific separators into API paths.
    path_obj = PurePosixPath(path)

    # Strip extension if requested
    if strip_extension:
        path_obj = path_obj.with_suffix("")

    # Remove any leading/trailing slashes; separators are already forward slashes.
    return str(path_obj).strip("/")
4 changes: 2 additions & 2 deletions src/humanloop/sync/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from humanloop.sync.sync_client import SyncClient
from humanloop.sync.file_syncer import FileSyncer

__all__ = ["SyncClient"]
__all__ = ["FileSyncer"]
Loading