diff --git a/Makefile b/Makefile
index 0d173c29..2dd9fc58 100644
--- a/Makefile
+++ b/Makefile
@@ -109,11 +109,11 @@ upload-distribution-archives: ## Upload distribution archives into Python regist
 	uv run python -m twine upload --repository ${PYTHON_REGISTRY} dist/*
 
 konflux-requirements: ## generate hermetic requirements.*.txt file for konflux build
-	uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
-	uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
+	uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
+	uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
 	./scripts/remove_torch_deps.sh requirements.x86_64.txt
 	./scripts/remove_torch_deps.sh requirements.aarch64.txt
-	echo "torch==${TORCH_VERSION}" | uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu
+	echo "torch==${TORCH_VERSION}" | uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu --refresh
 
 help: ## Show this help screen
 	@echo 'Usage: make <OPTIONS> ... <TARGETS>'
diff --git a/pyproject.toml b/pyproject.toml
index 62f5ab0e..180b55ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,8 +28,8 @@ dependencies = [
     # Used by authentication/k8s integration
     "kubernetes>=30.1.0",
     # Used to call Llama Stack APIs
-    "llama-stack==0.3.4",
-    "llama-stack-client==0.3.4",
+    "llama-stack==0.3.5",
+    "llama-stack-client==0.3.5",
     # Used by Logger
     "rich>=14.0.0",
     # Used by JWK token auth handler
diff --git a/requirements.aarch64.txt b/requirements.aarch64.txt
index 2378fd5b..3a6d03c4 100644
--- a/requirements.aarch64.txt
+++ b/requirements.aarch64.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
+#    uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
 accelerate==1.12.0 \
     --hash=sha256:3e2091cd341423207e2f084a6654b1efcd250dc326f2a37d6dde446e07cabb11 \
     --hash=sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6
@@ -1393,13 +1393,13 @@ litellm==1.80.10 \
     --hash=sha256:4a4aff7558945c2f7e5c6523e67c1b5525a46b10b0e1ad6b8f847cb13b16779e \
     --hash=sha256:9b3e561efaba0eb1291cb1555d3dcb7283cf7f3cb65aadbcdb42e2a8765898c8
     # via lightspeed-stack (pyproject.toml)
-llama-stack==0.3.4 \
-    --hash=sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147 \
-    --hash=sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b
+llama-stack==0.3.5 \
+    --hash=sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b \
+    --hash=sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366
     # via lightspeed-stack (pyproject.toml)
-llama-stack-client==0.3.4 \
-    --hash=sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688 \
-    --hash=sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893
+llama-stack-client==0.3.5 \
+    --hash=sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485 \
+    --hash=sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2
     # via
     #   lightspeed-stack (pyproject.toml)
     #   llama-stack
diff --git a/requirements.torch.txt b/requirements.torch.txt
index c81da137..404bf76c 100644
--- a/requirements.torch.txt
+++ b/requirements.torch.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu
+#    uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu --refresh
 --index-url https://download.pytorch.org/whl/cpu
 
 torch==2.7.1+cpu \
diff --git a/requirements.x86_64.txt b/requirements.x86_64.txt
index 1cc052ae..de18dc32 100644
--- a/requirements.x86_64.txt
+++ b/requirements.x86_64.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
+#    uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
 accelerate==1.12.0 \
     --hash=sha256:3e2091cd341423207e2f084a6654b1efcd250dc326f2a37d6dde446e07cabb11 \
     --hash=sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6
@@ -1393,13 +1393,13 @@ litellm==1.80.10 \
     --hash=sha256:4a4aff7558945c2f7e5c6523e67c1b5525a46b10b0e1ad6b8f847cb13b16779e \
     --hash=sha256:9b3e561efaba0eb1291cb1555d3dcb7283cf7f3cb65aadbcdb42e2a8765898c8
     # via lightspeed-stack (pyproject.toml)
-llama-stack==0.3.4 \
-    --hash=sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147 \
-    --hash=sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b
+llama-stack==0.3.5 \
+    --hash=sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b \
+    --hash=sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366
     # via lightspeed-stack (pyproject.toml)
-llama-stack-client==0.3.4 \
-    --hash=sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688 \
-    --hash=sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893
+llama-stack-client==0.3.5 \
+    --hash=sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485 \
+    --hash=sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2
     # via
     #   lightspeed-stack (pyproject.toml)
     #   llama-stack
diff --git a/src/constants.py b/src/constants.py
index 7829f924..959dccd9 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -2,7 +2,7 @@
 
 # Minimal and maximal supported Llama Stack version
 MINIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.17"
-MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.3.4"
+MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.3.5"
 
 UNABLE_TO_PROCESS_RESPONSE = "Unable to process this request"
 
diff --git a/tests/e2e/configuration/lightspeed-stack-library-mode.yaml b/tests/e2e/configuration/lightspeed-stack-library-mode.yaml
new file mode 100644
index 00000000..47257bfb
--- /dev/null
+++ b/tests/e2e/configuration/lightspeed-stack-library-mode.yaml
@@ -0,0 +1,19 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Library mode - embeds llama-stack as library
+  use_as_library_client: true
+  library_client_config_path: run.yaml
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
diff --git a/tests/e2e/configuration/lightspeed-stack-server-mode.yaml b/tests/e2e/configuration/lightspeed-stack-server-mode.yaml
new file mode 100644
index 00000000..cc699ba8
--- /dev/null
+++ b/tests/e2e/configuration/lightspeed-stack-server-mode.yaml
@@ -0,0 +1,20 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Server mode - connects to separate llama-stack service
+  use_as_library_client: false
+  url: http://llama-stack:8321
+  api_key: xyzzy
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
diff --git a/tests/e2e/features/info.feature b/tests/e2e/features/info.feature
index 37845689..df7e30de 100644
--- a/tests/e2e/features/info.feature
+++ b/tests/e2e/features/info.feature
@@ -16,7 +16,7 @@ Feature: Info tests
     When I access REST API endpoint "info" using HTTP GET method
     Then The status code of the response is 200
     And The body of the response has proper name Lightspeed Core Service (LCS) and version 0.3.1
-    And The body of the response has llama-stack version 0.3.4
+    And The body of the response has llama-stack version 0.3.5
 
   @skip-in-library-mode
   Scenario: Check if info endpoint reports error when llama-stack connection is not working
diff --git a/uv.lock b/uv.lock
index 5ce364d0..1b559c33 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.12, <3.14"
 resolution-markers = [
     "python_full_version >= '3.13' and sys_platform != 'darwin'",
@@ -1433,8 +1433,8 @@ requires-dist = [
     { name = "jsonpath-ng", specifier = ">=1.6.1" },
     { name = "kubernetes", specifier = ">=30.1.0" },
     { name = "litellm", specifier = ">=1.75.5.post1" },
-    { name = "llama-stack", specifier = "==0.3.4" },
-    { name = "llama-stack-client", specifier = "==0.3.4" },
+    { name = "llama-stack", specifier = "==0.3.5" },
+    { name = "llama-stack-client", specifier = "==0.3.5" },
     { name = "openai", specifier = ">=1.99.9" },
     { name = "prometheus-client", specifier = ">=0.22.1" },
     { name = "psycopg2-binary", specifier = ">=2.9.10" },
@@ -1530,7 +1530,7 @@ wheels = [
 
 [[package]]
 name = "llama-stack"
-version = "0.3.4"
+version = "0.3.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiohttp" },
@@ -1559,14 +1559,14 @@ dependencies = [
     { name = "tiktoken" },
     { name = "uvicorn" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8f/c5/ade666e8ce894066c0358988e831b31c81840e7b285aa8b5f70236e33681/llama_stack-0.3.4.tar.gz", hash = "sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b", size = 3322494, upload-time = "2025-12-03T19:00:18.397Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/68/967f95e5fe3a650b9bb6a18c4beeb39e734695d92f1ab1525c5b9bfadb1b/llama_stack-0.3.5.tar.gz", hash = "sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b", size = 3320526, upload-time = "2025-12-15T14:34:32.96Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/49/14/c98e5b564b425e4fc7aabf33f4bf9f40c43057424a555f023bcd8e334874/llama_stack-0.3.4-py3-none-any.whl", hash = "sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147", size = 3637817, upload-time = "2025-12-03T19:00:16.581Z" },
+    { url = "https://files.pythonhosted.org/packages/24/70/fb1896f07fc38a94b4c0bfb5999872d1514c6b3259fe77358cadef77a3db/llama_stack-0.3.5-py3-none-any.whl", hash = "sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366", size = 3636815, upload-time = "2025-12-15T14:34:31.354Z" },
 ]
 
 [[package]]
 name = "llama-stack-client"
-version = "0.3.4"
+version = "0.3.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -1585,9 +1585,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/6a/10/9c198c62e720c647a01506f40ba4e058a5b2a23c947fab1827eb096a94f2/llama_stack_client-0.3.4.tar.gz", hash = "sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688", size = 335668, upload-time = "2025-12-03T18:59:25.48Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/34/ff/b4bb891249379849e6e273a6254998c7e08562613ca4020817af2da9498e/llama_stack_client-0.3.5.tar.gz", hash = "sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485", size = 335659, upload-time = "2025-12-15T14:10:16.444Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ae/b9/bcc815cee68ef87635edf72f9454dd35cef8492d2670f5a6b229b5913f0b/llama_stack_client-0.3.4-py3-none-any.whl", hash = "sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893", size = 425244, upload-time = "2025-12-03T18:59:24.293Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/10/84a4f0ef1cc13f44a692e55bed6a55792671e5320c95a8fd581e02848d61/llama_stack_client-0.3.5-py3-none-any.whl", hash = "sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2", size = 425244, upload-time = "2025-12-15T14:10:14.726Z" },
 ]
 
 [[package]]