diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index cbdd2de51..b10e1ef4c 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -13,6 +13,7 @@ services: - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z - ./run.yaml:/app-root/run.yaml:Z - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro + - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z environment: - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} diff --git a/docker-compose.yaml b/docker-compose.yaml index 8122011aa..0c141b201 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -11,6 +11,7 @@ services: volumes: - ./run.yaml:/opt/app-root/run.yaml:Z - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro + - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z environment: - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} diff --git a/docs/providers.md b/docs/providers.md index 32f320dcb..3874f60ff 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -100,7 +100,7 @@ Red Hat providers: | Name | Type | Pip Dependencies | Supported in LCS | |---|---|---|:---:| | code-scanner | inline | `codeshield` | ❌ | -| llama-guard | inline | — | ✅ | +| llama-guard | inline | — | ❌ | | prompt-guard | inline | `transformers[accelerate]`, `torch --index-url https://download.pytorch.org/whl/cpu` | ❌ | | bedrock | remote | `boto3` | ❌ | | nvidia | remote | `requests` | ❌ | @@ -157,7 +157,7 @@ Red Hat providers: | Name | Type | Pip Dependencies | Supported in LCS | |---|---|---|:---:| -| rag-runtime | inline | `chardet`,`pypdf`, `tqdm`, `numpy`, `scikit-learn`, `scipy`, `nltk`, `sentencepiece`, `transformers` | ❌ | +| rag-runtime | inline | `chardet`,`pypdf`, `tqdm`, `numpy`, `scikit-learn`, `scipy`, `nltk`, `sentencepiece`, `transformers` | ✅ | | bing-search | remote | `requests` | ❌ | | brave-search | remote | `requests` | ❌ | | model-context-protocol | remote | `mcp>=1.8.1` | ✅ | diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml index a50301add..206ac0110 100644 --- a/examples/azure-run.yaml +++ b/examples/azure-run.yaml @@ -1,128 +1,161 @@ -version: '2' -image_name: minimal-viable-llama-stack-configuration +version: 2 +image_name: azure-configuration apis: - - agents - - datasetio - - eval - - files - - inference - - post_training - - safety - - scoring - - telemetry - - tool_runtime - - vector_io +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io + benchmarks: [] -container_image: null +conversations_store: + db_path: ~/.llama/storage/conversations.db + type: sqlite datasets: [] -external_providers_dir: null +# external_providers_dir: /opt/app-root/src/.llama/providers.d inference_store: - db_path: .llama/distributions/ollama/inference_store.db + db_path: ~/.llama/storage/inference-store.db type: sqlite -logging: null metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null + db_path: ~/.llama/storage/registry.db type: sqlite + providers: - files: - - provider_id: localfs - provider_type: inline::localfs + inference: + - provider_id: azure + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY} + api_base: https://ols-test.openai.azure.com/ + api_version: 2024-02-15-preview + - provider_id: openai + provider_type: remote::openai config: - storage_dir: /tmp/llama-stack-files + api_key: ${env.OPENAI_API_KEY} + - 
config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: metadata_store: - type: sqlite - db_path: .llama/distributions/ollama/files_metadata.db + table_name: files_metadata + backend: sql_default + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - - provider_id: meta-reference + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference provider_type: inline::meta-reference - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference datasetio: - - provider_id: huggingface + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface provider_type: remote::huggingface - config: + - config: kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite - - provider_id: localfs + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs provider_type: inline::localfs - config: - kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: + - config: kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite - inference: - - provider_id: azure - provider_type: remote::azure - config: - api_key: ${env.AZURE_API_KEY} - api_base: https://ols-test.openai.azure.com/ - api_version: 2024-02-15-preview - api_type: ${env.AZURE_API_TYPE:=} - post_training: - - provider_id: huggingface - provider_type: inline::huggingface-gpu - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: null - dpo_output_dir: "." 
- safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: '********' - telemetry: - - provider_id: meta-reference + namespace: eval_store + backend: kv_default + provider_id: meta-reference provider_type: inline::meta-reference - config: - service_name: 'lightspeed-stack-telemetry' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} scoring_fns: [] server: - auth: null - host: null port: 8321 - quota: null - tls_cafile: null - tls_certfile: null - tls_keyfile: null -shields: [] -models: +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: - model_id: gpt-4o-mini - model_type: llm provider_id: azure - provider_model_id: gpt-4o-mini \ No newline at end of file + model_type: llm + provider_model_id: gpt-4o-mini + shields: + - shield_id: llama-guard + provider_id: llama-guard + provider_shield_id: openai/gpt-4o-mini + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard +telemetry: + enabled: true \ No newline at end of file diff --git a/examples/gemini-run.yaml b/examples/gemini-run.yaml deleted file mode 100644 index 91edfb5dc..000000000 --- a/examples/gemini-run.yaml +++ /dev/null @@ -1,112 +0,0 @@ -# Example llama-stack configuration for Google Gemini inference -# -# Contributed by @eranco74 (2025-08). See https://github.com/rh-ecosystem-edge/assisted-chat/blob/main/template.yaml#L282-L386 -# This file shows how to integrate Gemini with LCS. -# -# Notes: -# - You will need valid Gemini API credentials to run this. -# - You will need a postgres instance to run this config. 
-# -version: 2 -image_name: gemini-config -apis: -- agents -- datasetio -- eval -- files -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY} - vector_io: [] - files: [] - safety: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: postgres - host: ${env.LLAMA_STACK_POSTGRES_HOST} - port: ${env.LLAMA_STACK_POSTGRES_PORT} - db: ${env.LLAMA_STACK_POSTGRES_NAME} - user: ${env.LLAMA_STACK_POSTGRES_USER} - password: ${env.LLAMA_STACK_POSTGRES_PASSWORD} - responses_store: - type: postgres - host: ${env.LLAMA_STACK_POSTGRES_HOST} - port: ${env.LLAMA_STACK_POSTGRES_PORT} - db: ${env.LLAMA_STACK_POSTGRES_NAME} - user: ${env.LLAMA_STACK_POSTGRES_USER} - password: ${env.LLAMA_STACK_POSTGRES_PASSWORD} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${LLAMA_STACK_OTEL_SERVICE_NAME}" - sinks: ${LLAMA_STACK_TELEMETRY_SINKS} - sqlite_db_path: ${STORAGE_MOUNT_PATH}/sqlite/trace_store.db - eval: [] - datasetio: [] - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${STORAGE_MOUNT_PATH}/sqlite/registry.db -inference_store: - type: postgres - host: ${env.LLAMA_STACK_POSTGRES_HOST} - port: ${env.LLAMA_STACK_POSTGRES_PORT} - db: ${env.LLAMA_STACK_POSTGRES_NAME} - user: ${env.LLAMA_STACK_POSTGRES_USER} - password: ${env.LLAMA_STACK_POSTGRES_PASSWORD} -models: -- metadata: {} - model_id: ${LLAMA_STACK_2_0_FLASH_MODEL} - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_model_id: ${LLAMA_STACK_2_0_FLASH_MODEL} - model_type: llm -- metadata: {} - model_id: ${LLAMA_STACK_2_5_PRO_MODEL} - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_model_id: ${LLAMA_STACK_2_5_PRO_MODEL} - model_type: llm -- metadata: {} - model_id: ${LLAMA_STACK_2_5_FLASH_MODEL} - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_model_id: ${LLAMA_STACK_2_5_FLASH_MODEL} - model_type: llm -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: mcp::assisted - provider_id: model-context-protocol - mcp_endpoint: - uri: "${MCP_SERVER_URL}" -server: - port: ${LLAMA_STACK_SERVER_PORT} diff --git a/examples/openai-faiss-run.yaml b/examples/openai-faiss-run.yaml deleted file mode 100644 index 4068dea86..000000000 --- a/examples/openai-faiss-run.yaml +++ /dev/null @@ -1,83 +0,0 @@ -# Example llama-stack configuration for OpenAI inference + FAISS (RAG) -# -# Notes: -# - You will need an OpenAI API key -# - You can generate the vector index with the rag-content tool (https://github.com/lightspeed-core/rag-content) -# -version: 2 -image_name: openai-faiss-config - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -models: -- model_id: gpt-test - provider_id: openai # This ID is a reference to 'providers.inference' - model_type: llm - provider_model_id: gpt-4o-mini - -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - 
model_type: embedding - provider_id: sentence-transformers # This ID is a reference to 'providers.inference' - provider_model_id: /home/USER/lightspeed-stack/embedding_models/all-mpnet-base-v2 - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: ocp-docs - provider_type: inline::faiss - config: - kvstore: - type: sqlite - db_path: /home/USER/lightspeed-stack/vector_dbs/ocp_docs/faiss_store.db - namespace: null - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -# Enable the RAG tool -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: ocp-docs # This ID is a reference to 'providers.vector_io' - vector_db_id: openshift-index # This ID was defined during index generation \ No newline at end of file diff --git a/examples/openai-pgvector-run.yaml b/examples/openai-pgvector-run.yaml deleted file mode 100644 index a8e1da345..000000000 --- a/examples/openai-pgvector-run.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Example llama-stack configuration for OpenAI inference + PSQL (pgvector) vector index (RAG) -# -# Notes: -# - You will need an OpenAI API key -# - You will need to setup PSQL with pgvector -# - The table schema must follow the expected schema in llama-stack (see rag_guide.md) -# -version: 2 -image_name: openai-pgvector-config - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -models: -- model_id: gpt-test - provider_id: openai - model_type: llm - provider_model_id: gpt-4o-mini -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /home/USER/lightspeed-stack/embedding_models/all-mpnet-base-v2 - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: pgvector-example - provider_type: remote::pgvector - config: - host: localhost - port: 5432 - db: pgvector_example # PostgreSQL database (psql -d pgvector_example) - user: lightspeed # PostgreSQL user - password: empty - kvstore: - type: sqlite - db_path: .llama/distributions/pgvector/pgvector_registry.db - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -tool_groups: -- provider_id: 
rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: pgvector-example - # A unique ID that becomes the PostgreSQL table name, prefixed with 'vector_store_'. - # e.g., 'rhdocs' will create the table 'vector_store_rhdocs'. - vector_db_id: rhdocs \ No newline at end of file diff --git a/examples/run.yaml b/examples/run.yaml index dcb1b0e67..ea560fcdb 100644 --- a/examples/run.yaml +++ b/examples/run.yaml @@ -1,126 +1,176 @@ -version: '2' -image_name: minimal-viable-llama-stack-configuration +# Example llama-stack configuration for OpenAI inference + FAISS (RAG) +# +# Notes: +# - You will need an OpenAI API key +# - You can generate the vector index with the rag-content tool (https://github.com/lightspeed-core/rag-content) +# +version: 2 apis: - - agents - - datasetio - - eval - - inference - - post_training - - safety - - scoring - - telemetry - - tool_runtime - - vector_io +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io + benchmarks: [] -container_image: null +conversations_store: + db_path: ~/.llama/storage/conversations.db + type: sqlite datasets: [] -external_providers_dir: null +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d inference_store: - db_path: .llama/distributions/ollama/inference_store.db + db_path: ~/.llama/storage/inference-store.db type: sqlite -logging: null metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null + db_path: ~/.llama/storage/registry.db type: sqlite + providers: + inference: + - provider_id: openai # This ID is a reference to 'providers.inference' + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default provider_id: meta-reference provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference datasetio: - config: kvstore: - db_path: 
.llama/distributions/ollama/huggingface_datasetio.db
-        namespace: null
-        type: sqlite
+        namespace: huggingface_datasetio
+        backend: kv_default
     provider_id: huggingface
     provider_type: remote::huggingface
   - config:
       kvstore:
-        db_path: .llama/distributions/ollama/localfs_datasetio.db
-        namespace: null
-        type: sqlite
+        namespace: localfs_datasetio
+        backend: kv_default
     provider_id: localfs
     provider_type: inline::localfs
   eval:
   - config:
       kvstore:
-        db_path: .llama/distributions/ollama/meta_reference_eval.db
-        namespace: null
-        type: sqlite
+        namespace: eval_store
+        backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  inference:
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
-  post_training:
-  - config:
-      checkpoint_format: huggingface
-      device: cpu
-      distributed_backend: null
-      dpo_output_dir: .llama/distributions/ollama
-    provider_id: huggingface
-    provider_type: inline::huggingface-gpu
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - config: {}
-    provider_id: basic
-    provider_type: inline::basic
-  - config: {}
-    provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  - config:
-      openai_api_key: '********'
-    provider_id: braintrust
-    provider_type: inline::braintrust
-  telemetry:
-  - config:
-      service_name: 'lightspeed-stack'
-      sinks: sqlite
-      sqlite_db_path: .llama/distributions/ollama/trace_store.db
-    provider_id: meta-reference
-    provider_type: inline::meta-reference
-  tool_runtime:
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  vector_io:
-  - config:
-      kvstore:
-        db_path: .llama/distributions/ollama/faiss_store.db
-        namespace: null
-        type: sqlite
-    provider_id: faiss
-    provider_type: inline::faiss
 scoring_fns: []
 server:
-  auth: null
-  host: null
   port: 8321
-  quota: null
-  tls_cafile: null
-  tls_certfile: null
-  tls_keyfile: null
-shields: []
-vector_dbs: []
-
-models:
-  - model_id: gpt-4-turbo
+storage:
+  backends:
+    kv_default: # Define the storage backend type for RAG; here the registry and RAG are unified, i.e. information on registered resources (e.g. models, vector_stores) is saved together with the RAG chunks
+      type: kv_sqlite
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
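+      # ${env.KV_STORE_PATH:=<default>} is llama-stack env substitution: the variable is used when set, otherwise the path after ':=' applies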
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models:
   - model_id: gpt-4o-mini
     provider_id: openai
     model_type: llm
-    provider_model_id: gpt-4-turbo
+    provider_model_id: gpt-4o-mini
+  - model_id: sentence-transformers/all-mpnet-base-v2
+    model_type: embedding
+    provider_id: sentence-transformers
+    provider_model_id: sentence-transformers/all-mpnet-base-v2
+    metadata:
+      embedding_dimension: 768
+  shields:
+  - shield_id: llama-guard
+    provider_id: llama-guard
+    provider_shield_id: openai/gpt-4o-mini
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::rag # Register the RAG tool
+    provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model: # Define the default embedding model for RAG
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
+telemetry:
+  enabled: true
\ No newline at end of file
diff --git a/examples/vertexai-run.yaml b/examples/vertexai-run.yaml
index 37e083b8f..2f777e41b 100644
--- a/examples/vertexai-run.yaml
+++ b/examples/vertexai-run.yaml
@@ -1,4 +1,5 @@
 version: 2
+image_name: vertexai-configuration
 
 apis:
 - agents
@@ -17,7 +18,6 @@ conversations_store:
   db_path: ~/.llama/storage/conversations.db
   type: sqlite
 datasets: []
-image_name: starter
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
 inference_store:
   db_path: ~/.llama/storage/inference-store.db
@@ -27,6 +27,48 @@ metadata_store:
   type: sqlite
 
 providers:
+  inference:
+  - provider_id: google-vertex
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT}
+      location: ${env.VERTEX_AI_LOCATION}
+      allowed_models: ["google/gemini-2.5-flash"]
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  files:
+  - config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ~/.llama/storage/files
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety:
+  - config:
+      excluded_categories: []
+    provider_id: llama-guard
+    provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
   - config:
       persistence:
@@ -65,46 +107,6 @@ providers:
         backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  inference:
-  - provider_id: google-vertex
-    provider_type: remote::vertexai
-    config:
-      project: ${env.VERTEX_AI_PROJECT}
-      location: 
${env.VERTEX_AI_LOCATION} - - config: {} - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard - scoring: - - config: {} - provider_id: basic - provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - tool_runtime: - - config: {} - provider_id: rag-runtime - provider_type: inline::rag-runtime - vector_io: - - config: - persistence: - namespace: faiss_store - backend: kv_default - provider_id: faiss - provider_type: inline::faiss scoring_fns: [] server: port: 8321 @@ -112,10 +114,10 @@ storage: backends: kv_default: type: kv_sqlite - db_path: ~/.llama/storage/kv_store.db + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ~/.llama/storage/sql_store.db + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -133,11 +135,22 @@ storage: backend: kv_default registered_resources: models: [] - shields: [] - vector_dbs: [] + shields: + - shield_id: llama-guard + provider_id: llama-guard + provider_shield_id: openai/gpt-4o-mini datasets: [] scoring_fns: [] benchmarks: [] tool_groups: - toolgroup_id: builtin::rag provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard +telemetry: + enabled: true diff --git a/examples/vllm-granite-run.yaml b/examples/vllm-granite-run.yaml deleted file mode 100644 index 198095ad6..000000000 --- a/examples/vllm-granite-run.yaml +++ /dev/null @@ -1,148 +0,0 @@ -# Example llama-stack configuration for IBM Granite using vLLM (no RAG) - -# -# Contributed by @eranco74 (2025-08). 
-# -# Notes: -# - You will need to serve Granite on a vLLM instance -# -version: '2' -image_name: vllm-granite-config -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: granite - provider_type: remote::vllm - config: - url: ${env.VLLM_URL} - api_token: ${env.VLLM_API_TOKEN:fake} - max_tokens: 10000 - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:} - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db - post_training: - - provider_id: huggingface - provider_type: inline::huggingface - config: - checkpoint_format: huggingface - distributed_backend: null - device: cpu - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: granite - provider_model_id: null -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- 
toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 \ No newline at end of file diff --git a/examples/vllm-llama-faiss-run.yaml b/examples/vllm-llama-faiss-run.yaml deleted file mode 100644 index 924577470..000000000 --- a/examples/vllm-llama-faiss-run.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# Example llama-stack configuration for vLLM on RHEL, Meta Llama 3.1 Instruct + FAISS (RAG) -# -# Notes: -# - You will need to serve Llama 3.1 Instruct on a vLLM instance -# -version: 2 -image_name: vllm-llama-faiss-config - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -models: -- model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: vllm - model_type: llm - provider_model_id: null -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /home/USER/embedding_models/all-mpnet-base-v2 - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: vllm - provider_type: remote::vllm - config: - url: http://localhost:8000/v1/ - api_token: key - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: rhel-db - provider_type: inline::faiss - config: - kvstore: - type: sqlite - db_path: /home/USER/vector_dbs/rhel_index/faiss_store.db - namespace: null - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: rhel-db - vector_db_id: rhel-docs \ No newline at end of file diff --git a/examples/vllm-qwen3-run.yaml b/examples/vllm-qwen3-run.yaml deleted file mode 100644 index 9de77f2ec..000000000 --- a/examples/vllm-qwen3-run.yaml +++ /dev/null @@ -1,108 +0,0 @@ -# Example llama-stack configuration for Alibaba Qwen3 using vLLM (no RAG) - -# -# Contributed by @eranco74 (2025-08). 
-# -# Notes: -# - You will need to serve Qwen3 on a vLLM instance -# -version: 2 -image_name: vllm-qwen3-config -apis: -- agents -- datasetio -- eval -- files -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: qwen - provider_type: remote::vllm - config: - url: https://qwen3.rosa.openshiftapps.com/v1 - max_tokens: 32768 - api_token: - tls_verify: true - vector_io: [] - files: [] - safety: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=/tmp/.llama/distributions/starter}/trace_store.db - eval: [] - datasetio: [] - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} -models: -- metadata: {} - model_id: qwen3-32b-maas - provider_id: qwen - provider_model_id: null -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: mcp::assisted - provider_id: model-context-protocol - mcp_endpoint: - uri: "http://assisted-service-mcp:8000/sse" -server: - port: 8321 \ No newline at end of file diff --git a/examples/vllm-rhaiis.yaml b/examples/vllm-rhaiis.yaml index 7ec33263f..5853f7766 100644 --- a/examples/vllm-rhaiis.yaml +++ b/examples/vllm-rhaiis.yaml @@ -1,5 +1,6 @@ -version: '2' -image_name: sample-notebook +version: 2 +image_name: rhaiis-configuration + apis: - agents - batches @@ -11,6 +12,19 @@ apis: - scoring - tool_runtime - vector_io + +benchmarks: [] +conversations_store: + db_path: ~/.llama/storage/conversations.db + type: sqlite +datasets: [] +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ~/.llama/storage/inference-store.db + type: sqlite +metadata_store: + db_path: ~/.llama/storage/registry.db + type: sqlite providers: inference: @@ -20,89 +34,91 @@ providers: url: http://${env.RHAIIS_URL}:8000/v1/ api_token: ${env.RHAIIS_API_KEY} tls_verify: false - max_tokens: 2048 - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - 
vector_io: - - provider_id: documentation_faiss - provider_type: inline::faiss + max_tokens: 2048 + - provider_id: openai + provider_type: remote::openai config: - persistence: - namespace: vector_io::faiss - backend: kv_default + api_key: ${env.OPENAI_API_KEY} + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + - config: metadata_store: table_name: files_metadata backend: sql_default + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: + - config: excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: + - config: persistence: agent_state: - namespace: agents + namespace: agents_state backend: kv_default responses: - table_name: responses + table_name: agents_responses backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference + provider_id: meta-reference provider_type: inline::meta-reference - config: + batches: + - config: kvstore: - namespace: eval + namespace: batches_store backend: kv_default + provider_id: reference + provider_type: inline::reference datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: + - config: kvstore: - namespace: datasetio::huggingface + namespace: huggingface_datasetio backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: + provider_id: huggingface + provider_type: remote::huggingface + - config: kvstore: - namespace: datasetio::localfs + namespace: localfs_datasetio backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - batches: - - provider_id: reference - provider_type: inline::reference - config: + provider_id: localfs + provider_type: inline::localfs + eval: + - config: kvstore: - namespace: batches + namespace: eval_store backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +server: + port: 8321 storage: backends: kv_default: type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -120,38 +136,26 @@ storage: backend: kv_default registered_resources: models: - - model_id: all-mpnet-base-v2 - provider_id: sentence-transformers - provider_model_id: all-mpnet-base-v2 - model_type: embedding - metadata: - embedding_dimension: 768 - model_id: ${env.RHAIIS_MODEL} provider_id: vllm model_type: llm 
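+    # RHAIIS_MODEL should match the model id exposed by the vLLM server configured above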
provider_model_id: ${env.RHAIIS_MODEL} - shields: - shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + provider_id: llama-guard + provider_shield_id: openai/gpt-4o-mini datasets: [] scoring_fns: [] benchmarks: [] tool_groups: - toolgroup_id: builtin::rag provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true vector_stores: - default_provider_id: documentation_faiss + default_provider_id: faiss default_embedding_model: provider_id: sentence-transformers - model_id: all-mpnet-base-v2 + model_id: nomic-ai/nomic-embed-text-v1.5 safety: default_shield_id: llama-guard +telemetry: + enabled: true diff --git a/examples/vllm-rhelai.yaml b/examples/vllm-rhelai.yaml index 2d9ac373c..75025a815 100644 --- a/examples/vllm-rhelai.yaml +++ b/examples/vllm-rhelai.yaml @@ -1,5 +1,6 @@ -version: '2' -image_name: sample-notebook +version: 2 +image_name: rhelai-configuration + apis: - agents - batches @@ -9,8 +10,22 @@ apis: - inference - safety - scoring +- telemetry - tool_runtime - vector_io + +benchmarks: [] +conversations_store: + db_path: ~/.llama/storage/conversations.db + type: sqlite +datasets: [] +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ~/.llama/storage/inference-store.db + type: sqlite +metadata_store: + db_path: ~/.llama/storage/registry.db + type: sqlite providers: inference: @@ -20,89 +35,91 @@ providers: url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/ api_token: ${env.RHEL_AI_API_KEY} tls_verify: false - max_tokens: 2048 - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: documentation_faiss - provider_type: inline::faiss + max_tokens: 2048 + - provider_id: openai + provider_type: remote::openai config: - persistence: - namespace: vector_io::faiss - backend: kv_default + api_key: ${env.OPENAI_API_KEY} + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + - config: metadata_store: table_name: files_metadata backend: sql_default + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: + - config: excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: + - config: persistence: agent_state: - namespace: agents + namespace: agents_state backend: kv_default responses: - table_name: responses + table_name: agents_responses backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference + provider_id: meta-reference provider_type: inline::meta-reference - config: + batches: + - config: kvstore: - namespace: eval + namespace: batches_store backend: kv_default + 
provider_id: reference + provider_type: inline::reference datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: + - config: kvstore: - namespace: datasetio::huggingface + namespace: huggingface_datasetio backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: + provider_id: huggingface + provider_type: remote::huggingface + - config: kvstore: - namespace: datasetio::localfs + namespace: localfs_datasetio backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - batches: - - provider_id: reference - provider_type: inline::reference - config: + provider_id: localfs + provider_type: inline::localfs + eval: + - config: kvstore: - namespace: batches + namespace: eval_store backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +server: + port: 8321 storage: backends: kv_default: type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -120,38 +137,26 @@ storage: backend: kv_default registered_resources: models: - - model_id: all-mpnet-base-v2 - provider_id: sentence-transformers - provider_model_id: all-mpnet-base-v2 - model_type: embedding - metadata: - embedding_dimension: 768 - model_id: ${env.RHEL_AI_MODEL} provider_id: vllm model_type: llm provider_model_id: ${env.RHEL_AI_MODEL} - shields: - shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + provider_id: llama-guard + provider_shield_id: openai/gpt-4o-mini datasets: [] scoring_fns: [] benchmarks: [] tool_groups: - toolgroup_id: builtin::rag provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true vector_stores: - default_provider_id: documentation_faiss + default_provider_id: faiss default_embedding_model: provider_id: sentence-transformers - model_id: all-mpnet-base-v2 + model_id: nomic-ai/nomic-embed-text-v1.5 safety: default_shield_id: llama-guard +telemetry: + enabled: true diff --git a/examples/vllm-rhoai.yaml b/examples/vllm-rhoai.yaml index f9e992b1b..645e88dc1 100644 --- a/examples/vllm-rhoai.yaml +++ b/examples/vllm-rhoai.yaml @@ -1,5 +1,6 @@ -version: '2' -image_name: sample-notebook +version: 2 +image_name: rhoai-configuration + apis: - agents - batches @@ -9,8 +10,22 @@ apis: - inference - safety - scoring +- telemetry - tool_runtime - vector_io + +benchmarks: [] +conversations_store: + db_path: ~/.llama/storage/conversations.db + type: sqlite +datasets: [] +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ~/.llama/storage/inference-store.db + type: sqlite +metadata_store: + db_path: ~/.llama/storage/registry.db + type: sqlite providers: inference: @@ -21,88 +36,90 @@ providers: api_token: ${env.VLLM_API_KEY} tls_verify: false max_tokens: 1024 - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: 
{} - vector_io: - - provider_id: documentation_faiss - provider_type: inline::faiss + - provider_id: openai + provider_type: remote::openai config: - persistence: - namespace: vector_io::faiss - backend: kv_default + api_key: ${env.OPENAI_API_KEY} + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + - config: metadata_store: table_name: files_metadata backend: sql_default + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs safety: - provider_id: llama-guard provider_type: inline::llama-guard config: excluded_categories: [] + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: + - config: persistence: agent_state: - namespace: agents + namespace: agents_state backend: kv_default responses: - table_name: responses + table_name: agents_responses backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - eval: - - provider_id: meta-reference + provider_id: meta-reference provider_type: inline::meta-reference - config: + batches: + - config: kvstore: - namespace: eval + namespace: batches_store backend: kv_default + provider_id: reference + provider_type: inline::reference datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: + - config: kvstore: - namespace: datasetio::huggingface + namespace: huggingface_datasetio backend: kv_default - - provider_id: localfs - provider_type: inline::localfs - config: + provider_id: huggingface + provider_type: remote::huggingface + - config: kvstore: - namespace: datasetio::localfs + namespace: localfs_datasetio backend: kv_default - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - batches: - - provider_id: reference - provider_type: inline::reference - config: + provider_id: localfs + provider_type: inline::localfs + eval: + - config: kvstore: - namespace: batches + namespace: eval_store backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +server: + port: 8321 storage: backends: kv_default: type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -120,38 +137,26 @@ storage: backend: kv_default registered_resources: models: - - model_id: all-mpnet-base-v2 - provider_id: sentence-transformers - provider_model_id: all-mpnet-base-v2 - model_type: embedding - metadata: - embedding_dimension: 768 - model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: vllm model_type: llm - provider_model_id: meta-llama/Llama-3.2-1B-Instruct - + provider_model_id: null shields: - 
shield_id: llama-guard - provider_id: ${env.SAFETY_MODEL:+llama-guard} - provider_shield_id: ${env.SAFETY_MODEL:=} - - shield_id: code-scanner - provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} - provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + provider_id: llama-guard + provider_shield_id: openai/gpt-4o-mini datasets: [] scoring_fns: [] benchmarks: [] tool_groups: - toolgroup_id: builtin::rag provider_id: rag-runtime -server: - port: 8321 -telemetry: - enabled: true vector_stores: - default_provider_id: documentation_faiss + default_provider_id: faiss default_embedding_model: provider_id: sentence-transformers - model_id: all-mpnet-base-v2 + model_id: nomic-ai/nomic-embed-text-v1.5 safety: default_shield_id: llama-guard +telemetry: + enabled: true diff --git a/pyproject.toml b/pyproject.toml index bfcfdfb9a..0064635b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,8 @@ dependencies = [ "urllib3==2.6.2", # Used for agent card configuration "PyYAML>=6.0.0", + # Used for rag faiss support + "einops>=0.8.1", ] diff --git a/run.yaml b/run.yaml index 4deba67b7..88690b682 100644 --- a/run.yaml +++ b/run.yaml @@ -9,6 +9,7 @@ apis: - inference - safety - scoring +- telemetry - tool_runtime - vector_io @@ -27,6 +28,50 @@ metadata_store: type: sqlite providers: + inference: + - provider_id: openai # This ID is a reference to 'providers.inference' + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ~/.llama/storage/files + provider_id: meta-reference-files + provider_type: inline::localfs + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - config: persistence: @@ -65,56 +110,17 @@ providers: backend: kv_default provider_id: meta-reference provider_type: inline::meta-reference - files: - - config: - metadata_store: - table_name: files_metadata - backend: sql_default - storage_dir: ~/.llama/storage - provider_id: meta-reference-files - provider_type: inline::localfs - inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - config: {} - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard - scoring: - - config: {} - provider_id: basic - provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - tool_runtime: - - config: {} - provider_id: rag-runtime - provider_type: inline::rag-runtime - vector_io: - - config: - persistence: - namespace: faiss_store - backend: kv_default - provider_id: faiss - provider_type: inline::faiss scoring_fns: [] 
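+# The kv_default path below defaults to ~/.llama/storage/rag/, which is where docker-compose.yaml mounts the prebuilt ./tests/e2e/rag index (assuming the container HOME is /opt/app-root/src)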
 server:
   port: 8321
 storage:
   backends:
-    kv_default:
+    kv_default: # Define the storage backend type for RAG; here the registry and RAG are unified, i.e. information on registered resources (e.g. models, vector_stores) is saved together with the RAG chunks
       type: kv_sqlite
-      db_path: ~/.llama/storage/kv_store.db
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ~/.llama/storage/sql_store.db
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -145,5 +151,14 @@ registered_resources:
   scoring_fns: []
   benchmarks: []
   tool_groups:
-  - toolgroup_id: builtin::rag
+  - toolgroup_id: builtin::rag # Register the RAG tool
     provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model: # Define the default embedding model for RAG
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
+telemetry:
+  enabled: true
\ No newline at end of file
diff --git a/tests/e2e-prow/rhoai/configs/run.yaml b/tests/e2e-prow/rhoai/configs/run.yaml
index f9e992b1b..645e88dc1 100644
--- a/tests/e2e-prow/rhoai/configs/run.yaml
+++ b/tests/e2e-prow/rhoai/configs/run.yaml
@@ -1,5 +1,6 @@
-version: '2'
-image_name: sample-notebook
+version: 2
+image_name: rhoai-configuration
+
 apis:
 - agents
 - batches
@@ -9,8 +10,22 @@ apis:
 - inference
 - safety
 - scoring
+- telemetry
 - tool_runtime
 - vector_io
+
+benchmarks: []
+conversations_store:
+  db_path: ~/.llama/storage/conversations.db
+  type: sqlite
+datasets: []
+# external_providers_dir: /opt/app-root/src/.llama/providers.d
+inference_store:
+  db_path: ~/.llama/storage/inference-store.db
+  type: sqlite
+metadata_store:
+  db_path: ~/.llama/storage/registry.db
+  type: sqlite
 
 providers:
   inference:
@@ -21,88 +36,90 @@ providers:
       api_token: ${env.VLLM_API_KEY}
       tls_verify: false
       max_tokens: 1024
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
-  vector_io:
-  - provider_id: documentation_faiss
-    provider_type: inline::faiss
+  - provider_id: openai
+    provider_type: remote::openai
     config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
+      api_key: ${env.OPENAI_API_KEY}
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
   files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage}
+  - config:
       metadata_store:
         table_name: files_metadata
         backend: sql_default
+      storage_dir: ~/.llama/storage/files
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
     config:
       excluded_categories: []
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
-  - provider_id: meta-reference
-    provider_type: 
-    config:
+  batches:
+  - config:
       kvstore:
-        namespace: eval
+        namespace: batches_store
         backend: kv_default
+    provider_id: reference
+    provider_type: inline::reference
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
+  - config:
       kvstore:
-        namespace: datasetio::huggingface
+        namespace: huggingface_datasetio
         backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
+    provider_id: huggingface
+    provider_type: remote::huggingface
+  - config:
       kvstore:
-        namespace: datasetio::localfs
+        namespace: localfs_datasetio
         backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  tool_runtime:
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  batches:
-  - provider_id: reference
-    provider_type: inline::reference
-    config:
+    provider_id: localfs
+    provider_type: inline::localfs
+  eval:
+  - config:
       kvstore:
-        namespace: batches
+        namespace: eval_store
         backend: kv_default
+    provider_id: meta-reference
+    provider_type: inline::meta-reference
+scoring_fns: []
+server:
+  port: 8321
 storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -120,38 +137,26 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: all-mpnet-base-v2
-    provider_id: sentence-transformers
-    provider_model_id: all-mpnet-base-v2
-    model_type: embedding
-    metadata:
-      embedding_dimension: 768
   - model_id: meta-llama/Llama-3.2-1B-Instruct
     provider_id: vllm
     model_type: llm
-    provider_model_id: meta-llama/Llama-3.2-1B-Instruct
-
+    provider_model_id: null
   shields:
   - shield_id: llama-guard
-    provider_id: ${env.SAFETY_MODEL:+llama-guard}
-    provider_shield_id: ${env.SAFETY_MODEL:=}
-  - shield_id: code-scanner
-    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
-    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+    provider_id: llama-guard
+    provider_shield_id: openai/gpt-4o-mini
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
 vector_stores:
-  default_provider_id: documentation_faiss
+  default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: all-mpnet-base-v2
+    model_id: nomic-ai/nomic-embed-text-v1.5
 safety:
   default_shield_id: llama-guard
+telemetry:
+  enabled: true
diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml
index 08004a1d6..206ac0110 100644
--- a/tests/e2e/configs/run-azure.yaml
+++ b/tests/e2e/configs/run-azure.yaml
@@ -1,4 +1,5 @@
 version: 2
+image_name: azure-configuration

 apis:
 - agents
@@ -9,6 +10,7 @@ apis:
 - inference
 - safety
 - scoring
+- telemetry
 - tool_runtime
 - vector_io

@@ -17,7 +19,6 @@ conversations_store:
   db_path: ~/.llama/storage/conversations.db
   type: sqlite
 datasets: []
-image_name: starter
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
 inference_store:
   db_path: ~/.llama/storage/inference-store.db
@@ -27,6 +28,48 @@ metadata_store:
   type: sqlite

 providers:
+  inference:
+  - provider_id: azure
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY}
+      api_base: https://ols-test.openai.azure.com/
+      api_version: 2024-02-15-preview
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  files:
+  - config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ~/.llama/storage/files
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety:
+  - config:
+      excluded_categories: []
+    provider_id: llama-guard
+    provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
   - config:
       persistence:
@@ -65,51 +108,6 @@ providers:
         backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  inference:
-  - provider_id: azure
-    provider_type: remote::azure
-    config:
-      api_key: ${env.AZURE_API_KEY}
-      api_base: https://ols-test.openai.azure.com/
-      api_version: 2024-02-15-preview
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
-  - config: {}
-    provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - config: {}
-    provider_id: basic
-    provider_type: inline::basic
-  - config: {}
-    provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  tool_runtime:
-  - config: {}
-    provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  vector_io:
-  - config:
-      persistence:
-        namespace: faiss_store
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
 scoring_fns: []
 server:
   port: 8321
@@ -117,10 +115,10 @@ storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ~/.llama/storage/kv_store.db
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ~/.llama/storage/sql_store.db
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -137,7 +135,7 @@ storage:
       namespace: prompts
       backend: kv_default
 registered_resources:
-  models: 
+  models:
   - model_id: gpt-4o-mini
     provider_id: azure
     model_type: llm
@@ -146,10 +144,18 @@ registered_resources:
   - shield_id: llama-guard
     provider_id: llama-guard
     provider_shield_id: openai/gpt-4o-mini
-  vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
   - toolgroup_id: builtin::rag
-    provider_id: rag-runtime
\ No newline at end of file
+    provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
+telemetry:
+  enabled: true
\ No newline at end of file
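Each config in this change moves the store paths to the ${env.VAR:=default} form, which uses the environment variable when set and otherwise falls back to the inline default; the new default puts the RAG kv store under ~/.llama/storage/rag/, the directory that the docker-compose files now bind-mount from tests/e2e/rag. A rough Python equivalent of that resolution rule, for illustration only:

# Rough illustration of how ${env.KV_STORE_PATH:=default}-style references
# resolve: take the environment variable when set, else the inline default.
import os
from pathlib import Path

def resolve(var: str, default: str) -> Path:
    return Path(os.environ.get(var, default)).expanduser()

kv_store = resolve("KV_STORE_PATH", "~/.llama/storage/rag/kv_store.db")
sql_store = resolve("SQL_STORE_PATH", "~/.llama/storage/sql_store.db")
print(kv_store, sql_store)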
diff --git a/tests/e2e/configs/run-ci.yaml b/tests/e2e/configs/run-ci.yaml
index 49971d2b6..04c45978b 100644
--- a/tests/e2e/configs/run-ci.yaml
+++ b/tests/e2e/configs/run-ci.yaml
@@ -1,4 +1,5 @@
 version: 2
+image_name: starter

 apis:
 - agents
@@ -9,6 +10,7 @@ apis:
 - inference
 - safety
 - scoring
+- telemetry
 - tool_runtime
 - vector_io

@@ -17,7 +19,6 @@ conversations_store:
   db_path: ~/.llama/storage/conversations.db
   type: sqlite
 datasets: []
-image_name: starter
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
 inference_store:
   db_path: ~/.llama/storage/inference-store.db
@@ -27,6 +28,50 @@ metadata_store:
   type: sqlite

 providers:
+  inference:
+  - provider_id: openai # This ID is a reference to 'providers.inference'
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+      allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"]
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  files:
+  - config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ~/.llama/storage/files
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety:
+  - config:
+      excluded_categories: []
+    provider_id: llama-guard
+    provider_type: inline::llama-guard
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: '********'
+  tool_runtime:
+  - config: {} # Enable the RAG tool
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config: # Define the storage backend for RAG
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
   - config:
       persistence:
@@ -65,56 +110,17 @@ providers:
         backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  inference:
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
-  - config: {}
-    provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - config: {}
-    provider_id: basic
-    provider_type: inline::basic
-  - config: {}
-    provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  tool_runtime:
-  - config: {}
-    provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  vector_io:
-  - config:
-      persistence:
-        namespace: faiss_store
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
 scoring_fns: []
 server:
   port: 8321
 storage:
   backends:
-    kv_default:
+    kv_default: # Define the storage backend type for RAG; here the registry and RAG are unified, i.e. information on registered resources (e.g. models, vector_stores) is saved together with the RAG chunks
       type: kv_sqlite
-      db_path: ~/.llama/storage/kv_store.db
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ~/.llama/storage/sql_store.db
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -146,10 +152,18 @@ registered_resources:
   - shield_id: llama-guard
     provider_id: llama-guard
     provider_shield_id: openai/gpt-4o-mini
-  vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
-  - toolgroup_id: builtin::rag
+  - toolgroup_id: builtin::rag # Register the RAG tool
     provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model: # Define the default embedding model for RAG
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
+telemetry:
+  enabled: true
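As the comment on kv_default in run-ci.yaml notes, the registry and the RAG chunks now share one backend, so the pre-built tests/e2e/rag/kv_store.db added later in this change can be inspected directly. A quick sketch with Python's sqlite3 module; the table layout is an implementation detail of llama-stack's kv store, so this only lists the tables rather than assuming a schema:

# Peek inside the pre-built RAG store shipped for the e2e tests.
import sqlite3

conn = sqlite3.connect("tests/e2e/rag/kv_store.db")
tables = conn.execute(
    "SELECT name FROM sqlite_master WHERE type='table'"
).fetchall()
print(tables)  # registry entries and RAG chunks live in the same database
conn.close()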
diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml
index b828e89ea..b6d6bfb45 100644
--- a/tests/e2e/configs/run-rhaiis.yaml
+++ b/tests/e2e/configs/run-rhaiis.yaml
@@ -1,5 +1,6 @@
-version: '2'
-image_name: sample-notebook
+version: 2
+image_name: rhaiis-configuration
+
 apis:
 - agents
 - batches
@@ -9,8 +10,22 @@ apis:
 - inference
 - safety
 - scoring
+- telemetry
 - tool_runtime
 - vector_io
+
+benchmarks: []
+conversations_store:
+  db_path: ~/.llama/storage/conversations.db
+  type: sqlite
+datasets: []
+# external_providers_dir: /opt/app-root/src/.llama/providers.d
+inference_store:
+  db_path: ~/.llama/storage/inference-store.db
+  type: sqlite
+metadata_store:
+  db_path: ~/.llama/storage/registry.db
+  type: sqlite

 providers:
   inference:
@@ -20,93 +35,91 @@ providers:
       url: http://${env.RHAIIS_URL}:8000/v1/
       api_token: ${env.RHAIIS_API_KEY}
       tls_verify: false
-      max_tokens: 2048
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
+      max_tokens: 2048
   - provider_id: openai
     provider_type: remote::openai
     config:
       api_key: ${env.OPENAI_API_KEY}
-  vector_io:
-  - provider_id: documentation_faiss
-    provider_type: inline::faiss
-    config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
   files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage}
+  - config:
       metadata_store:
         table_name: files_metadata
         backend: sql_default
+      storage_dir: ~/.llama/storage/files
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
   safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
+  - config:
       excluded_categories: []
+    provider_id: llama-guard
+    provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
+  - config:
       persistence:
         agent_state:
-          namespace: agents
+          namespace: agents_state
           backend: kv_default
         responses:
-          table_name: responses
+          table_name: agents_responses
           backend: sql_default
-        max_write_queue_size: 10000
-        num_writers: 4
-  eval:
-  - provider_id: meta-reference
+    provider_id: meta-reference
     provider_type: inline::meta-reference
-    config:
+  batches:
+  - config:
       kvstore:
-        namespace: eval
+        namespace: batches_store
         backend: kv_default
+    provider_id: reference
+    provider_type: inline::reference
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
+  - config:
       kvstore:
-        namespace: datasetio::huggingface
+        namespace: huggingface_datasetio
         backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
+    provider_id: huggingface
+    provider_type: remote::huggingface
+  - config:
       kvstore:
-        namespace: datasetio::localfs
+        namespace: localfs_datasetio
         backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  tool_runtime:
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  batches:
-  - provider_id: reference
-    provider_type: inline::reference
-    config:
+    provider_id: localfs
+    provider_type: inline::localfs
+  eval:
+  - config:
       kvstore:
-        namespace: batches
+        namespace: eval_store
         backend: kv_default
+    provider_id: meta-reference
+    provider_type: inline::meta-reference
+scoring_fns: []
+server:
+  port: 8321
 storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -124,12 +137,6 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: all-mpnet-base-v2
-    provider_id: sentence-transformers
-    provider_model_id: all-mpnet-base-v2
-    model_type: embedding
-    metadata:
-      embedding_dimension: 768
   - model_id: ${env.RHAIIS_MODEL}
     provider_id: vllm
     model_type: llm
@@ -144,14 +151,12 @@ registered_resources:
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
 vector_stores:
-  default_provider_id: documentation_faiss
+  default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: all-mpnet-base-v2
+    model_id: nomic-ai/nomic-embed-text-v1.5
 safety:
   default_shield_id: llama-guard
+telemetry:
+  enabled: true
diff --git a/tests/e2e/configs/run-rhelai.yaml b/tests/e2e/configs/run-rhelai.yaml
index 8327f2939..75025a815 100644
--- a/tests/e2e/configs/run-rhelai.yaml
+++ b/tests/e2e/configs/run-rhelai.yaml
@@ -1,5 +1,6 @@
-version: '2'
-image_name: sample-notebook
+version: 2
+image_name: rhelai-configuration
+
 apis:
 - agents
 - batches
@@ -9,8 +10,22 @@ apis:
 - inference
 - safety
 - scoring
+- telemetry
 - tool_runtime
 - vector_io
+
+benchmarks: []
+conversations_store:
+  db_path: ~/.llama/storage/conversations.db
+  type: sqlite
+datasets: []
+# external_providers_dir: /opt/app-root/src/.llama/providers.d
+inference_store:
+  db_path: ~/.llama/storage/inference-store.db
+  type: sqlite
+metadata_store:
+  db_path: ~/.llama/storage/registry.db
+  type: sqlite

 providers:
   inference:
@@ -20,93 +35,91 @@ providers:
       url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/
       api_token: ${env.RHEL_AI_API_KEY}
       tls_verify: false
-      max_tokens: 2048
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
+      max_tokens: 2048
   - provider_id: openai
     provider_type: remote::openai
     config:
       api_key: ${env.OPENAI_API_KEY}
-  vector_io:
-  - provider_id: documentation_faiss
-    provider_type: inline::faiss
-    config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
   files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage}
+  - config:
       metadata_store:
         table_name: files_metadata
         backend: sql_default
+      storage_dir: ~/.llama/storage/files
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
   safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
+  - config:
       excluded_categories: []
+    provider_id: llama-guard
+    provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
+  - config:
       persistence:
         agent_state:
-          namespace: agents
+          namespace: agents_state
           backend: kv_default
         responses:
-          table_name: responses
+          table_name: agents_responses
           backend: sql_default
-        max_write_queue_size: 10000
-        num_writers: 4
-  eval:
-  - provider_id: meta-reference
+    provider_id: meta-reference
     provider_type: inline::meta-reference
-    config:
+  batches:
+  - config:
       kvstore:
-        namespace: eval
+        namespace: batches_store
         backend: kv_default
+    provider_id: reference
+    provider_type: inline::reference
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
+  - config:
       kvstore:
-        namespace: datasetio::huggingface
+        namespace: huggingface_datasetio
         backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
+    provider_id: huggingface
+    provider_type: remote::huggingface
+  - config:
       kvstore:
-        namespace: datasetio::localfs
+        namespace: localfs_datasetio
         backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  tool_runtime:
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  batches:
-  - provider_id: reference
-    provider_type: inline::reference
-    config:
+    provider_id: localfs
+    provider_type: inline::localfs
+  eval:
+  - config:
       kvstore:
-        namespace: batches
+        namespace: eval_store
         backend: kv_default
+    provider_id: meta-reference
+    provider_type: inline::meta-reference
+scoring_fns: []
+server:
+  port: 8321
 storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -124,12 +137,6 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: all-mpnet-base-v2
-    provider_id: sentence-transformers
-    provider_model_id: all-mpnet-base-v2
-    model_type: embedding
-    metadata:
-      embedding_dimension: 768
   - model_id: ${env.RHEL_AI_MODEL}
     provider_id: vllm
     model_type: llm
@@ -144,14 +151,12 @@ registered_resources:
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
 vector_stores:
-  default_provider_id: documentation_faiss
+  default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: all-mpnet-base-v2
+    model_id: nomic-ai/nomic-embed-text-v1.5
 safety:
   default_shield_id: llama-guard
+telemetry:
+  enabled: true
diff --git a/tests/e2e/configs/run-vertexai.yaml b/tests/e2e/configs/run-vertexai.yaml
index af6bbe2ac..3a0b19feb 100644
--- a/tests/e2e/configs/run-vertexai.yaml
+++ b/tests/e2e/configs/run-vertexai.yaml
@@ -1,4 +1,5 @@
 version: 2
+image_name: vertexai-configuration

 apis:
 - agents
@@ -9,6 +10,7 @@ apis:
 - inference
 - safety
 - scoring
+- telemetry
 - tool_runtime
 - vector_io

@@ -17,7 +19,6 @@ conversations_store:
   db_path: ~/.llama/storage/conversations.db
   type: sqlite
 datasets: []
-image_name: starter
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
 inference_store:
   db_path: ~/.llama/storage/inference-store.db
@@ -27,6 +28,48 @@ metadata_store:
   type: sqlite

 providers:
+  inference:
+  - provider_id: google-vertex
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT}
+      location: ${env.VERTEX_AI_LOCATION}
+      allowed_models: ["google/gemini-2.5-flash"]
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  files:
+  - config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ~/.llama/storage/files
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety:
+  - config:
+      excluded_categories: []
+    provider_id: llama-guard
+    provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
   - config:
       persistence:
@@ -65,50 +108,6 @@ providers:
         backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  inference:
-  - provider_id: google-vertex
-    provider_type: remote::vertexai
-    config:
-      project: ${env.VERTEX_AI_PROJECT}
-      location: ${env.VERTEX_AI_LOCATION}
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
-  - config: {}
-    provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - config: {}
-    provider_id: basic
-    provider_type: inline::basic
-  - config: {}
-    provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  tool_runtime:
-  - config: {}
-    provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  vector_io:
-  - config:
-      persistence:
-        namespace: faiss_store
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
 scoring_fns: []
 server:
   port: 8321
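Outside the BDD harness, the two scenarios above can be reproduced with plain HTTP. A sketch using requests; the base URL and bearer token are placeholders, and the endpoint shapes follow the feature file:

# Reproduce the faiss.feature checks against a locally running service.
import requests

BASE = "http://localhost:8080/v1"  # assumed local address; adjust as needed
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder token

# 1. The vector store loaded from tests/e2e/rag should be listed.
resp = requests.get(f"{BASE}/rags", headers=HEADERS, timeout=30)
resp.raise_for_status()
print(resp.json())  # expected shape: {"rags": ["vs_..."]}

# 2. A query that forces the file_search tool should answer from the store.
payload = {
    "query": "What is the title of the article from Paul?",
    "system_prompt": "You are an assistant. Always use the file_search tool to answer.",
}
resp = requests.post(f"{BASE}/query", headers=HEADERS, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())  # the e2e test expects the fragment "great work"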
@@ -116,10 +115,10 @@ storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ~/.llama/storage/kv_store.db
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ~/.llama/storage/sql_store.db
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -141,10 +140,18 @@ registered_resources:
   - shield_id: llama-guard
     provider_id: llama-guard
     provider_shield_id: openai/gpt-4o-mini
-  vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
+telemetry:
+  enabled: true
diff --git a/tests/e2e/features/faiss.feature b/tests/e2e/features/faiss.feature
new file mode 100644
index 000000000..e9a247484
--- /dev/null
+++ b/tests/e2e/features/faiss.feature
@@ -0,0 +1,32 @@
+@Authorized
+Feature: FAISS support tests
+
+  Background:
+    Given The service is started locally
+    And REST API service prefix is /v1
+
+  Scenario: Verify vector store is registered
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I access REST API endpoint rags using HTTP GET method
+    Then The status code of the response is 200
+    And the body of the response has the following structure
+    """
+    {
+      "rags": [
+        "vs_37316db9-e60d-4e5f-a1d4-d2a22219aaee"
+      ]
+    }
+    """
+
+  Scenario: Query vector db using the file_search tool
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "query" to ask question with authorization header
+    """
+    {"query": "What is the title of the article from Paul?", "system_prompt": "You are an assistant. Always use the file_search tool to answer. Write only lowercase letters"}
+    """
+    Then The status code of the response is 200
+    And The response should contain following fragments
+    | Fragments in LLM response |
+    | great work |
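diff --git a/tests/e2e/rag/kv_store.db b/tests/e2e/rag/kv_store.db
new file mode 100644
index 000000000..d83c2f163
Binary files /dev/null and b/tests/e2e/rag/kv_store.db differ
diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt
index 2a62eaf6c..b61f186ac 100644
--- a/tests/e2e/test_list.txt
+++ b/tests/e2e/test_list.txt
@@ -1,3 +1,4 @@
+features/faiss.feature
 features/smoketests.feature
 features/authorized_noop.feature
 features/authorized_noop_token.feature
diff --git a/uv.lock b/uv.lock
index 1eef9084d..d3341c1b1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.12, <3.14"
 resolution-markers = [
     "python_full_version >= '3.13' and sys_platform != 'darwin'",
@@ -614,6 +614,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" },
 ]

+[[package]]
+name = "einops"
+version = "0.8.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e5/81/df4fbe24dff8ba3934af99044188e20a98ed441ad17a274539b74e82e126/einops-0.8.1.tar.gz", hash = "sha256:de5d960a7a761225532e0f1959e5315ebeafc0cd43394732f103ca44b9837e84", size = 54805, upload-time = "2025-02-09T03:17:00.434Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/87/62/9773de14fe6c45c23649e98b83231fffd7b9892b6cf863251dc2afa73643/einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737", size = 64359, upload-time = "2025-02-09T03:17:01.998Z" },
+]
+
 [[package]]
 name = "email-validator"
 version = "2.3.0"
@@ -1261,6 +1270,7 @@ dependencies = [
     { name = "asyncpg" },
     { name = "authlib" },
     { name = "cachetools" },
+    { name = "einops" },
     { name = "email-validator" },
     { name = "fastapi" },
     { name = "jsonpath-ng" },
@@ -1344,6 +1354,7 @@ requires-dist = [
     { name = "asyncpg", specifier = ">=0.31.0" },
     { name = "authlib", specifier = ">=1.6.0" },
     { name = "cachetools", specifier = ">=6.1.0" },
+    { name = "einops", specifier = ">=0.8.1" },
     { name = "email-validator", specifier = ">=2.2.0" },
     { name = "fastapi", specifier = ">=0.115.12" },
     { name = "jsonpath-ng", specifier = ">=1.6.1" },

The uv.lock changes simply materialize the new einops pin from pyproject.toml. A one-liner to sanity-check that an installed environment satisfies it, using the standard importlib.metadata and packaging modules:

# Sanity-check the einops pin in the active environment.
from importlib.metadata import version
from packaging.version import Version

assert Version(version("einops")) >= Version("0.8.1")
print(version("einops"))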