diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml index 539f633c0..9ca10015a 100644 --- a/.github/workflows/e2e_tests_rhaiis.yaml +++ b/.github/workflows/e2e_tests_rhaiis.yaml @@ -124,8 +124,8 @@ jobs: - name: Docker Login for quay access env: - QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_ROBOT_USERNAME }} - QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_DOWNSTREAM_USERNAME }} + QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_DOWNSTREAM_TOKEN }} run: | echo $QUAY_ROBOT_TOKEN | docker login quay.io -u=$QUAY_ROBOT_USERNAME --password-stdin diff --git a/.github/workflows/e2e_tests_rhelai.yaml b/.github/workflows/e2e_tests_rhelai.yaml index c73c4434c..da6c6d2b9 100644 --- a/.github/workflows/e2e_tests_rhelai.yaml +++ b/.github/workflows/e2e_tests_rhelai.yaml @@ -125,8 +125,8 @@ jobs: - name: Docker Login for quay access env: - QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_ROBOT_USERNAME }} - QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + QUAY_ROBOT_USERNAME: ${{ secrets.QUAY_DOWNSTREAM_USERNAME }} + QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_DOWNSTREAM_TOKEN }} run: | echo $QUAY_ROBOT_TOKEN | docker login quay.io -u=$QUAY_ROBOT_USERNAME --password-stdin diff --git a/examples/vllm-rhaiis.yaml b/examples/vllm-rhaiis.yaml new file mode 100644 index 000000000..7ec33263f --- /dev/null +++ b/examples/vllm-rhaiis.yaml @@ -0,0 +1,157 @@ +version: '2' +image_name: sample-notebook +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: http://${env.RHAIIS_URL}:8000/v1/ + api_token: ${env.RHAIIS_API_KEY} + tls_verify: false + max_tokens: 2048 + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: documentation_faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + tool_runtime: + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: all-mpnet-base-v2 + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + model_type: embedding + metadata: + embedding_dimension: 768 + - model_id: ${env.RHAIIS_MODEL} + provider_id: vllm + model_type: llm + provider_model_id: ${env.RHAIIS_MODEL} + + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: documentation_faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: all-mpnet-base-v2 +safety: + default_shield_id: llama-guard diff --git a/examples/vllm-rhelai.yaml b/examples/vllm-rhelai.yaml new file mode 100644 index 000000000..2d9ac373c --- /dev/null +++ b/examples/vllm-rhelai.yaml @@ -0,0 +1,157 @@ +version: '2' +image_name: sample-notebook +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/ + api_token: ${env.RHEL_AI_API_KEY} + tls_verify: false + max_tokens: 2048 + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: documentation_faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + tool_runtime: + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: all-mpnet-base-v2 + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + model_type: embedding + metadata: + embedding_dimension: 768 + - model_id: ${env.RHEL_AI_MODEL} + provider_id: vllm + model_type: llm + provider_model_id: ${env.RHEL_AI_MODEL} + + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: documentation_faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: all-mpnet-base-v2 +safety: + default_shield_id: llama-guard diff --git a/examples/vllm-rhoai.yaml b/examples/vllm-rhoai.yaml new file mode 100644 index 000000000..f9e992b1b --- /dev/null +++ b/examples/vllm-rhoai.yaml @@ -0,0 +1,157 @@ +version: '2' +image_name: sample-notebook +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: ${env.KSVC_URL}/v1/ + api_token: ${env.VLLM_API_KEY} + tls_verify: false + max_tokens: 1024 + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: documentation_faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + tool_runtime: + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: all-mpnet-base-v2 + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + model_type: embedding + metadata: + embedding_dimension: 768 + - model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: vllm + model_type: llm + provider_model_id: meta-llama/Llama-3.2-1B-Instruct + + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: documentation_faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: all-mpnet-base-v2 +safety: + default_shield_id: llama-guard diff --git a/tests/e2e-prow/rhoai/configs/run.yaml b/tests/e2e-prow/rhoai/configs/run.yaml index ea3067e90..f9e992b1b 100644 --- a/tests/e2e-prow/rhoai/configs/run.yaml +++ b/tests/e2e-prow/rhoai/configs/run.yaml @@ -1,64 +1,18 @@ version: '2' -image_name: minimal-viable-llama-stack-configuration - +image_name: sample-notebook apis: - - agents - - datasetio - - eval - - inference - - post_training - - safety - - scoring - - telemetry - - tool_runtime - - vector_io -benchmarks: [] -container_image: null -datasets: [] -external_providers_dir: null -inference_store: - db_path: .llama/distributions/ollama/inference_store.db - type: sqlite -logging: null -metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null - type: sqlite +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + providers: - agents: - - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite - provider_id: meta-reference - provider_type: inline::meta-reference - datasetio: - - config: - kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite - provider_id: huggingface - provider_type: remote::huggingface - - config: - kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite - provider_id: localfs - provider_type: inline::localfs - eval: - - config: - kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite - provider_id: meta-reference - provider_type: inline::meta-reference inference: - provider_id: vllm provider_type: remote::vllm @@ -67,54 +21,137 @@ providers: api_token: ${env.VLLM_API_KEY} tls_verify: false max_tokens: 1024 - post_training: - - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: null - dpo_output_dir: "." - provider_id: huggingface - provider_type: inline::huggingface-gpu + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: documentation_faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + metadata_store: + table_name: files_metadata + backend: sql_default safety: - - config: - excluded_categories: [] - provider_id: llama-guard + - provider_id: llama-guard provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default scoring: - - config: {} - provider_id: basic + - provider_id: basic provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - config: - openai_api_key: '********' - provider_id: braintrust - provider_type: inline::braintrust - telemetry: - - config: - service_name: 'lightspeed-stack-telemetry' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - provider_id: meta-reference - provider_type: inline::meta-reference - tool_runtime: [] - vector_io: [] -scoring_fns: [] + config: {} + tool_runtime: + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: all-mpnet-base-v2 + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + model_type: embedding + metadata: + embedding_dimension: 768 + - model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: vllm + model_type: llm + provider_model_id: meta-llama/Llama-3.2-1B-Instruct + + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: - auth: null - host: null port: 8321 - quota: null - tls_cafile: null - tls_certfile: null - tls_keyfile: null -shields: [] -vector_dbs: [] - -models: -- model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: vllm - model_type: llm - provider_model_id: null - +telemetry: + enabled: true +vector_stores: + default_provider_id: documentation_faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: all-mpnet-base-v2 +safety: + default_shield_id: llama-guard diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml index 8af4b292c..7ec33263f 100644 --- a/tests/e2e/configs/run-rhaiis.yaml +++ b/tests/e2e/configs/run-rhaiis.yaml @@ -1,148 +1,157 @@ version: '2' -image_name: rhaiis-configuration - +image_name: sample-notebook apis: - - agents - - datasetio - - eval - - files - - inference - - post_training - - safety - - scoring - - telemetry - - tool_runtime - - vector_io -benchmarks: [] -container_image: null -datasets: [] -external_providers_dir: null -inference_store: - db_path: .llama/distributions/ollama/inference_store.db - type: sqlite -logging: null -metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null - type: sqlite +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: http://${env.RHAIIS_URL}:8000/v1/ + api_token: ${env.RHAIIS_API_KEY} + tls_verify: false + max_tokens: 2048 + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: documentation_faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default files: - - config: - storage_dir: /tmp/llama-stack-files - metadata_store: - type: sqlite - db_path: .llama/distributions/ollama/files_metadata.db - provider_id: localfs + - provider_id: meta-reference-files provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] agents: - - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite - provider_id: meta-reference + - provider_id: meta-reference provider_type: inline::meta-reference - datasetio: - - config: + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite - provider_id: huggingface + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface provider_type: remote::huggingface - - config: + config: kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite - provider_id: localfs + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs provider_type: inline::localfs - eval: - - config: + config: kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite - provider_id: meta-reference - provider_type: inline::meta-reference - inference: - - provider_id: sentence-transformers # Can be any embedding provider - provider_type: inline::sentence-transformers - config: {} - - provider_id: vllm - provider_type: remote::vllm - config: - url: http://${env.RHAIIS_URL}:8000/v1/ - api_token: ${env.RHAIIS_API_KEY} - tls_verify: false - max_tokens: 2048 - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - post_training: - - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: null - dpo_output_dir: "." - provider_id: huggingface - provider_type: inline::huggingface-gpu - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard + namespace: datasetio::localfs + backend: kv_default scoring: - - config: {} - provider_id: basic + - provider_id: basic provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - config: - openai_api_key: '********' - provider_id: braintrust - provider_type: inline::braintrust - telemetry: - - config: - service_name: 'lightspeed-stack-telemetry' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - provider_id: meta-reference - provider_type: inline::meta-reference + config: {} tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -scoring_fns: [] -server: - auth: null - host: null - port: 8321 - quota: null - tls_cafile: null - tls_certfile: null - tls_keyfile: null -shields: - - shield_id: llama-guard-shield - provider_id: llama-guard - provider_shield_id: gpt-4-turbo -models: - - metadata: - embedding_dimension: 768 # Depends on chosen model - model_id: sentence-transformers/all-mpnet-base-v2 # Example embedding model + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: all-mpnet-base-v2 provider_id: sentence-transformers - provider_model_id: sentence-transformers/all-mpnet-base-v2 # Location of embedding model + provider_model_id: all-mpnet-base-v2 model_type: embedding + metadata: + embedding_dimension: 768 - model_id: ${env.RHAIIS_MODEL} provider_id: vllm model_type: llm provider_model_id: ${env.RHAIIS_MODEL} - - model_id: gpt-4-turbo - provider_id: openai - model_type: llm - provider_model_id: gpt-4-turbo \ No newline at end of file + + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: documentation_faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: all-mpnet-base-v2 +safety: + default_shield_id: llama-guard diff --git a/tests/e2e/configs/run-rhelai.yaml b/tests/e2e/configs/run-rhelai.yaml index e4a7d6494..2d9ac373c 100644 --- a/tests/e2e/configs/run-rhelai.yaml +++ b/tests/e2e/configs/run-rhelai.yaml @@ -1,144 +1,157 @@ version: '2' -image_name: rhelai-configuration - +image_name: sample-notebook apis: - - agents - - datasetio - - eval - - files - - inference - - post_training - - safety - - scoring - - telemetry - - tool_runtime - - vector_io -benchmarks: [] -container_image: null -datasets: [] -external_providers_dir: /opt/app-root/src/.llama/providers.d -inference_store: - db_path: .llama/distributions/ollama/inference_store.db - type: sqlite -logging: null -metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null - type: sqlite +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/ + api_token: ${env.RHEL_AI_API_KEY} + tls_verify: false + max_tokens: 2048 + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: documentation_faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default files: - - config: - storage_dir: /tmp/llama-stack-files - metadata_store: - type: sqlite - db_path: .llama/distributions/ollama/files_metadata.db - provider_id: localfs + - provider_id: meta-reference-files provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] agents: - - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite - provider_id: meta-reference + - provider_id: meta-reference provider_type: inline::meta-reference - datasetio: - - config: + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite - provider_id: huggingface + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface provider_type: remote::huggingface - - config: + config: kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite - provider_id: localfs + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs provider_type: inline::localfs - eval: - - config: + config: kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite - provider_id: meta-reference - provider_type: inline::meta-reference - inference: - - provider_id: sentence-transformers # Can be any embedding provider - provider_type: inline::sentence-transformers - config: {} - - provider_id: vllm - provider_type: remote::vllm - config: - url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/ - api_token: ${env.RHEL_AI_API_KEY} - tls_verify: false - max_tokens: 2048 - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - post_training: - - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: null - dpo_output_dir: "." - provider_id: huggingface - provider_type: inline::huggingface-gpu - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard + namespace: datasetio::localfs + backend: kv_default scoring: - - config: {} - provider_id: basic + - provider_id: basic provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - config: - openai_api_key: '********' - provider_id: braintrust - provider_type: inline::braintrust - telemetry: - - config: - service_name: 'lightspeed-stack-telemetry' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - provider_id: meta-reference - provider_type: inline::meta-reference + config: {} tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -scoring_fns: [] -server: - auth: null - host: null - port: 8321 - quota: null - tls_cafile: null - tls_certfile: null - tls_keyfile: null -shields: - - shield_id: llama-guard-shield - provider_id: llama-guard - provider_shield_id: ${env.RHEL_AI_MODEL} -models: - - metadata: - embedding_dimension: 768 # Depends on chosen model - model_id: sentence-transformers/all-mpnet-base-v2 # Example embedding model + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: all-mpnet-base-v2 provider_id: sentence-transformers - provider_model_id: sentence-transformers/all-mpnet-base-v2 # Location of embedding model + provider_model_id: all-mpnet-base-v2 model_type: embedding + metadata: + embedding_dimension: 768 - model_id: ${env.RHEL_AI_MODEL} provider_id: vllm model_type: llm provider_model_id: ${env.RHEL_AI_MODEL} + + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: documentation_faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: all-mpnet-base-v2 +safety: + default_shield_id: llama-guard