From 677ba71b9773565aee9de1ba3abe06b69c8bf353 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 18 Sep 2025 10:11:34 +0200
Subject: [PATCH] Added RHOAI E2E tests in Prow: vLLM served in OpenShift;
 llama-stack (RHOAI 2.25) and lightspeed-stack (dev-latest) served in
 OpenShift as independent containers. Tests run on a fourth pod.

---
 README.md | 1 +
 docs/providers.md | 1 +
 .../rhoai/configs/lightspeed-stack.yaml | 25 +++
 tests/e2e-prow/rhoai/configs/run.yaml | 120 +++++++++++
 .../lightspeed/lightspeed-stack.yaml | 25 +++
 .../manifests/lightspeed/llama-stack.yaml | 34 +++
 .../rhoai/manifests/operators/ds-cluster.yaml | 17 ++
 .../manifests/operators/operatorgroup.yaml | 6 +
 .../rhoai/manifests/operators/operators.yaml | 35 +++
 .../rhoai/manifests/test-pod/spin-up.yaml | 30 +++
 .../manifests/vllm/inference-service.yaml | 13 ++
 .../manifests/vllm/vllm-runtime-cpu.yaml | 65 ++++++
 tests/e2e-prow/rhoai/pipeline-services.sh | 25 +++
 tests/e2e-prow/rhoai/pipeline-test-pod.sh | 5 +
 tests/e2e-prow/rhoai/pipeline-vllm.sh | 7 +
 tests/e2e-prow/rhoai/pipeline.sh | 204 ++++++++++++++++++
 tests/e2e-prow/rhoai/run-tests.sh | 17 ++
 tests/e2e-prow/rhoai/scripts/bootstrap.sh | 47 ++++
 tests/e2e-prow/rhoai/scripts/deploy-vllm.sh | 34 +++
 .../rhoai/scripts/get-vllm-pod-info.sh | 69 ++++++
 20 files changed, 780 insertions(+)
 create mode 100644 tests/e2e-prow/rhoai/configs/lightspeed-stack.yaml
 create mode 100644 tests/e2e-prow/rhoai/configs/run.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/operators/operatorgroup.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/operators/operators.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/test-pod/spin-up.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/vllm/inference-service.yaml
 create mode 100644 tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml
 create mode 100755 tests/e2e-prow/rhoai/pipeline-services.sh
 create mode 100755 tests/e2e-prow/rhoai/pipeline-test-pod.sh
 create mode 100755 tests/e2e-prow/rhoai/pipeline-vllm.sh
 create mode 100755 tests/e2e-prow/rhoai/pipeline.sh
 create mode 100644 tests/e2e-prow/rhoai/run-tests.sh
 create mode 100755 tests/e2e-prow/rhoai/scripts/bootstrap.sh
 create mode 100755 tests/e2e-prow/rhoai/scripts/deploy-vllm.sh
 create mode 100755 tests/e2e-prow/rhoai/scripts/get-vllm-pod-info.sh

diff --git a/README.md b/README.md
index 2b85bb0b7..fa0f5c43f 100644
--- a/README.md
+++ b/README.md
@@ -124,6 +124,7 @@ Lightspeed Core Stack (LCS) supports the large language models from the provider
 | -------- | ---------------------------------------------- | ------------ | -------------- | -------------------------------------------------------------------------- |
 | OpenAI | gpt-5, gpt-4o, gpt4-turbo, gpt-4.1, o1, o3, o4 | Yes | remote::openai | [1](examples/openai-faiss-run.yaml) [2](examples/openai-pgvector-run.yaml) |
 | OpenAI | gpt-3.5-turbo, gpt-4 | No | remote::openai | |
+| RHOAI (vLLM) | meta-llama/Llama-3.2-1B-Instruct | Yes | remote::vllm | [1](tests/e2e-prow/rhoai/configs/run.yaml) |
 | RHAIIS (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhaiis.yaml) |
 | Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure |
[1](examples/azure-run.yaml) | | Azure | o1, o1-mini | No | remote::azure | | diff --git a/docs/providers.md b/docs/providers.md index d7bcb36d0..e5bb9e6ea 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -61,6 +61,7 @@ Red Hat providers: | Name | Version Tested | Type | Pip Dependencies | Supported in LCS | |---|---|---|---|:---:| +| RHOAI (vllm) | latest operator | remote | `openai` | ✅ | | RHAIIS (vllm) | 3.2.3 (on RHEL 9.20250429.0.4) | remote | `openai` | ✅ | diff --git a/tests/e2e-prow/rhoai/configs/lightspeed-stack.yaml b/tests/e2e-prow/rhoai/configs/lightspeed-stack.yaml new file mode 100644 index 000000000..cd667a4f0 --- /dev/null +++ b/tests/e2e-prow/rhoai/configs/lightspeed-stack.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Uses a remote llama-stack service + # The instance would have already been started with a llama-stack-run.yaml file + use_as_library_client: false + # Alternative for "as library use" + # use_as_library_client: true + # library_client_config_path: + url: http://${env.E2E_LLAMA_HOSTNAME}:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" + +authentication: + module: "noop" diff --git a/tests/e2e-prow/rhoai/configs/run.yaml b/tests/e2e-prow/rhoai/configs/run.yaml new file mode 100644 index 000000000..ea3067e90 --- /dev/null +++ b/tests/e2e-prow/rhoai/configs/run.yaml @@ -0,0 +1,120 @@ +version: '2' +image_name: minimal-viable-llama-stack-configuration + +apis: + - agents + - datasetio + - eval + - inference + - post_training + - safety + - scoring + - telemetry + - tool_runtime + - vector_io +benchmarks: [] +container_image: null +datasets: [] +external_providers_dir: null +inference_store: + db_path: .llama/distributions/ollama/inference_store.db + type: sqlite +logging: null +metadata_store: + db_path: .llama/distributions/ollama/registry.db + namespace: null + type: sqlite +providers: + agents: + - config: + persistence_store: + db_path: .llama/distributions/ollama/agents_store.db + namespace: null + type: sqlite + responses_store: + db_path: .llama/distributions/ollama/responses_store.db + type: sqlite + provider_id: meta-reference + provider_type: inline::meta-reference + datasetio: + - config: + kvstore: + db_path: .llama/distributions/ollama/huggingface_datasetio.db + namespace: null + type: sqlite + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + db_path: .llama/distributions/ollama/localfs_datasetio.db + namespace: null + type: sqlite + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + db_path: .llama/distributions/ollama/meta_reference_eval.db + namespace: null + type: sqlite + provider_id: meta-reference + provider_type: inline::meta-reference + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: ${env.KSVC_URL}/v1/ + api_token: ${env.VLLM_API_KEY} + tls_verify: false + max_tokens: 1024 + post_training: + - config: + checkpoint_format: huggingface + device: cpu + distributed_backend: null + dpo_output_dir: "." 
+ provider_id: huggingface + provider_type: inline::huggingface-gpu + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + - config: {} + provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - config: + openai_api_key: '********' + provider_id: braintrust + provider_type: inline::braintrust + telemetry: + - config: + service_name: 'lightspeed-stack-telemetry' + sinks: sqlite + sqlite_db_path: .llama/distributions/ollama/trace_store.db + provider_id: meta-reference + provider_type: inline::meta-reference + tool_runtime: [] + vector_io: [] +scoring_fns: [] +server: + auth: null + host: null + port: 8321 + quota: null + tls_cafile: null + tls_certfile: null + tls_keyfile: null +shields: [] +vector_dbs: [] + +models: +- model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: vllm + model_type: llm + provider_model_id: null + diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml new file mode 100644 index 000000000..095eb02f8 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/lightspeed-stack.yaml @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: Pod +metadata: + name: lightspeed-stack-service + namespace: e2e-rhoai-dsc +spec: + containers: + - name: lightspeed-stack-container + env: + - name: E2E_LLAMA_HOSTNAME + valueFrom: + secretKeyRef: + name: llama-stack-ip-secret + key: key + image: quay.io/lightspeed-core/lightspeed-stack:dev-latest + ports: + - containerPort: 8080 + volumeMounts: + - name: config + mountPath: /app-root/lightspeed-stack.yaml + subPath: lightspeed-stack.yaml + volumes: + - name: config + configMap: + name: lightspeed-stack-config diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml new file mode 100644 index 000000000..b228ab650 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: Pod +metadata: + name: llama-stack-service + namespace: e2e-rhoai-dsc +spec: + containers: + - name: llama-stack-container + env: + - name: KSVC_URL + valueFrom: + secretKeyRef: + name: api-url-secret + key: key + - name: VLLM_API_KEY + valueFrom: + secretKeyRef: + name: vllm-api-key-secret + key: key + image: quay.io/opendatahub/llama-stack:rhoai-v2.25-latest + ports: + - containerPort: 8321 + volumeMounts: + - name: app-root + mountPath: /opt/app-root/src/.llama + - name: config + mountPath: /opt/app-root/run.yaml + subPath: run.yaml + volumes: + - name: app-root + emptyDir: {} + - name: config + configMap: + name: llama-stack-config diff --git a/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml b/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml new file mode 100644 index 000000000..e9b619726 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/operators/ds-cluster.yaml @@ -0,0 +1,17 @@ +apiVersion: datasciencecluster.opendatahub.io/v1 +kind: DataScienceCluster +metadata: + name: default-dsc + namespace: e2e-rhoai-dsc +spec: + serviceMesh: + managementState: Managed + components: + kserve: + managementState: Managed + workbenches: + managementState: Removed + dashboard: + managementState: Removed + dataSciencePipelines: + managementState: Removed \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/manifests/operators/operatorgroup.yaml 
b/tests/e2e-prow/rhoai/manifests/operators/operatorgroup.yaml new file mode 100644 index 000000000..7ed06cac5 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/operators/operatorgroup.yaml @@ -0,0 +1,6 @@ +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + name: global-operators + namespace: openshift-operators +spec: \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/manifests/operators/operators.yaml b/tests/e2e-prow/rhoai/manifests/operators/operators.yaml new file mode 100644 index 000000000..2da92a08e --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/operators/operators.yaml @@ -0,0 +1,35 @@ +# Service Mesh Operator Subscription +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: servicemeshoperator + namespace: openshift-operators +spec: + channel: "stable" + name: "servicemeshoperator" + source: "redhat-operators" + sourceNamespace: "openshift-marketplace" +--- +# Serverless Operator Subscription +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: serverless-operator + namespace: openshift-operators +spec: + channel: "stable" + name: "serverless-operator" + source: "redhat-operators" + sourceNamespace: "openshift-marketplace" +--- +# RHODS Operator Subscription +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: rhods-operator + namespace: openshift-operators +spec: + channel: stable + name: rhods-operator + source: redhat-operators + sourceNamespace: openshift-marketplace \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/manifests/test-pod/spin-up.yaml b/tests/e2e-prow/rhoai/manifests/test-pod/spin-up.yaml new file mode 100644 index 000000000..f11778c0d --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/test-pod/spin-up.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: Pod +metadata: + name: test-pod + namespace: e2e-rhoai-dsc +spec: + containers: + - name: test-container + env: + - name: E2E_LSC_HOSTNAME + valueFrom: + secretKeyRef: + name: lcs-ip-secret + key: key + - name: E2E_LLAMA_HOSTNAME + valueFrom: + secretKeyRef: + name: llama-stack-ip-secret + key: key + image: registry.access.redhat.com/ubi9/python-312 + command: ["/bin/sh", "/scripts/run-tests.sh"] + volumeMounts: + - name: script-volume + mountPath: /scripts + volumes: + - name: script-volume + configMap: + name: test-script-cm + defaultMode: 0755 # Make the script executable + restartPolicy: Never \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/manifests/vllm/inference-service.yaml b/tests/e2e-prow/rhoai/manifests/vllm/inference-service.yaml new file mode 100644 index 000000000..8e8096f72 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/vllm/inference-service.yaml @@ -0,0 +1,13 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: vllm-model + namespace: e2e-rhoai-dsc +spec: + predictor: + model: + modelFormat: + name: pytorch + runtime: vllm + storage: + key: localModel diff --git a/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml new file mode 100644 index 000000000..4c81d6b01 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-cpu.yaml @@ -0,0 +1,65 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +labels: + opendatahub.io/dashboard: "true" +metadata: + annotations: + openshift.io/display-name: vLLM + name: vllm + namespace: e2e-rhoai-dsc +spec: + builtInAdapter: + modelLoadingTimeoutMillis: 90000 + containers: + - args: + # - /mnt/models/ + 
- meta-llama/Llama-3.2-1B-Instruct + - --enable-auto-tool-choice + - --tool-call-parser + - llama3_json + - --chat-template + - /mnt/chat-template/tool_chat_template_llama3.2_json.jinja + - --download-dir + - /tmp/models-cache + - --port + - "8080" + - --max-model-len + - "2048" + image: quay.io/rh-ee-cpompeia/vllm-cpu:latest + name: kserve-container + env: + - name: HF_HOME + value: /mnt/models-cache/hf_home + - name: HUGGING_FACE_HUB_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + - name: VLLM_API_KEY + valueFrom: + secretKeyRef: + name: vllm-api-key-secret + key: key + ports: + - containerPort: 8080 + name: http1 + protocol: TCP + volumeMounts: + - name: chat-template + mountPath: /mnt/chat-template + - name: models-cache + mountPath: /mnt/models-cache + - name: vllm-cache + mountPath: /.cache + volumes: + - name: chat-template + configMap: + name: vllm-chat-template + - name: models-cache + emptyDir: {} + - name: vllm-cache + emptyDir: {} + multiModel: false + supportedModelFormats: + - autoSelect: true + name: pytorch \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/pipeline-services.sh b/tests/e2e-prow/rhoai/pipeline-services.sh new file mode 100755 index 000000000..832bff727 --- /dev/null +++ b/tests/e2e-prow/rhoai/pipeline-services.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +oc apply -f "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" + +oc wait pod/llama-stack-service \ +-n e2e-rhoai-dsc --for=condition=Ready --timeout=300s + +# Get url address of llama-stack pod +oc label pod llama-stack-service pod=llama-stack-service -n e2e-rhoai-dsc + +oc expose pod llama-stack-service \ + --name=llama-stack-service-svc \ + --port=8321 \ + --type=ClusterIP \ + -n e2e-rhoai-dsc + +export E2E_LLAMA_HOSTNAME="llama-stack-service-svc.e2e-rhoai-dsc.svc.cluster.local" + +oc create secret generic llama-stack-ip-secret \ + --from-literal=key="$E2E_LLAMA_HOSTNAME" \ + -n e2e-rhoai-dsc || echo "Secret exists" + +oc apply -f "$BASE_DIR/manifests/lightspeed/lightspeed-stack.yaml" \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/pipeline-test-pod.sh b/tests/e2e-prow/rhoai/pipeline-test-pod.sh new file mode 100755 index 000000000..93bed18ec --- /dev/null +++ b/tests/e2e-prow/rhoai/pipeline-test-pod.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +oc apply -f "$BASE_DIR/manifests/test-pod/spin-up.yaml" diff --git a/tests/e2e-prow/rhoai/pipeline-vllm.sh b/tests/e2e-prow/rhoai/pipeline-vllm.sh new file mode 100755 index 000000000..20dedf752 --- /dev/null +++ b/tests/e2e-prow/rhoai/pipeline-vllm.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +"$PIPELINE_DIR/scripts/bootstrap.sh" "$PIPELINE_DIR" +"$PIPELINE_DIR/scripts/deploy-vllm.sh" "$PIPELINE_DIR" +"$PIPELINE_DIR/scripts/get-vllm-pod-info.sh" \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/pipeline.sh b/tests/e2e-prow/rhoai/pipeline.sh new file mode 100755 index 000000000..71f5a4910 --- /dev/null +++ b/tests/e2e-prow/rhoai/pipeline.sh @@ -0,0 +1,204 @@ +#!/bin/bash +set -euo pipefail +trap 'echo "❌ Pipeline failed at line $LINENO"; exit 1' ERR + + +#======================================== +# 1. GLOBAL CONFIG +#======================================== +NAMESPACE="e2e-rhoai-dsc" +MODEL_NAME="meta-llama/Llama-3.2-1B-Instruct" + + +#======================================== +# 2. 
ENVIRONMENT SETUP +#======================================== +echo "===== Setting up environment variables =====" +export HUGGING_FACE_HUB_TOKEN=$(cat /var/run/huggingface/hf-token-ces-lcore-test || true) +export VLLM_API_KEY=$(cat /var/run/vllm/vllm-api-key-lcore-test || true) + +[[ -n "$HUGGING_FACE_HUB_TOKEN" ]] && echo "✅ HUGGING_FACE_HUB_TOKEN is set" || { echo "❌ Missing HUGGING_FACE_HUB_TOKEN"; exit 1; } +[[ -n "$VLLM_API_KEY" ]] && echo "✅ VLLM_API_KEY is set" || { echo "❌ Missing VLLM_API_KEY"; exit 1; } + +# Basic info +ls -A || true +oc version +oc whoami + +#======================================== +# 3. CREATE NAMESPACE & SECRETS +#======================================== +echo "===== Creating namespace & secrets =====" +oc get ns "$NAMESPACE" >/dev/null 2>&1 || oc create namespace "$NAMESPACE" + +create_secret() { + local name=$1; shift + echo "Creating secret $name..." + oc create secret generic "$name" "$@" -n "$NAMESPACE" 2>/dev/null || echo "Secret $name exists" +} + +create_secret hf-token-secret --from-literal=token="$HUGGING_FACE_HUB_TOKEN" +create_secret vllm-api-key-secret --from-literal=key="$VLLM_API_KEY" + + +#======================================== +# 4. CONFIGMAPS +#======================================== +echo "===== Setting up configmaps =====" + +curl -sL -o tool_chat_template_llama3.2_json.jinja \ + https://raw.githubusercontent.com/vllm-project/vllm/main/examples/tool_chat_template_llama3.2_json.jinja \ + || { echo "❌ Failed to download jinja template"; exit 1; } + +oc create configmap vllm-chat-template -n "$NAMESPACE" \ + --from-file=tool_chat_template_llama3.2_json.jinja --dry-run=client -o yaml | oc apply -f - + + +#======================================== +# 5. DEPLOY vLLM +#======================================== +echo "===== Deploying vLLM =====" +./pipeline-vllm.sh +oc get pods -n "$NAMESPACE" + + +#======================================== +# 6. WAIT FOR POD & TEST API +#======================================== +source pod.env +oc wait --for=condition=Ready pod/$POD_NAME -n $NAMESPACE --timeout=300s + +echo "===== Testing vLLM endpoint =====" +start_time=$(date +%s) +timeout=200 + +while true; do + response=$(curl -sk -w "%{http_code}" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $VLLM_API_KEY" \ + -d "{ + \"model\": \"$MODEL_NAME\", + \"prompt\": \"Who won the world series in 2020?\", + \"max_new_tokens\": 100 + }" \ + "$KSVC_URL/v1/completions") + + if [[ ${#response} -ge 3 ]]; then + http_code="${response: -3}" + body="${response:0:${#response}-3}" + else + http_code="000" + body="$response" + fi + + if [[ "$http_code" == "200" && "$body" == *'"object":"text_completion"'* ]]; then + echo "✅ API test passed." + break + else + echo "❌ API test failed (HTTP $http_code)" + echo "$body" | jq . 2>/dev/null || echo "$body" + fi + + current_time=$(date +%s) + elapsed=$((current_time - start_time)) + + if (( elapsed >= timeout )); then + echo "⏰ Timeout reached ($timeout seconds). Stopping test." + exit 1 + fi + + sleep 20 +done + + +#======================================== +# 7. 
DEPLOY LIGHTSPEED STACK AND LLAMA STACK +#======================================== +echo "===== Deploying Services =====" + +create_secret api-url-secret --from-literal=key="$KSVC_URL" +oc create configmap llama-stack-config -n "$NAMESPACE" --from-file=configs/run.yaml +oc create configmap lightspeed-stack-config -n "$NAMESPACE" --from-file=configs/lightspeed-stack.yaml +oc create configmap test-script-cm -n "$NAMESPACE" --from-file=run-tests.sh + +./pipeline-services.sh + +oc wait pod/lightspeed-stack-service pod/llama-stack-service \ + -n "$NAMESPACE" --for=condition=Ready --timeout=300s +sleep 30 + +oc get pods -n "$NAMESPACE" + +echo "logs lightspeed" +oc logs lightspeed-stack-service -n "$NAMESPACE" || true +echo "logs llama" +oc logs llama-stack-service -n "$NAMESPACE" || true + +oc describe pod lightspeed-stack-service -n "$NAMESPACE" || true +oc describe pod llama-stack-service -n "$NAMESPACE" || true + + +#======================================== +# 8. EXTRACT LCS IP & STORE +#======================================== +oc label pod lightspeed-stack-service pod=lightspeed-stack-service -n $NAMESPACE + +oc expose pod lightspeed-stack-service \ + --name=lightspeed-stack-service-svc \ + --port=8080 \ + --type=ClusterIP \ + -n $NAMESPACE + +E2E_LSC_HOSTNAME="lightspeed-stack-service-svc.$NAMESPACE.svc.cluster.local" +echo "LCS IP: $E2E_LSC_HOSTNAME" + +create_secret lcs-ip-secret --from-literal=key="$E2E_LSC_HOSTNAME" + + +#======================================== +# 9. LOGGING & TEST EXECUTION +#======================================== +echo "===== Running test pod =====" +./pipeline-test-pod.sh + +sleep 20 +oc get pods -n "$NAMESPACE" + +# Wait until tests are complete +oc wait --for=condition=Ready=True pod/test-pod -n $NAMESPACE --timeout=900s || oc wait --for=condition=Ready=False pod/test-pod -n $NAMESPACE --timeout=60s + +start_time=$(date +%s) +timeout=2400 +while true; do + sleep 120 + + PHASE=$(oc get pod test-pod -n $NAMESPACE -o jsonpath='{.status.phase}') + echo "Current phase test-pod: $PHASE" + if [[ "$PHASE" == "Succeeded" || "$PHASE" == "Failed" ]]; then + break + fi + + current_time=$(date +%s) + elapsed=$((current_time - start_time)) + + if (( elapsed >= timeout )); then + echo "⏰ Timeout reached ($timeout seconds). Stopping test." + exit 1 + fi + + oc get pods -n "$NAMESPACE" +done +oc logs test-pod -n $NAMESPACE || oc describe pod test-pod -n $NAMESPACE || true + + +TEST_EXIT_CODE=$(oc get pod test-pod -n $NAMESPACE -o jsonpath='{.status.containerStatuses[0].state.terminated.exitCode}') + +echo "===== E2E COMPLETE =====" + +if [ "${TEST_EXIT_CODE:-2}" -ne 0 ]; then + echo "❌ E2E tests failed with exit code $TEST_EXIT_CODE (pod/test-pod failed)" +else + echo "✅ E2E tests succeeded" +fi + +exit $TEST_EXIT_CODE \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/run-tests.sh b/tests/e2e-prow/rhoai/run-tests.sh new file mode 100644 index 000000000..657b8124b --- /dev/null +++ b/tests/e2e-prow/rhoai/run-tests.sh @@ -0,0 +1,17 @@ +git clone https://github.com/lightspeed-core/lightspeed-stack.git +cd lightspeed-stack + +echo "pod started" +echo $E2E_LSC_HOSTNAME + +curl -f http://$E2E_LSC_HOSTNAME:8080/v1/models || { + echo "❌ Basic connectivity failed - showing logs before running full tests" + exit 1 +} + +echo "Installing test dependencies..." +pip install uv +uv sync + +echo "Running comprehensive e2e test suite..." 
+make test-e2e \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/scripts/bootstrap.sh b/tests/e2e-prow/rhoai/scripts/bootstrap.sh new file mode 100755 index 000000000..7a40ca56e --- /dev/null +++ b/tests/e2e-prow/rhoai/scripts/bootstrap.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +set -euo pipefail + +BASE_DIR="$1" + +wait_for_operator() { + local OPERATOR_LABEL=$1 + local NAMESPACE=$2 + local OPERATOR_NAME=$3 + + echo " -> Waiting for ${OPERATOR_NAME} CSV resource to be created in namespace ${NAMESPACE}..." + until oc get csv -n "${NAMESPACE}" -l "${OPERATOR_LABEL}" --no-headers 2>/dev/null | grep -q .; do + echo " ...still waiting for ${OPERATOR_NAME} CSV to show up" + sleep 5 + done + + echo " -> Waiting for ${OPERATOR_NAME} CSV to reach Succeeded..." + oc wait --for=jsonpath='{.status.phase}'=Succeeded csv -n "${NAMESPACE}" -l "${OPERATOR_LABEL}" --timeout=600s +} + +# APPLY OPERATOR SUBSCRIPTIONS +echo "--> Applying Operator Subscriptions from operators.yaml..." +oc apply -f "$BASE_DIR/manifests/operators/operators.yaml" + +# WAIT FOR OPERATORS TO BECOME READY +echo "--> Waiting for Operators to be installed. This can take several minutes..." + +# Ensure the ClusterServiceVersion CRD exists before checking for CSVs +oc wait --for=condition=established --timeout=300s crd/clusterserviceversions.operators.coreos.com + +wait_for_operator "operators.coreos.com/servicemeshoperator.openshift-operators" "openshift-operators" "Service Mesh Operator" +wait_for_operator "operators.coreos.com/serverless-operator.openshift-operators" "openshift-operators" "Serverless Operator" +wait_for_operator "operators.coreos.com/rhods-operator.openshift-operators" "openshift-operators" "RHODS Operator" + +echo "--> All operators are ready." + +oc get csv -n openshift-operators + +# APPLY DEPENDENT RESOURCES +echo "--> Applying OperatorGroup from operatorgroup.yaml..." +oc apply -f "$BASE_DIR/manifests/operators/operatorgroup.yaml" + +echo "--> Applying DataScienceCluster from ds-cluster.yaml..." +oc apply -f "$BASE_DIR/manifests/operators/ds-cluster.yaml" + +echo "All files applied successfully. The DataScienceCluster is now provisioning." diff --git a/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh b/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh new file mode 100755 index 000000000..4d31e663c --- /dev/null +++ b/tests/e2e-prow/rhoai/scripts/deploy-vllm.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +BASE_DIR="$1" + +# Wait until the CRDs exist +for crd in servingruntimes.serving.kserve.io inferenceservices.serving.kserve.io; do + echo "Waiting for CRD $crd to exist..." + until oc get crd $crd &>/dev/null; do + sleep 5 + done + echo "CRD $crd exists. Waiting to be established..." + oc wait --for=condition=established crd/$crd --timeout=120s +done + +# Wait for KServe controller deployment to appear +echo "Waiting for kserve-controller-manager deployment to be created..." +until oc get deployment kserve-controller-manager -n redhat-ods-applications &>/dev/null; do + sleep 10 +done + +# Wait for rollout to complete +echo "Waiting for kserve-controller-manager rollout..." +oc rollout status deployment/kserve-controller-manager -n redhat-ods-applications --timeout=300s + +# Wait for the webhook service endpoints to become ready +echo "Waiting for KServe webhook service endpoints..." +until oc get endpoints kserve-webhook-server-service -n redhat-ods-applications -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null | grep -qE '.'; do + sleep 5 +done +echo "✅ KServe webhook service is ready." 
+
+oc apply -f "$BASE_DIR/manifests/vllm/vllm-runtime-cpu.yaml"
+
+oc apply -f "$BASE_DIR/manifests/vllm/inference-service.yaml"
\ No newline at end of file
diff --git a/tests/e2e-prow/rhoai/scripts/get-vllm-pod-info.sh b/tests/e2e-prow/rhoai/scripts/get-vllm-pod-info.sh
new file mode 100755
index 000000000..ac693a47f
--- /dev/null
+++ b/tests/e2e-prow/rhoai/scripts/get-vllm-pod-info.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+set -e
+
+NAMESPACE="e2e-rhoai-dsc"
+ISVC_NAME="${1:-vllm-model}"
+ENV_FILE="${ENV_FILE:-pod.env}"
+
+KSVC_NAME="${ISVC_NAME}-predictor"
+
+echo "--> Finding the pod for InferenceService '$ISVC_NAME'..."
+
+# Find the running pod for the InferenceService
+POD_NAME=""
+TIMEOUT=240  # seconds
+INTERVAL=5   # check interval
+ELAPSED=0
+
+until [ -n "$POD_NAME" ] || [ $ELAPSED -ge $TIMEOUT ]; do
+  POD_NAME=$(oc get pods -n "$NAMESPACE" \
+    -l "serving.kserve.io/inferenceservice=$ISVC_NAME" \
+    -o jsonpath="{.items[?(@.status.phase=='Running')].metadata.name}" 2>/dev/null)
+  echo "Waiting for pod $POD_NAME in namespace $NAMESPACE"
+
+  if [ -z "$POD_NAME" ]; then
+    echo " -> Pod not running yet, waiting $INTERVAL seconds..."
+    sleep $INTERVAL
+    ELAPSED=$((ELAPSED + INTERVAL))
+  fi
+done
+
+oc describe pod $POD_NAME -n $NAMESPACE || true
+oc logs $POD_NAME -n $NAMESPACE || true
+
+POD_NAME=$(oc get pods -n $NAMESPACE -o jsonpath='{.items[0].metadata.name}')
+
+if [ -z "$POD_NAME" ]; then
+  echo " -> Timeout reached after $TIMEOUT seconds. Pod is not running."
+else
+  echo " -> Pod is running: $POD_NAME"
+fi
+
+# Get the 'app' label for Service selector
+APP_LABEL=$(oc get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.metadata.labels.app}')
+if [ -z "$APP_LABEL" ]; then
+  echo "Error: Could not find 'app' label on pod $POD_NAME"
+  exit 1
+fi
+echo " -> Found 'app' label: $APP_LABEL"
+
+# Get the Knative Service URL
+KSVC_URL=$(oc get ksvc "$KSVC_NAME" -n "$NAMESPACE" -o jsonpath='{.status.url}')
+if [ -z "$KSVC_URL" ]; then
+  echo "Error: Could not retrieve Knative URL for $KSVC_NAME"
+  exit 1
+fi
+echo " -> Found Knative URL: $KSVC_URL"
+
+# Save all info to pod.env
+cat <<EOF > "$ENV_FILE"
+# Environment variables for the vLLM service
+POD_NAME=$POD_NAME
+APP_LABEL=$APP_LABEL
+NAMESPACE=$NAMESPACE
+ISVC_NAME=$ISVC_NAME
+KSVC_NAME=$KSVC_NAME
+KSVC_URL=$KSVC_URL
+EOF
+
+echo "✅ Success! Details saved in $ENV_FILE."
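
Note: outside Prow, the same flow can be reproduced by hand. The sketch below is illustrative only: it assumes an oc login with cluster-admin rights on a disposable OpenShift cluster, and it recreates the two secret files at the Prow-mounted paths that pipeline.sh reads. MY_HF_TOKEN and MY_VLLM_KEY are placeholder variables, not part of this change.

    # Recreate the secret files pipeline.sh expects (Prow normally mounts these).
    sudo mkdir -p /var/run/huggingface /var/run/vllm
    printf '%s' "$MY_HF_TOKEN" | sudo tee /var/run/huggingface/hf-token-ces-lcore-test >/dev/null
    printf '%s' "$MY_VLLM_KEY" | sudo tee /var/run/vllm/vllm-api-key-lcore-test >/dev/null

    # pipeline.sh resolves its helper scripts and config files relative to the
    # current directory, so run it from the rhoai test directory.
    cd tests/e2e-prow/rhoai
    ./pipeline.sh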