223 changes: 223 additions & 0 deletions reliability-v2/config/reliability-ivs.yaml
@@ -0,0 +1,223 @@
reliability:
  kubeconfig: <path_to_kubeconfig>
  users:
    - admin_file: <path_to_admin_file>
    - user_file: <path_to_users_file>

  appTemplates:
    - template: cakephp-mysql-persistent
    - template: nodejs-postgresql-persistent # N/A for FIPS
    - template: django-psql-persistent # N/A for FIPS
    - template: rails-pgsql-persistent
    #- template: dancer-mysql-persistent
    # For tests without a PVC:
    # - template: cakephp-mysql-example
    # - template: dancer-mysql-example
    # - template: django-psql-example
    # - template: nodejs-postgresql-example

  # The max number of projects is limited by the size of the cluster.
  # For a 3-node m5.xlarge cluster, 25 to 30 projects are recommended.
  # For a 5-node m5.xlarge cluster, 60 projects are recommended.
  # The max number of projects = groups x users x new_project
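  # Worked example (illustrative numbers, not from this config): 2 groups, each with
  # 5 users, each user creating 2 projects per loop, peaks at 2 x 5 x 2 = 20 projects,
  # which fits comfortably within the 25-30 recommended for a 3-node m5.xlarge cluster.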
  groups:
    - name: admin
      # For a cluster created by the Jenkins Flexy-install job, the admin user_name is kubeadmin.
      # For a rosa cluster created in Prow, the admin user_name is rosa-admin.
      user_name: <admin_username> # admin username, e.g. kubeadmin or rosa-admin.
      loops: forever # run the group this many times. Integer > 0 or 'forever', default is 1.
      trigger: 600 # wait trigger seconds between loops
      jitter: 60 # randomly start the users in this group within jitter seconds. Default is 0.
      interval: 10 # wait interval seconds between tasks.
      # If the admin network policy function is planned in the test, uncomment the
      # following lines (see the note after this list on apply order).
      pre_tasks:
        - func apply_nonamespace -p "<path_to_content>/network-policy/00_banp_deny-triffic-reliability.yaml"
        - func apply_nonamespace -p "<path_to_content>/network-policy/01_anp_allow-common-service.yaml"
        - func apply_nonamespace -p "<path_to_content>/network-policy/02_anp_allow-traffic-egress-cidr-cluster-network-p20.yaml"
        - func apply_nonamespace -p "<path_to_content>/network-policy/04_anp_allow-traffic-dev-test-p30.yaml"
        - func apply_nonamespace -p "<path_to_content>/network-policy/03_anp_allow-traffic-dev-prod-p31.yaml"
      tasks:
        - func check_operators
        - oc get project -l purpose=reliability
        - func check_nodes
        - kubectl get pods -A -o wide | egrep -v "Completed|Running"
        # Run test cases as scripts. KUBECONFIG of the current user is set as an env variable by reliability-v2.
        #- . <path_to_script>/create-delete-pod-ensure-service.sh

    - name: dev-test
      user_name: testuser- # if user_start and user_end exist, this is the username prefix
      # For a cluster created by the Jenkins Flexy-install job, the users start from testuser-0.
      # For a cluster created in Prow that used the following step to create test users, the users start from testuser-1:
      # https://github.com/openshift/release/blob/master/ci-operator/step-registry/osd-ccs/conf/idp/htpasswd/multi-users/osd-ccs-conf-idp-htpasswd-multi-users-ref.yaml
      user_start: 1 # user_start is inclusive; start with testuser-1 in the users file.
      user_end: 6 # user_end is exclusive; end with testuser-5 in the users file.
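      # Example: user_start: 1 with user_end: 6 selects the 5 users
      # testuser-1, testuser-2, testuser-3, testuser-4, testuser-5.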
      loops: forever
      trigger: 60
      jitter: 600 # randomly start the users in this group within 10 minutes
      interval: 10
      tasks:
        - func delete_all_projects # clear all projects
        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces
        - func new_project -n 2 # create 2 new projects
        # If network policy is planned in the test, uncomment the following line.
        #- func apply -n 2 -p "<path_to_content>/network-policy/allow-same-namespace.yaml" # apply network policy to 2 projects
        - func check_all_projects # check all projects under this user
        - func apply -n 2 -p "<path_to_content>/nginx-deploy.yaml"
        - func apply -n 2 -p "<path_to_content>/nginx-service.yaml"
        - func apply -n 2 -p "<path_to_content>/route-http.yaml"
        - func load_app -n 2 -p 10 # load apps in 2 namespaces with 10 clients each
        - func check_pods -n 2 # check pods in 2 namespaces
        - func delete_project -n 2 # delete projects in 2 namespaces
        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces

    - name: dev-test-build
      user_name: testuser- # if user_start and user_end exist, this is the username prefix
      # For a cluster created by the Jenkins Flexy-install job, the users start from testuser-0.
      # For a cluster created in Prow that used the following step to create test users, the users start from testuser-1:
      # https://github.com/openshift/release/blob/master/ci-operator/step-registry/osd-ccs/conf/idp/htpasswd/multi-users/osd-ccs-conf-idp-htpasswd-multi-users-ref.yaml
      user_start: 21 # user_start is inclusive; start with testuser-21 in the users file.
      user_end: 26 # user_end is exclusive; end with testuser-25 in the users file.
      loops: forever
      trigger: 60
      jitter: 600 # randomly start the users in this group within 10 minutes
      interval: 10
      tasks:
        - func delete_all_projects # clear all projects
        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces
        - func new_project -n 2 # create 2 new projects
        # If network policy is planned in the test, uncomment the following line.
        #- func apply -n 2 -p "<path_to_content>/network-policy/allow-same-namespace.yaml" # apply network policy to 2 projects
        - func check_all_projects # check all projects under this user
        - func new_app -n 2 # create a new app in each of 2 namespaces
        - func load_app -n 2 -p 10 # load apps in 2 namespaces with 10 clients each
        - func build -n 1 # build the app in 1 namespace
        - func check_pods -n 2 # check pods in 2 namespaces
        - func delete_project -n 2 # delete projects in 2 namespaces
        - func verify_project_deletion -n 2 # verify project deletion in 2 namespaces

    - name: dev-prod
      user_name: testuser-
      user_start: 31
      user_end: 36
      loops: forever
      trigger: 600
      jitter: 3600
      interval: 600
      pre_tasks:
        - func delete_all_projects
        - func verify_project_deletion -n 8
        - func new_project -n 8
        # If network policy is planned in the test, uncomment the following line.
        #- func apply -n 8 -p "<path_to_content>/network-policy/allow-same-namespace.yaml" # apply network policy to 8 projects
        - func apply -n 8 -p "<path_to_content>/nginx-deploy.yaml"
        - func apply -n 8 -p "<path_to_content>/nginx-service.yaml"
        - func apply -n 8 -p "<path_to_content>/route-http.yaml"
      tasks:
        - func load_app -n 8 -p 50
        - func scale_deployment -n 8 -p 2 # scale deployments in 8 namespaces to 2 replicas
        - func scale_deployment -n 8 -p 1 # scale deployments in 8 namespaces to 1 replica
      post_tasks:
        - func delete_project -n 8

    - name: dev-cronjob
      user_name: testuser- # if user_start and user_end exist, this is the username prefix
      user_start: 41
      user_end: 42
      trigger: 600
      jitter: 1200
      loops: forever
      pre_tasks:
        - func delete_all_projects
        - func verify_project_deletion -n 1
        - func new_project -n 1
        - <path_to_script>/cronjob.sh -n 10
      tasks:
        - <path_to_script>/cronjob.sh -c
      post_tasks:
        - <path_to_script>/cronjob.sh -d
        - func delete_project -n 1
        - func verify_project_deletion -n 1

    - name: ivs-llama
      user_name: <admin_username>
      trigger: 3600
      jitter: 600
      interval: 10
      loops: forever
      pre_tasks:
        - oc new-project ivs-llama
        - oc label ns ivs-llama purpose=reliability
        - oc create sa llama-sa -n ivs-llama
        - oc adm policy add-scc-to-user anyuid -z llama-sa -n ivs-llama
        - oc apply -f "<path_to_content>/ivs/ivs-ai-llama.yaml" -n ivs-llama
        - sleep 300
        - oc get po -n ivs-llama
      tasks:
        # The manifest creates a Deployment (random pod-name suffixes), so the pod is
        # deleted by label rather than by a fixed name; the Deployment re-creates it.
        - oc delete po -l app=ivs-llama -n ivs-llama
        - oc get po -n ivs-llama
        - sleep 60
        - oc get po -n ivs-llama

    - name: ivs-nginx
      user_name: <admin_username>
      trigger: 600
      jitter: 600
      interval: 10
      loops: forever
      pre_tasks:
        - oc new-project ivs-nginx
        - oc label ns ivs-nginx purpose=reliability
      tasks:
        - oc apply -f "<path_to_content>/ivs/ivs-nginx.yaml" -n ivs-nginx
        - sleep 60
        - oc get po -n ivs-nginx
        - oc delete deployment ivs-nginx -n ivs-nginx # delete the deployment; the next loop re-applies it

  cerberusIntegration:
    # Start cerberus (https://github.com/cloud-bulldozer/cerberus) before starting the reliability test.
    cerberus_enable: False
    # If cerberus_enable is False, the following 2 items are ignored.
    cerberus_api: "http://0.0.0.0:8080"
    # Action to take when the cerberus status is False. Valid values: pause/halt/continue.
    cerberus_fail_action: pause
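    # Cerberus publishes its go/no-go signal over plain HTTP at cerberus_api. Assuming
    # the default setup, a quick manual check is:
    #   curl http://0.0.0.0:8080   # returns True when the cluster is healthy, False otherwise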

  slackIntegration:
    slack_enable: False
    # The ID in this example is the ID of the slack channel #ocp-qe-reliability-monitoring.
    slack_channel: C0266JJ4XM5
    # slack_member is optional. If provided, the notification message will @ you.
    # You must be a member of the slack channel to receive the notification.
    slack_member: <Your slack member id>

  krakenIntegration:
    kraken_enable: False
    # Supported Kraken scenarios: https://github.com/cloud-bulldozer/kraken-hub/blob/main/README.md#supported-chaos-scenarios
    # pod-scenarios, container-scenarios, node-scenarios, zone-outages, time-scenarios,
    # node-cpu-hog, node-memory-hog, node-io-hog
    # Please specify the parameters for each scenario. e.g. for pod-scenarios,
    # refer to https://github.com/cloud-bulldozer/kraken-hub/blob/main/docs/pod-scenarios.md#supported-parameters
    kraken_scenarios:
      - name: pod-scenarios_etcd
        scenario: "pod-scenarios"
        interval_unit: minutes # weeks, days, hours, minutes, seconds
        interval_number: 8
        # start_date: "2021-10-28 17:36:00" # Optional. Format: 2021-10-20 10:00:00.
        # end_date: "2021-10-28 17:50:00" # Optional.
        # timezone: "Asia/Shanghai" # Optional, e.g. US/Eastern. Default is "UTC".
      - name: pod-scenarios_monitoring
        scenario: "pod-scenarios"
        interval_unit: minutes # weeks, days, hours, minutes, seconds
        interval_number: 10
        parameters:
          NAMESPACE: openshift-monitoring
          POD_LABEL: app.kubernetes.io/component=prometheus
          EXPECTED_POD_COUNT: 2
      - name: node-scenarios_workerstopstart
        scenario: "node-scenarios"
        interval_unit: minutes
        interval_number: 12
        parameters:
          AWS_DEFAULT_REGION: us-east-2
          AWS_ACCESS_KEY_ID: xxxx
          AWS_SECRET_ACCESS_KEY: xxxx
          CLOUD_TYPE: aws
27 changes: 27 additions & 0 deletions reliability-v2/content/ivs/ivs-ai-llama.yaml
@@ -0,0 +1,27 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ivs-llama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ivs-llama
  template:
    metadata:
      labels:
        app: ivs-llama
    spec:
      containers:
      - name: llamacpp
        image: quay.io/ai/llamacpp:latest
        args: ["--model", "/models/llama.bin"]
        volumeMounts:
        - name: volume
          mountPath: /models
          subPath: dir
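      # The volume below uses the Kubernetes image volume source feature (presumably
      # the "ivs" this test is named for): the OCI artifact quay.io/crio/artifact:v1
      # is pulled and mounted read-only, with its "dir" sub-path exposed to the
      # container as the model directory.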
      volumes:
      - name: volume
        image:
          reference: quay.io/crio/artifact:v1
          pullPolicy: IfNotPresent
26 changes: 26 additions & 0 deletions reliability-v2/content/ivs/ivs-nginx.yaml
@@ -0,0 +1,26 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ivs-nginx
spec:
  replicas: 3
  selector:
    matchLabels:
      app: ivs-nginx
  template:
    metadata:
      labels:
        app: ivs-nginx
    spec:
      containers:
      - name: nginx
        image: quay.io/bitnami/nginx:latest
        volumeMounts:
        - name: volume
          mountPath: /usr/share/nginx/html/models
          subPath: dir
      volumes:
      - name: volume
        image:
          reference: quay.io/crio/artifact:v1
          pullPolicy: IfNotPresent