diff --git a/docker/build/edxapp/lms.yml b/docker/build/edxapp/lms.yml
index c0e34bb4e..339291182 100644
--- a/docker/build/edxapp/lms.yml
+++ b/docker/build/edxapp/lms.yml
@@ -240,6 +240,10 @@ EDX_PLATFORM_REVISION: master
 ELASTIC_SEARCH_CONFIG:
 - host: edx.devstack.elasticsearch710
   port: 9200
   use_ssl: false
+MEILI_SEARCH_CONFIG:
+- host: edx.devstack.meilisearch184
+  port: 7700
+  use_ssl: false
 EMAIL_BACKEND: django.core.mail.backends.smtp.EmailBackend
 EMAIL_HOST: localhost
diff --git a/docker/build/edxapp/studio.yml b/docker/build/edxapp/studio.yml
index d5603e0b4..78af3066c 100644
--- a/docker/build/edxapp/studio.yml
+++ b/docker/build/edxapp/studio.yml
@@ -217,6 +217,10 @@ EDX_PLATFORM_REVISION: master
 ELASTIC_SEARCH_CONFIG:
 - host: edx.devstack.elasticsearch710
   port: 9200
   use_ssl: false
+MEILI_SEARCH_CONFIG:
+- host: edx.devstack.meilisearch184
+  port: 7700
+  use_ssl: false
 EMAIL_BACKEND: django.core.mail.backends.smtp.EmailBackend
 EMAIL_HOST: localhost
diff --git a/docker/build/meilisearch-devstack/Dockerfile b/docker/build/meilisearch-devstack/Dockerfile
new file mode 100644
index 000000000..8bcc3fd86
--- /dev/null
+++ b/docker/build/meilisearch-devstack/Dockerfile
@@ -0,0 +1,13 @@
+# docker build -f docker/build/meilisearch-devstack/Dockerfile . -t edxops/meilisearch:devstack
+
+FROM getmeili/meilisearch:v1.11.0
+LABEL maintainer="edxops"
+
+# Default working directory
+WORKDIR /meili_data
+
+# Expose Meilisearch default port
+EXPOSE 7700
+
+# Start Meilisearch
+CMD ["meilisearch", "--db-path", "/meili_data", "--http-addr", "0.0.0.0:7700"]
diff --git a/docker/build/meilisearch/Dockerfile b/docker/build/meilisearch/Dockerfile
new file mode 100644
index 000000000..a251c6228
--- /dev/null
+++ b/docker/build/meilisearch/Dockerfile
@@ -0,0 +1,17 @@
+FROM edxops/xenial-common:latest
+LABEL maintainer="edxops"
+
+# Copy ansible roles/playbooks into container
+ADD . /edx/app/edx_ansible/edx_ansible
+WORKDIR /edx/app/edx_ansible/edx_ansible/docker/plays
+
+# Install Meilisearch with the meilisearch role. The "manage" tasks are
+# skipped: presumably no init system runs during an image build (confirm).
+RUN /edx/app/edx_ansible/venvs/edx_ansible/bin/ansible-playbook meilisearch.yml -c local \
+    -i '127.0.0.1,' --skip-tags manage
+
+WORKDIR /edx/var/meilisearch
+# Run Meilisearch in the foreground; it logs to stderr, so there is no log
+# file to tail. Paths assume the default COMMON_APP_DIR / COMMON_DATA_DIR.
+CMD ["/edx/app/meilisearch/meilisearch", "--db-path", "/edx/var/meilisearch", "--http-addr", "0.0.0.0:7700"]
+EXPOSE 7700
diff --git a/docker/plays/meilisearch.yml b/docker/plays/meilisearch.yml
new file mode 100644
index 000000000..3c437f33a
--- /dev/null
+++ b/docker/plays/meilisearch.yml
@@ -0,0 +1,5 @@
+- hosts: all
+  become: True
+  roles:
+    - common
+    - meilisearch
diff --git a/playbooks/edx_continuous_integration.yml b/playbooks/edx_continuous_integration.yml
index fdc34ee38..18cfab1e9 100644
--- a/playbooks/edx_continuous_integration.yml
+++ b/playbooks/edx_continuous_integration.yml
@@ -38,6 +38,7 @@
     - oraclejdk
     - elasticsearch
     - opensearch
+    - meilisearch
     - forum
     - { role: "xqueue", update_users: True }
     - edx_ansible
diff --git a/playbooks/meilisearch.yml b/playbooks/meilisearch.yml
new file mode 100644
index 000000000..f5c3bb463
--- /dev/null
+++ b/playbooks/meilisearch.yml
@@ -0,0 +1,52 @@
+---
+- hosts: all
+  become: true
+  vars:
+    # By default take instances in and out of the elb(s) they
+    # are attached to
+    # To skip elb operations use "-e elb_pre_post=false"
+    elb_pre_post: true
+    # Number of instances to operate on at a time
+    serial_count: 1
+    # Forwarded to the meilisearch role below
+    MEILI_VERSION: "v1.11.0"
+    MEILI_PORT: 7700
+  serial: "{{ serial_count }}"
+  pre_tasks:
+    - action: ec2_metadata_facts
+      when: elb_pre_post
+    - debug:
+        var: ansible_ec2_instance_id
+      when: elb_pre_post
+    - name: Instance De-register
+      local_action: ec2_elb
+      args:
+        instance_id: "{{ ansible_ec2_instance_id }}"
+        region: us-east-1
+        state: absent
+        wait_timeout: 60
+      become: false
+      when: elb_pre_post
+
+  roles:
+    - common
+    - role: aws
+      when: COMMON_ENABLE_AWS_ROLE
+    - role: meilisearch
+      meilisearch_version: "{{ MEILI_VERSION }}"
+      meilisearch_port: "{{ MEILI_PORT }}"
+
+  post_tasks:
+    - debug:
+        var: ansible_ec2_instance_id
+      when: elb_pre_post
+    - name: Register instance in the elb
+      local_action: ec2_elb
+      args:
+        instance_id: "{{ ansible_ec2_instance_id }}"
+        ec2_elbs: "{{ ec2_elbs }}"
+        region: us-east-1
+        state: present
+        wait_timeout: 60
+      become: false
+      when: elb_pre_post
diff --git a/playbooks/roles/edxapp/defaults/main.yml b/playbooks/roles/edxapp/defaults/main.yml
index bd923eee8..95be79b66 100644
--- a/playbooks/roles/edxapp/defaults/main.yml
+++ b/playbooks/roles/edxapp/defaults/main.yml
@@ -170,6 +170,21 @@ EDXAPP_ELASTIC_SEARCH_CONFIG:
     port: "{{ EDXAPP_SEARCH_PORT }}"
     use_ssl: "{{ EDXAPP_SEARCH_USE_SSL }}"
 
+# Meilisearch connection settings. These are deliberately separate from
+# EDXAPP_SEARCH_*, which describe the Elasticsearch endpoint.
+EDXAPP_MEILI_SEARCH_HOST: 'localhost'
+EDXAPP_MEILI_SEARCH_PORT: 7700
+EDXAPP_MEILI_SEARCH_USE_SSL: false
+EDXAPP_MEILI_SEARCH_API_KEY: ''
+
+# list of dictionaries of the format
+# { 'host': 'hostname', 'port': 7700, 'use_ssl': false, 'api_key': 'key' }
+EDXAPP_MEILI_SEARCH_CONFIG:
+  - host: "{{ EDXAPP_MEILI_SEARCH_HOST }}"
+    port: "{{ EDXAPP_MEILI_SEARCH_PORT }}"
+    use_ssl: "{{ EDXAPP_MEILI_SEARCH_USE_SSL }}"
+    api_key: "{{ EDXAPP_MEILI_SEARCH_API_KEY }}"
+
 EDXAPP_SETTINGS: '{{ COMMON_EDXAPP_SETTINGS }}'
 
 EDXAPP_LMS_ENV: 'lms.envs.{{ EDXAPP_SETTINGS }}'
@@ -1582,6 +1597,7 @@ generic_env_config: &edxapp_generic_env
     user: '{{ edxapp_sandbox_user }}'
   AFFILIATE_COOKIE_NAME: "{{ EDXAPP_AFFILIATE_COOKIE_NAME }}"
   ELASTIC_SEARCH_CONFIG: "{{ EDXAPP_ELASTIC_SEARCH_CONFIG }}"
+  MEILI_SEARCH_CONFIG: "{{ EDXAPP_MEILI_SEARCH_CONFIG }}"
   PLATFORM_TWITTER_ACCOUNT: "{{ EDXAPP_PLATFORM_TWITTER_ACCOUNT }}"
   PLATFORM_FACEBOOK_ACCOUNT: "{{ EDXAPP_PLATFORM_FACEBOOK_ACCOUNT }}"
   HELP_TOKENS_BOOKS: "{{ EDXAPP_HELP_TOKENS_BOOKS }}"
diff --git a/playbooks/roles/meilisearch/defaults/main.yml b/playbooks/roles/meilisearch/defaults/main.yml
new file mode 100644
index 000000000..d596b202a
--- /dev/null
+++ b/playbooks/roles/meilisearch/defaults/main.yml
@@ -0,0 +1,35 @@
+---
+meilisearch_app_dir: "{{ COMMON_APP_DIR }}/meilisearch"
+meilisearch_data_dir: "{{ COMMON_DATA_DIR }}/meilisearch"
+meilisearch_log_dir: "{{ COMMON_LOG_DIR }}/meilisearch"
+meilisearch_cfg_dir: "{{ COMMON_CFG_DIR }}/meilisearch"
+
+# Version of Meilisearch to install
+meilisearch_version: "v1.11.0"
+
+# Meilisearch doesn't come from an apt repo (like Elasticsearch),
+# we fetch the binary directly from GitHub releases.
+meilisearch_download_url: "https://github.com/meilisearch/meilisearch/releases/download/{{ meilisearch_version }}/meilisearch-linux-amd64"
+
+meilisearch_user: "meilisearch"
+meilisearch_group: "meilisearch"
+
+# Defaults for a single server installation.
+MEILISEARCH_CLUSTER_MEMBERS: []  # Not typically needed, but kept for parity
+MEILISEARCH_ENV: "development"
+# Service management
+MEILISEARCH_START_TIMEOUT: "300"
+
+# Network and security settings
+meilisearch_host: "0.0.0.0"
+meilisearch_port: 7700
+MEILISEARCH_URL: "http://{{ meilisearch_host }}:{{ meilisearch_port }}"
+MEILISEARCH_PUBLIC_URL: "http://{{ meilisearch_host }}:{{ meilisearch_port }}"
+
+# Logging and telemetry
+meilisearch_log_level: "INFO"
+meilisearch_no_analytics: true
+
+# These are placeholders - actual secrets come from sandbox repo
+MEILISEARCH_MASTER_KEY: "{{ lookup('env', 'MEILISEARCH_MASTER_KEY') | default('changeme', true) }}"
+MEILISEARCH_API_KEY: "{{ lookup('env', 'MEILISEARCH_API_KEY') | default('changeme', true) }}"
diff --git a/playbooks/roles/meilisearch/meta/main.yml b/playbooks/roles/meilisearch/meta/main.yml
new file mode 100644
index 000000000..2083f0e12
--- /dev/null
+++ b/playbooks/roles/meilisearch/meta/main.yml
@@ -0,0 +1,3 @@
+---
+dependencies:
+  - common
diff --git a/playbooks/roles/meilisearch/tasks/main.yml b/playbooks/roles/meilisearch/tasks/main.yml
new file mode 100644
index 000000000..ea29e308a
--- /dev/null
+++ b/playbooks/roles/meilisearch/tasks/main.yml
@@ -0,0 +1,119 @@
+---
+# meilisearch
+#
+# Dependencies:
+#
+#   * common
+#
+# Example play:
+#
+#   - hosts: all
+#     become: true
+#     roles:
+#       - meilisearch
+#
+# This role installs and configures the Meilisearch service.
+# It can be used for single-server installs (default).
+#
+
+- name: create meilisearch group
+  group:
+    name: "{{ meilisearch_group }}"
+    state: present
+  tags:
+    - install
+    - install:base
+
+- name: create meilisearch user
+  user:
+    name: "{{ meilisearch_user }}"
+    group: "{{ meilisearch_group }}"
+    shell: /usr/sbin/nologin
+    system: true
+    create_home: false
+  tags:
+    - install
+    - install:base
+
+- name: create directories
+  file:
+    path: "{{ item }}"
+    state: directory
+    owner: "{{ meilisearch_user }}"
+    group: "{{ meilisearch_group }}"
+    mode: "0755"
+  loop:
+    - "{{ meilisearch_data_dir }}"
+    - "{{ meilisearch_log_dir }}"
+    - "{{ meilisearch_cfg_dir }}"
+    - "{{ meilisearch_app_dir }}"
+  tags:
+    - install
+    - install:base
+
+# The binary is stored under a versioned name: get_url skips a dest that
+# already exists, so re-runs are idempotent while a version bump still
+# triggers a fresh download.
+- name: download meilisearch binary
+  get_url:
+    url: "{{ meilisearch_download_url }}"
+    dest: "{{ meilisearch_app_dir }}/meilisearch-{{ meilisearch_version }}"
+    mode: "0755"
+  tags:
+    - install
+    - install:base
+
+- name: point the meilisearch executable at the installed version
+  file:
+    src: "{{ meilisearch_app_dir }}/meilisearch-{{ meilisearch_version }}"
+    dest: "{{ meilisearch_app_dir }}/meilisearch"
+    state: link
+  register: meilisearch_binary
+  tags:
+    - install
+    - install:base
+
+- name: drop the meilisearch config (env file)
+  template:
+    src: edx/etc/meilisearch/meilisearch.yml.j2
+    dest: "{{ meilisearch_cfg_dir }}/meilisearch"
+    owner: root
+    group: "{{ meilisearch_group }}"
+    mode: "0640"
+  register: meilisearch_env_file
+  tags:
+    - install
+    - install:configuration
+
+- name: drop the meilisearch systemd service config
+  template:
+    src: lib/systemd/system/meilisearch.service.j2
+    dest: "/lib/systemd/system/meilisearch.service"
+    mode: "0644"
+  register: meilisearch_unit_file
+  tags:
+    - install
+    - install:configuration
+
+- name: Ensure meilisearch is enabled and started
+  systemd:
+    name: meilisearch
+    state: started
+    enabled: true
+    daemon_reload: true
+  tags:
+    - manage
+    - manage:start
+
+- name: Restart meilisearch when the binary or its configuration changed
+  service:
+    name: meilisearch
+    state: restarted
+  when: >-
+    (meilisearch_binary.changed | default(false))
+    or (meilisearch_env_file.changed | default(false))
+    or (meilisearch_unit_file.changed | default(false))
+  tags:
+    - manage
+    - manage:restart
+    - install
diff --git a/playbooks/roles/meilisearch/templates/edx/etc/meilisearch/meilisearch.yml.j2 b/playbooks/roles/meilisearch/templates/edx/etc/meilisearch/meilisearch.yml.j2
new file mode 100644
index 000000000..254dd5742
--- /dev/null
+++ b/playbooks/roles/meilisearch/templates/edx/etc/meilisearch/meilisearch.yml.j2
@@ -0,0 +1,53 @@
+# {{ ansible_managed }}
+
+# ======================== Meilisearch Configuration =========================
+#
+# Environment file loaded by the meilisearch systemd unit (EnvironmentFile=).
+# Meilisearch comes with safe defaults for most settings; only override what
+# you need for your deployment. Option reference:
+# https://www.meilisearch.com/docs/learn/self_hosted/configure_meilisearch_at_launch
+#
+# systemd does not strip trailing comments from assignment lines, so keep
+# every comment on a line of its own.
+# ----------------------------------------------------------------------------
+
+# ---------------------------------- Instance --------------------------------
+# "development" or "production". Production refuses to start without a
+# master key of at least 16 bytes.
+MEILI_ENV={{ MEILISEARCH_ENV }}
+
+# Log verbosity: ERROR, WARN, INFO, DEBUG, TRACE or OFF. Logs are written to
+# stderr, which the systemd unit forwards to the journal.
+MEILI_LOG_LEVEL={{ meilisearch_log_level | default("INFO") }}
+
+# ---------------------------------- Paths -----------------------------------
+# Path to store Meilisearch data (indexes)
+MEILI_DB_PATH={{ meilisearch_data_dir }}
+
+# Directory to store snapshots
+MEILI_SNAPSHOT_DIR={{ meilisearch_snapshot_dir | default(meilisearch_data_dir + "/snapshots") }}
+
+# Directory to store dumps
+MEILI_DUMP_DIR={{ meilisearch_dump_dir | default(meilisearch_data_dir + "/dumps") }}
+
+# ---------------------------------- Network ---------------------------------
+# Address and port Meilisearch binds to, as host:port
+MEILI_HTTP_ADDR={{ meilisearch_host | default("0.0.0.0") }}:{{ meilisearch_port | default("7700") }}
+
+# ---------------------------------- Security --------------------------------
+# The master key protects the API and is used to derive API keys.
+# !! Make sure to secure this in production !!
+MEILI_MASTER_KEY={{ MEILISEARCH_MASTER_KEY }}
+
+# Disable the telemetry Meilisearch sends by default
+MEILI_NO_ANALYTICS={{ meilisearch_no_analytics | default(true) | string | lower }}
+
+# ---------------------------------- Optional --------------------------------
+{% if meilisearch_schedule_snapshot is defined %}
+# Interval, in seconds, between scheduled snapshots
+MEILI_SCHEDULE_SNAPSHOT={{ meilisearch_schedule_snapshot }}
+{% endif %}
+{% if meilisearch_max_indexing_threads is defined %}
+# Maximum number of threads used while indexing
+MEILI_MAX_INDEXING_THREADS={{ meilisearch_max_indexing_threads }}
+{% endif %}
diff --git a/playbooks/roles/meilisearch/templates/lib/systemd/system/meilisearch.service.j2 b/playbooks/roles/meilisearch/templates/lib/systemd/system/meilisearch.service.j2
new file mode 100644
index 000000000..ea23bd8b7
--- /dev/null
+++ b/playbooks/roles/meilisearch/templates/lib/systemd/system/meilisearch.service.j2
@@ -0,0 +1,49 @@
+[Unit]
+Description=Meilisearch
+Documentation=https://www.meilisearch.com
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+Type=simple
+RuntimeDirectory=meilisearch
+PrivateTmp=true
+
+# All MEILI_* options, including the master key, come from this file so that
+# secrets never appear on the command line (and therefore in `ps` output).
+EnvironmentFile={{ meilisearch_cfg_dir }}/meilisearch
+
+WorkingDirectory={{ meilisearch_data_dir }}
+
+User={{ meilisearch_user }}
+Group={{ meilisearch_group }}
+
+ExecStart={{ meilisearch_app_dir }}/meilisearch
+
+StandardOutput=journal
+StandardError=inherit
+
+# Resource limits (similar to Elasticsearch service)
+LimitNOFILE=65535
+LimitNPROC=4096
+LimitAS=infinity
+LimitFSIZE=infinity
+
+# Disable timeout logic and wait until process is stopped
+TimeoutStopSec=0
+
+# SIGTERM is used to gracefully stop Meilisearch
+KillSignal=SIGTERM
+KillMode=process
+SendSIGKILL=no
+
+# Meilisearch exits cleanly with code 0
+SuccessExitStatus=0
+
+# Allow some startup time before systemd marks it failed
+TimeoutStartSec={{ MEILISEARCH_START_TIMEOUT | default(30) }}
+
+[Install]
+WantedBy=multi-user.target
+
+# Built for Meilisearch {{ meilisearch_version | default('latest') }}
diff --git a/requirements/meilisearch.in b/requirements/meilisearch.in
new file mode 100644
index 000000000..5e2d73291
--- /dev/null
+++ b/requirements/meilisearch.in
@@ -0,0 +1,5 @@
+# Requirements for util/meilisearch/verify-index-copy.py
+
+deepdiff==8.6.0
+meilisearch==0.37.0
+jsonpickle==4.0.2
diff --git a/util/meilisearch/copy-index.sh b/util/meilisearch/copy-index.sh
new file mode 100644
index 000000000..57ee1ed77
--- /dev/null
+++ b/util/meilisearch/copy-index.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Copy every document of an Elasticsearch index into a Meilisearch index.
+# Requires curl and jq.
+#
+# Usage:
+#   ./copy-index.sh SOURCE_ES SOURCE_INDEX TARGET_MEILI TARGET_INDEX [API_KEY]
+
+if [ "$#" -lt 4 ]; then
+  echo "Usage: $0 SOURCE_ES SOURCE_INDEX TARGET_MEILI TARGET_INDEX [API_KEY]" >&2
+  exit 1
+fi
+
+SOURCE_ES=$1
+SOURCE_INDEX=$2
+TARGET_MEILI=$3
+TARGET_INDEX=$4
+API_KEY="${5:-}"
+
+AUTH_HEADER=()
+if [ -n "$API_KEY" ]; then
+  AUTH_HEADER=(-H "Authorization: Bearer $API_KEY")
+fi
+
+# Wrapper around curl for Meilisearch calls. The ${arr[@]+...} form keeps an
+# empty AUTH_HEADER from tripping "set -u" on bash versions before 4.4.
+meili_curl() {
+  curl -sS ${AUTH_HEADER[@]+"${AUTH_HEADER[@]}"} "$@"
+}
+
+# Recreate target index (optional but safer for fresh import)
+meili_curl -X DELETE "$TARGET_MEILI/indexes/$TARGET_INDEX" || true
+meili_curl -X POST "$TARGET_MEILI/indexes" \
+  -H "Content-Type: application/json" \
+  -d "$(jq -n --arg uid "$TARGET_INDEX" '{uid: $uid}')"
+
+# Scroll through Elasticsearch
+SCROLL="1m"
+SIZE=500
+
+# The initial search already returns the first page of hits together with
+# the scroll id, so that page is copied before the next one is requested.
+RESPONSE=$(curl -sS "$SOURCE_ES/$SOURCE_INDEX/_search?scroll=$SCROLL" \
+  -H 'Content-Type: application/json' \
+  -d "{ \"size\": $SIZE }")
+
+while : ; do
+  SCROLL_ID=$(echo "$RESPONSE" | jq -r '._scroll_id // empty')
+  if [ -z "$SCROLL_ID" ]; then
+    echo "ERROR: no scroll id in Elasticsearch response: $RESPONSE" >&2
+    exit 1
+  fi
+
+  DOCS=$(echo "$RESPONSE" | jq -c '[.hits.hits[]._source]')
+  COUNT=$(echo "$DOCS" | jq 'length')
+
+  if [ "$COUNT" -eq 0 ]; then
+    break
+  fi
+
+  echo "$DOCS" | meili_curl -X POST \
+    "$TARGET_MEILI/indexes/$TARGET_INDEX/documents" \
+    -H "Content-Type: application/json" \
+    --data-binary @-
+
+  RESPONSE=$(curl -sS "$SOURCE_ES/_search/scroll" \
+    -H 'Content-Type: application/json' \
+    -d "$(jq -n --arg scroll "$SCROLL" --arg id "$SCROLL_ID" \
+      '{scroll: $scroll, scroll_id: $id}')")
+done
+
+# Cleanup scroll context
+curl -sS -X DELETE "$SOURCE_ES/_search/scroll" \
+  -H 'Content-Type: application/json' \
+  -d "$(jq -n --arg id "$SCROLL_ID" '{scroll_id: [$id]}')" > /dev/null || true
+
+echo "Copied index $SOURCE_INDEX from $SOURCE_ES to $TARGET_INDEX on $TARGET_MEILI"
diff --git a/util/meilisearch/forums-incremental-reindex.sh b/util/meilisearch/forums-incremental-reindex.sh
new file mode 100644
index 000000000..63bfe376c
--- /dev/null
+++ b/util/meilisearch/forums-incremental-reindex.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Periodically re-submit recently updated documents of a Meilisearch index.
+# Requires curl, jq and GNU date.
+#
+# Usage:
+#   ./forums-incremental-reindex.sh INDEX [WINDOW] [SLEEP_TIME] [BATCH_SIZE]
+#
+#   INDEX       uid of the index to process
+#   WINDOW      look-back window in minutes (default 5)
+#   SLEEP_TIME  seconds between passes, 0 runs a single pass (default 60)
+#   BATCH_SIZE  maximum number of documents fetched per pass (default 500)
+#
+# Environment:
+#   MEILI_URL      base URL of the instance (default http://localhost:7700)
+#   MEILI_API_KEY  API key, when the instance is protected
+#
+# Example:
+#   ./forums-incremental-reindex.sh content 30
+#
+# NOTE(review): documents are read from and posted back to the same index,
+# and only the first BATCH_SIZE documents are inspected on each pass.
+# Confirm that this is the intended data source for the forums reindex.
+
+if [ "$#" -lt 1 ]; then
+  echo "Usage: $0 INDEX [WINDOW] [SLEEP_TIME] [BATCH_SIZE]" >&2
+  exit 1
+fi
+
+INDEX="$1"
+WINDOW="${2:-5}"
+SLEEP_TIME="${3:-60}"
+BATCH_SIZE="${4:-500}"
+API_KEY="${MEILI_API_KEY:-}"
+MEILI_URL="${MEILI_URL:-http://localhost:7700}"
+
+# Equal values are rejected too: each pass takes time, so they would leave a gap.
+if [ "$SLEEP_TIME" -ge "$((WINDOW * 60))" ]; then
+  echo 'ERROR: SLEEP_TIME must be shorter than WINDOW, or else documents may be missed.' >&2
+  exit 1
+fi
+
+AUTH_HEADER=()
+if [ -n "$API_KEY" ]; then
+  AUTH_HEADER=(-H "Authorization: Bearer $API_KEY")
+fi
+
+# Wrapper around curl for Meilisearch calls. The ${arr[@]+...} form keeps an
+# empty AUTH_HEADER from tripping "set -u" on bash versions before 4.4.
+meili_curl() {
+  curl -sS ${AUTH_HEADER[@]+"${AUTH_HEADER[@]}"} "$@"
+}
+
+while : ; do
+  echo "Fetching documents newer than $WINDOW minutes..."
+
+  # Fetch documents newer than $WINDOW minutes using Meilisearch API.
+  # GET /documents answers with an object whose "results" key holds the list.
+  SINCE_TIMESTAMP=$(date -u -d "$WINDOW minutes ago" +"%Y-%m-%dT%H:%M:%SZ")
+  NEW_DOCS=$(meili_curl "$MEILI_URL/indexes/$INDEX/documents?limit=$BATCH_SIZE" \
+    | jq -c --arg since "$SINCE_TIMESTAMP" '[.results[] | select(.updated_at >= $since)]')
+
+  if [ -n "$NEW_DOCS" ] && [ "$NEW_DOCS" != "[]" ]; then
+    echo "$NEW_DOCS" | meili_curl -X POST "$MEILI_URL/indexes/$INDEX/documents" \
+      -H "Content-Type: application/json" \
+      --data-binary @-
+    echo "Indexed $(echo "$NEW_DOCS" | jq length) docs into $INDEX"
+  else
+    echo "No new docs."
+  fi
+
+  echo "Sleeping $SLEEP_TIME seconds..."
+  sleep "$SLEEP_TIME"
+
+  [ "$SLEEP_TIME" -le 0 ] && break
+done
diff --git a/util/meilisearch/requirements.txt b/util/meilisearch/requirements.txt
new file mode 100644
index 000000000..aaf09d29e
--- /dev/null
+++ b/util/meilisearch/requirements.txt
@@ -0,0 +1,18 @@
+#
+# This file is autogenerated by pip-compile with Python 3.12
+# by the following command:
+#
+#    pip-compile requirements/meilisearch.in
+#
+deepdiff==8.6.0
+    # via -r requirements/meilisearch.in
+jsonpickle==4.0.2
+    # via -r requirements/meilisearch.in
+meilisearch==0.37.0
+    # via -r requirements/meilisearch.in
+httpx==0.27.0
+    # via meilisearch
+anyio==4.4.0
+    # via httpx
+sniffio==1.3.1
+    # via anyio
diff --git a/util/meilisearch/verify-index-copy.py b/util/meilisearch/verify-index-copy.py
new file mode 100644
index 000000000..1ddf35064
--- /dev/null
+++ b/util/meilisearch/verify-index-copy.py
@@ -0,0 +1,170 @@
+"""
+Verify that one Meilisearch index is a faithful copy of another.
+
+Compares document counts, index settings and a sample of documents.
+
+Usage:
+    python verify-index-copy.py OLD_URL OLD_INDEX NEW_URL NEW_INDEX
+        [--api-key KEY] [--check-percentage 0.1] [--ignore FIELD ...]
+"""
+import argparse
+import pprint
+import random
+import sys
+
+from deepdiff import DeepDiff
+from meilisearch import Client
+
+
+def _as_dict(obj):
+    """
+    Return a plain dict of fields for a document.
+
+    Accepts either a raw dict or a client model object; name-mangled private
+    attributes of the model are left out so only document fields remain.
+    """
+    if isinstance(obj, dict):
+        return obj
+    private_prefix = "_{}__".format(type(obj).__name__)
+    return {
+        key: value
+        for key, value in vars(obj).items()
+        if not key.startswith(private_prefix)
+    }
+
+
+def _document_count(stats):
+    """Read the document count from a raw stats dict or a stats model object."""
+    if isinstance(stats, dict):
+        return stats["numberOfDocuments"]
+    return stats.number_of_documents
+
+
+def docs_match(old_doc, new_doc, ignorable_fields=None):
+    """
+    Compare two documents while ignoring order and optional ignorable fields.
+
+    ignorable_fields lists top-level field names left out of the comparison.
+    """
+    exclude_paths = ["root['{}']".format(field) for field in ignorable_fields or []]
+    diff = DeepDiff(old_doc, new_doc, ignore_order=True, exclude_paths=exclude_paths)
+    return diff == {}
+
+
+def compare_indices(
+    old_url,
+    old_index,
+    new_url,
+    new_index,
+    api_key=None,
+    random_check_percentage=0.1,
+    ignorable_fields=None,
+    primary_key="id",
+):
+    """
+    Compare two Meilisearch indices by:
+    1. Document counts
+    2. Index settings
+    3. Random sample of documents
+
+    Returns True when every check passed, False otherwise.
+    """
+    all_ok = True
+
+    old_client = Client(old_url, api_key)
+    new_client = Client(new_url, api_key)
+
+    old_idx = old_client.index(old_index)
+    new_idx = new_client.index(new_index)
+
+    # ---- Compare stats ----
+    old_count = _document_count(old_idx.get_stats())
+    new_count = _document_count(new_idx.get_stats())
+
+    all_ok &= old_count == new_count
+    print("{}: Document count (old={}, new={})".format(
+        "OK" if old_count == new_count else "FAILURE", old_count, new_count
+    ))
+
+    # ---- Compare settings ----
+    old_settings = old_idx.get_settings()
+    new_settings = new_idx.get_settings()
+    diff = DeepDiff(old_settings, new_settings, ignore_order=True)
+    if diff != {}:
+        all_ok = False
+        print("FAILURE: Index settings do not match")
+        pprint.pprint(diff)
+    else:
+        print("OK: Index settings match")
+
+    # ---- Random checks ----
+    sample_size = int(old_count * random_check_percentage)
+    if sample_size == 0:
+        print("Skipping random checks (too few docs)")
+        return all_ok
+
+    # Fetch a single page (capped at 1000 docs) starting at a random offset so
+    # that repeated runs do not always inspect the first documents.
+    page_size = min(sample_size, 1000)
+    offset = random.randint(0, max(0, old_count - page_size))
+    response = old_idx.get_documents({"limit": page_size, "offset": offset})
+    results = response["results"] if isinstance(response, dict) else response.results
+    old_docs = [_as_dict(doc) for doc in results]
+
+    print(f"Checking {len(old_docs)} random documents...")
+
+    checked = 0
+    matching = 0
+    for doc in old_docs:
+        doc_id = doc[primary_key]
+        try:
+            new_doc = _as_dict(new_idx.get_document(doc_id))
+        except Exception as exc:  # best effort: treat as a mismatch
+            print(f"FAILURE: Document with id {doc_id} could not be fetched: {exc}")
+            new_doc = None
+
+        if new_doc is not None:
+            if docs_match(doc, new_doc, ignorable_fields):
+                matching += 1
+            else:
+                print(f"FAILURE: Document with id {doc_id} does not match")
+        checked += 1
+
+    all_ok &= matching == checked
+    print("{}: Random documents matching ({} out of {}, {:.2f}%)".format(
+        "OK" if matching == checked else "FAILURE",
+        matching,
+        checked,
+        (matching / checked * 100 if checked else 0),
+    ))
+    return all_ok
+
+
+def main(argv=None):
+    """Parse command-line arguments and run the comparison."""
+    parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0])
+    parser.add_argument("old_url")
+    parser.add_argument("old_index")
+    parser.add_argument("new_url")
+    parser.add_argument("new_index")
+    parser.add_argument("--api-key", default=None)
+    parser.add_argument("--check-percentage", type=float, default=0.1)
+    parser.add_argument("--ignore", nargs="*", default=[], metavar="FIELD")
+    parser.add_argument("--primary-key", default="id")
+    args = parser.parse_args(argv)
+
+    ok = compare_indices(
+        args.old_url,
+        args.old_index,
+        args.new_url,
+        args.new_index,
+        api_key=args.api_key,
+        random_check_percentage=args.check_percentage,
+        ignorable_fields=args.ignore,
+        primary_key=args.primary_key,
+    )
+    return 0 if ok else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/vagrant/release/analyticstack/Vagrantfile b/vagrant/release/analyticstack/Vagrantfile
index 7c5ed276c..2527e44bd 100644
--- a/vagrant/release/analyticstack/Vagrantfile
+++ b/vagrant/release/analyticstack/Vagrantfile
@@ -129,6 +129,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   config.vm.network :forwarded_port, guest: 8120, host: 8120  # edX Notes Service
   config.vm.network :forwarded_port, guest: 8765, host: 8765
   config.vm.network :forwarded_port, guest: 9200, host: 9200  # Elasticsearch
+  config.vm.network :forwarded_port, guest: 7700, host: 7700  # Meilisearch
   config.vm.network :forwarded_port, guest: 18080, host: 18080  # Forums
   config.vm.network :forwarded_port, guest: 8100, host: 8100  # Analytics Data API
   config.vm.network :forwarded_port, guest: 8110, host: 8110  # Insights