55 changes: 54 additions & 1 deletion Docs/init-system.md
@@ -192,6 +192,59 @@ cd extras/asr-services && docker compose up --build -d
cd extras/openmemory-mcp && docker compose up --build -d
```

## Startup Flow (Mermaid Diagrams)

Chronicle has two layers:
- **Setup** (`wizard.sh` / `wizard.py`) writes config (`.env`, `config/config.yml`, optional SSL/nginx config).
- **Run** (`start.sh` / `services.py`) starts the configured services via `docker compose`.

```mermaid
flowchart TD
A[wizard.sh] --> B["uv run --with-requirements setup-requirements.txt wizard.py"]
B --> C{Select services}
C --> D["backends/advanced/init.py<br/>writes backends/advanced/.env + config/config.yml"]
C --> E["extras/speaker-recognition/init.py<br/>writes extras/speaker-recognition/.env,<br/>optionally ssl/* + nginx.conf"]
C --> F["extras/asr-services/init.py<br/>writes extras/asr-services/.env"]
C --> G[extras/openmemory-mcp/setup.sh]

A2[start.sh] --> B2["uv run --with-requirements setup-requirements.txt python services.py start ..."]
B2 --> H{".env exists for<br/>each service?"}
H -->|yes| I["services.py runs docker compose<br/>in each service directory"]
H -->|no| J["Skip (not configured)"]
```
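
Concretely, the two layers are invoked from the repo root like this (script names as in the diagram; anything beyond these bare invocations is an assumption):

```bash
# Setup layer: interactive wizard writes the .env files, config/config.yml,
# and (for speaker recognition) the optional ssl/* + nginx.conf.
./wizard.sh

# Run layer: starts every service whose .env exists, via docker compose.
./start.sh
```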

### How `services.py` picks Speaker Recognition variants

`services.py` reads `extras/speaker-recognition/.env` and decides:
- `COMPUTE_MODE=cpu|gpu|strixhalo` → selects the matching compose profile
- `REACT_UI_HTTPS=true|false` → include `nginx` for HTTPS, or run only the API + UI over HTTP

```mermaid
flowchart TD
S[start.sh] --> P[services.py]
P --> R[Read extras/speaker-recognition/.env]
R --> M{COMPUTE_MODE}
M -->|cpu| C1["docker compose --profile cpu up ..."]
M -->|gpu| C2["docker compose --profile gpu up ..."]
M -->|strixhalo| C3["docker compose --profile strixhalo up ..."]
R --> H{REACT_UI_HTTPS}
H -->|true| N1["Start the full set:<br/>API + web-ui + nginx"]
H -->|false| N2["Start only API + web-ui<br/>(no nginx)"]
```
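
The decision above boils down to roughly the following shell sketch. This is illustrative only — the real logic lives in `services.py`, and the HTTP-only service names are assumptions, not verified against `docker-compose.yml`:

```bash
# Load the generated config (assumes plain KEY=value lines).
set -a
. extras/speaker-recognition/.env
set +a

profile="${COMPUTE_MODE:-cpu}"   # cpu | gpu | strixhalo
cmd=(docker compose --profile "$profile" up --build -d)

if [ "${REACT_UI_HTTPS:-false}" != "true" ]; then
  # HTTP-only: name the services explicitly so nginx stays down
  # (service names assumed; check docker-compose.yml for the real ones).
  cmd+=("speaker-service-$profile" web-ui)
fi

(cd extras/speaker-recognition && "${cmd[@]}")
```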

### CPU + NVIDIA share the same `Dockerfile` + `pyproject.toml`

Speaker recognition uses a single dependency definition with per-accelerator “extras”:
- `extras/speaker-recognition/pyproject.toml` defines extras like `cpu`, `cu121`, `cu126`, `cu128`, `strixhalo`.
- `extras/speaker-recognition/Dockerfile` takes `ARG PYTORCH_CUDA_VERSION` and runs:
- `uv sync --extra ${PYTORCH_CUDA_VERSION}`
- `uv run --extra ${PYTORCH_CUDA_VERSION} ...`
- `extras/speaker-recognition/docker-compose.yml` sets that build arg per profile:
- CPU profile defaults to `PYTORCH_CUDA_VERSION=cpu`
- GPU profile defaults to `PYTORCH_CUDA_VERSION=cu126` and reserves NVIDIA GPUs

AMD/ROCm (Strix Halo) uses the same `pyproject.toml` interface (the `strixhalo` extra) but a different build recipe (`extras/speaker-recognition/Dockerfile.strixhalo`) and ROCm device mappings, because the base image already provides the torch stack.
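
A rough sketch of how the variants line up in practice — hedged, since compose wires these build args itself and the explicit flags here are illustrative:

```bash
cd extras/speaker-recognition

# CPU and NVIDIA share Dockerfile + pyproject.toml; only the extra differs.
docker compose --profile cpu build                             # PYTORCH_CUDA_VERSION defaults to cpu
PYTORCH_CUDA_VERSION=cu126 docker compose --profile gpu build  # or cu121 / cu128

# Strix Halo keeps the same extras interface (strixhalo) but builds from
# Dockerfile.strixhalo, reusing the base image's ROCm torch stack.
docker compose --profile strixhalo build
```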

## Configuration Files

### Generated Files
@@ -234,4 +287,4 @@ cd backends/advanced && docker compose logs chronicle-backend

# Speaker Recognition logs
cd extras/speaker-recognition && docker compose logs speaker-service
```
2 changes: 1 addition & 1 deletion extras/asr-services/.env.template
@@ -2,7 +2,7 @@
# Copy this file to .env and configure as needed

# PyTorch CUDA version for Docker build
# Options: cu121 (CUDA 12.1), cu126 (CUDA 12.6), cu128 (CUDA 12.8)
# Options: cu121 (CUDA 12.1), cu126 (CUDA 12.6), cu128 (CUDA 12.8), strixhalo (AMD Strix Halo APU)
# Should match your system's CUDA version (check with: nvidia-smi)
PYTORCH_CUDA_VERSION=cu126

5 changes: 3 additions & 2 deletions extras/asr-services/Dockerfile_Parakeet
@@ -11,12 +11,13 @@ WORKDIR /app
# NeMo and texterrors need libs and C++ compiler
RUN apt-get update && apt-get install -y --no-install-recommends \
libsndfile1 \
ffmpeg \
build-essential git portaudio19-dev \
&& rm -rf /var/lib/apt/lists/*

# Dependency manifest first for cache‑friendly installs
COPY pyproject.toml uv.lock ./
RUN uv sync --no-install-project --group parakeet --extra ${PYTORCH_CUDA_VERSION} && \
RUN uv sync --frozen --no-install-project --group parakeet --extra ${PYTORCH_CUDA_VERSION} && \
uv cache clean

# Should prepare the .venv for use :)
@@ -34,4 +35,4 @@ ENV PATH="/app/.venv/bin:$PATH"

EXPOSE 8765

CMD ["python", "parakeet-offline.py", "--port", "8765"]
60 changes: 58 additions & 2 deletions extras/asr-services/docker-compose.yml
@@ -13,6 +13,31 @@ services:
- ./model_cache:/models
- ./debug:/app/debug
- ./results:/app/results
environment:
- HF_HOME=/models
- PARAKEET_MODEL=$PARAKEET_MODEL
- CHUNKING_ENABLED=${CHUNKING_ENABLED:-true}
- CHUNK_DURATION_SECONDS=${CHUNK_DURATION_SECONDS:-30.0}
- OVERLAP_DURATION_SECONDS=${OVERLAP_DURATION_SECONDS:-5.0}
- MIN_AUDIO_FOR_CHUNKING=${MIN_AUDIO_FOR_CHUNKING:-60.0}
- CONFIDENCE_THRESHOLD=${CONFIDENCE_THRESHOLD:-0.8}
restart: unless-stopped

# NVIDIA GPU variant (requires NVIDIA container runtime)
parakeet-asr-nvidia:
profiles: ["nvidia"]
build:
context: .
dockerfile: Dockerfile_Parakeet
args:
PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu126}
image: parakeet-asr:latest
ports:
- "${PARAKEET_HOST_PORT:-8767}:${PARAKEET_CONTAINER_PORT:-8765}"
volumes:
- ./model_cache:/models
- ./debug:/app/debug
- ./results:/app/results
deploy:
resources:
reservations:
@@ -23,7 +48,38 @@
environment:
- HF_HOME=/models
- PARAKEET_MODEL=$PARAKEET_MODEL
# Enhanced chunking configuration
- CHUNKING_ENABLED=${CHUNKING_ENABLED:-true}
- CHUNK_DURATION_SECONDS=${CHUNK_DURATION_SECONDS:-30.0}
- OVERLAP_DURATION_SECONDS=${OVERLAP_DURATION_SECONDS:-5.0}
- MIN_AUDIO_FOR_CHUNKING=${MIN_AUDIO_FOR_CHUNKING:-60.0}
- CONFIDENCE_THRESHOLD=${CONFIDENCE_THRESHOLD:-0.8}
restart: unless-stopped

# AMD ROCm / Strix Halo variant (requires /dev/kfd + /dev/dri passthrough)
parakeet-asr-strixhalo:
profiles: ["strixhalo", "amd"]
build:
context: .
dockerfile: Dockerfile_Parakeet
args:
PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-strixhalo}
image: parakeet-asr:latest
ports:
- "${PARAKEET_HOST_PORT:-8767}:${PARAKEET_CONTAINER_PORT:-8765}"
volumes:
- ./model_cache:/models
- ./debug:/app/debug
- ./results:/app/results
devices:
- /dev/kfd
- /dev/dri
group_add:
- video
security_opt:
- seccomp:unconfined
environment:
- HF_HOME=/models
- PARAKEET_MODEL=$PARAKEET_MODEL
- CHUNKING_ENABLED=${CHUNKING_ENABLED:-true}
- CHUNK_DURATION_SECONDS=${CHUNK_DURATION_SECONDS:-30.0}
- OVERLAP_DURATION_SECONDS=${OVERLAP_DURATION_SECONDS:-5.0}
@@ -53,4 +109,4 @@ services:
# capabilities: [gpu]
# environment:
# - HF_HOME=/models
# restart: unless-stopped
8 changes: 5 additions & 3 deletions extras/asr-services/init.py
@@ -155,7 +155,8 @@ def setup_cuda_version(self):
cuda_choices = {
"1": "CUDA 12.1 (cu121)",
"2": "CUDA 12.6 (cu126) - Recommended",
"3": "CUDA 12.8 (cu128)"
"3": "CUDA 12.8 (cu128)",
"4": "AMD Strix Halo (NPU)"
}
cuda_choice = self.prompt_choice(
"Choose CUDA version for PyTorch:",
@@ -166,7 +167,8 @@
choice_to_cuda = {
"1": "cu121",
"2": "cu126",
"3": "cu128"
"3": "cu128",
"4": "strixhalo"
}
cuda_version = choice_to_cuda[cuda_choice]

@@ -255,7 +257,7 @@ def main():
"""Main entry point"""
parser = argparse.ArgumentParser(description="ASR Services (Parakeet) Setup")
parser.add_argument("--pytorch-cuda-version",
choices=["cu121", "cu126", "cu128"],
choices=["cu121", "cu126", "cu128", "strixhalo"],
help="PyTorch CUDA version (default: auto-detect)")

args = parser.parse_args()
12 changes: 12 additions & 0 deletions extras/asr-services/pyproject.toml
@@ -36,6 +40,10 @@ cu128 = [
"torch>=2.3",
"torchaudio>=2.3",
]
strixhalo = [
"torch>=2.3",
"torchaudio>=2.3",
]

[tool.uv]
compile-bytecode = true
@@ -48,6 +52,7 @@ conflicts = [
{ extra = "cu121" },
{ extra = "cu126" },
{ extra = "cu128" },
{ extra = "strixhalo" },
],
]

@@ -57,11 +62,13 @@ torch = [
{ index = "pytorch-cu121", extra = "cu121" },
{ index = "pytorch-cu126", extra = "cu126" },
{ index = "pytorch-cu128", extra = "cu128" },
{ index = "rocm-gfx1151", extra = "strixhalo" },
]
torchaudio = [
{ index = "pytorch-cu121", extra = "cu121" },
{ index = "pytorch-cu126", extra = "cu126" },
{ index = "pytorch-cu128", extra = "cu128" },
{ index = "rocm-gfx1151", extra = "strixhalo" },
]

[[tool.uv.index]]
@@ -79,6 +86,11 @@ name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true

[[tool.uv.index]]
name = "rocm-gfx1151"
url = "https://rocm.nightlies.amd.com/v2/gfx1151/"
explicit = true
Comment on lines +89 to +92
⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query: AMD ROCm PyTorch gfx1151 stable release availability

💡 Result: Yes — AMD's ROCm/PyTorch builds support gfx1151 (Ryzen AI Max / Strix Halo, RDNA 3.5) in the ROCm 7.x line, but some components (e.g., AOTriton and other newer toolchains) are listed as experimental or under investigation, with kernel/workaround notes. (Status verified as of January 6, 2026.) [1][2][3]

Sources:
[1] ROCm PyTorch compatibility docs — gfx1151 listed, with notes on experimental AOTriton support.
[2] ROCm / PyTorch install docs — call out gfx1151 / Ryzen AI Max 300 kernel requirements and workarounds.
[3] ROCm public issues — community reports about gfx1151 support and build problems.

🌐 Web query: AMD ROCm official stable releases gfx1151 PyPI index

💡 Result:

  • AMD's official ROCm release notes show gfx1151 (RDNA 3.5 / Ryzen AI Max series) enabled in ROCm 7.x (e.g., 7.1.x). [1]
  • AMD publishes an official PyPI index for ROCm-hosted Python wheels (pypi.amd.com); ROCm docs instruct using it via pip with --extra-index-url (examples: https://pypi.amd.com/simple or versioned paths like /rocm-7.0.2/simple). [2][3]

Sources:
[1] ROCm 7.1.0 release notes — gfx1150/gfx1151 enabled.
[2] hipDF installation — shows AMD PyPI and example --extra-index-url=https://pypi.amd.com/rocm-7.0.2/simple.
[3] Multiple ROCm docs (e.g., MONAI / CuPy) — show pip install --extra-index-url=https://pypi.amd.com/simple.

🌐 Web query: rocm.nightlies.amd.com PyTorch nightly builds index

💡 Result: The ROCm-hosted PyTorch nightly wheel index lives at https://rocm.nightlies.amd.com/ (per-variant subpaths like /v2/gfx120X-all/). Nightlies install with pip via the index URL, for example:

python -m pip install --index-url https://rocm.nightlies.amd.com/v2/gfx120X-all/ --pre torch torchvision torchaudio

Sources: [1], [2].


Consider switching to AMD's official stable PyPI index instead of nightlies.

The rocm-gfx1151 index points to AMD's nightly builds (rocm.nightlies.amd.com), which may introduce unstable or breaking changes. Official stable PyPI releases for ROCm 7.x include gfx1151 support and are available at https://pypi.amd.com/simple or versioned paths like https://pypi.amd.com/rocm-7.0.2/simple. Use stable releases unless nightlies are specifically required for this project, and document the rationale if nightly builds are intentional.

🤖 Prompt for AI Agents
In @extras/asr-services/pyproject.toml around lines 89-92, the index entry named
"rocm-gfx1151" currently points to AMD nightlies and should be changed to a
stable AMD PyPI index: update the URL in the [[tool.uv.index]] block for name
"rocm-gfx1151" to use the official stable endpoint (e.g.
https://pypi.amd.com/simple or a versioned path like
https://pypi.amd.com/rocm-7.0.2/simple). If nightlies are intentionally
required, add a brief comment nearby explaining why.


[dependency-groups]
demo = [
"fastrtc>=0.0.23",
Expand Down
65 changes: 65 additions & 0 deletions extras/speaker-recognition/Dockerfile.strixhalo
@@ -0,0 +1,65 @@
FROM docker.io/kyuz0/vllm-therock-gfx1151:sha-039484a

ARG PYTORCH_CUDA_VERSION=strixhalo
ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION}

# Install system dependencies
RUN set -eux; \
dnf -y install \
gcc gcc-c++ make git ffmpeg curl libjpeg-turbo-devel zlib-devel libpng-devel; \
dnf -y clean all

WORKDIR /app

# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

# Copy dependency files first (for better caching)
COPY pyproject.toml uv.lock ./

# Use the base image's prebuilt venv (contains torch stack for gfx1151)
RUN test -x /opt/venv/bin/python
ENV VIRTUAL_ENV=/opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install dependencies, but never install/override torch/torchvision/torchaudio in this image.
# We avoid `uv sync` here because it recreates the venv and drops `--system-site-packages`.
# For strixhalo, we install pyannote.audio from git with --no-deps separately.
RUN set -eux; \
uv export --frozen --format requirements.txt --no-dev --no-hashes \
--extra ${PYTORCH_CUDA_VERSION} --no-emit-project \
--prune torch --prune torchvision --prune torchaudio \
--prune rocm --prune rocm-sdk-core --prune rocm-sdk-devel --prune rocm-sdk-libraries-gfx1151 \
--prune torchcodec \
$(if [ "${PYTORCH_CUDA_VERSION}" = "strixhalo" ]; then echo "--prune pyannote.audio"; fi) \
--output-file /tmp/requirements.txt; \
uv pip install --python /opt/venv/bin/python --no-managed-python --prerelease=if-necessary-or-explicit \
--requirements /tmp/requirements.txt; \
if [ "${PYTORCH_CUDA_VERSION}" = "strixhalo" ]; then \
uv pip install --python /opt/venv/bin/python --no-managed-python --no-deps \
"git+https://github.com/pyannote/pyannote-audio.git"; \
fi; \
uv cache clean

# Copy the full source code (after dependencies are cached)
COPY src/ src/

# Install the project (editable so the compose bind-mount workflow still works)
RUN uv pip install --python /opt/venv/bin/python --no-managed-python --no-deps --editable .

# Verify we can import the base image's torch stack (and that torchvision ops are present)
RUN python -c "import torch, torchvision; print('torch', torch.__version__, torch.__file__); print('torchvision', torchvision.__version__, torchvision.__file__)"

# Create directories
RUN mkdir -p /app/audio_chunks /app/debug /app/data /models

# Set environment variables
ENV HF_HOME=/models
ENV PYTHONPATH=/app
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/opt/rocm/lib/host-math/lib:/opt/rocm/lib/rocm_sysdeps/lib:/opt/venv/lib/python3.13/site-packages/torch/lib:/usr/lib64:${LD_LIBRARY_PATH}

# Expose port
EXPOSE 8085

# Run the service
CMD ["/opt/venv/bin/simple-speaker-service"]
27 changes: 24 additions & 3 deletions extras/speaker-recognition/docker-compose.yml
@@ -55,13 +55,34 @@ services:
count: all
capabilities: [gpu]

# Strix Halo / AMD ROCm Profile Configuration
speaker-service-strixhalo:
<<: *base-speaker-service
profiles: ["strixhalo"]
networks:
default:
aliases:
- speaker-service
build:
context: .
dockerfile: Dockerfile.strixhalo
args:
PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-strixhalo}
devices:
- /dev/kfd
- /dev/dri
group_add:
- video
security_opt:
- seccomp:unconfined

# React Web UI
web-ui:
platform: linux/amd64
build:
context: webui
dockerfile: Dockerfile
profiles: ["cpu", "gpu"]
profiles: ["cpu", "gpu", "strixhalo"]
ports:
- "${REACT_UI_PORT:-5173}:${REACT_UI_PORT:-5173}"
volumes:
Expand All @@ -88,7 +109,7 @@ services:
# Nginx reverse proxy for unified HTTPS endpoint
nginx:
image: nginx:alpine
profiles: ["cpu", "gpu"]
profiles: ["cpu", "gpu", "strixhalo"]
ports:
- "8444:443"
- "8081:80"
Expand All @@ -109,4 +130,4 @@ services:
networks:
default:
name: chronicle-network
external: true