From 6950730f61b0ccef6198b8932689b7ec5f0245a5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:10:54 +0000
Subject: [PATCH 1/7] Initial plan

From d19ed3c071388bcb79760cf4eab22e5c5f320004 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:14:24 +0000
Subject: [PATCH 2/7] Add EmbeddingClient wrapper with detailed logging and
 retries

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
---
 ai/analyzer_embedding_usage_example.py |  20 +++
 ai/embedding_client.py                 | 207 +++++++++++++++++++++++++
 2 files changed, 227 insertions(+)
 create mode 100644 ai/analyzer_embedding_usage_example.py
 create mode 100644 ai/embedding_client.py

diff --git a/ai/analyzer_embedding_usage_example.py b/ai/analyzer_embedding_usage_example.py
new file mode 100644
index 0000000..bc596fd
--- /dev/null
+++ b/ai/analyzer_embedding_usage_example.py
@@ -0,0 +1,20 @@
+# ai/analyzer_embedding_usage_example.py
+import logging
+from ai.embedding_client import EmbeddingClient
+
+logger = logging.getLogger("ai.analyzer")
+
+# create client (will pick up env vars)
+client = EmbeddingClient()
+
+def process_file_and_embed(file_path: str, chunks: list[str]):
+    logger.info("Start embedding file", extra={"file": file_path, "num_chunks": len(chunks)})
+    results = client.embed_multiple(chunks, file_path=file_path)
+    # Inspect results for None embeddings and act accordingly
+    for r in results:
+        if r.get("embedding") is None:
+            logger.warning("Chunk embedding failed", extra={"file": file_path, "chunk_index": r["chunk_index"], "error": r.get("error")})
+        else:
+            # continue with storing the embedding
+            pass
+    return results
diff --git a/ai/embedding_client.py b/ai/embedding_client.py
new file mode 100644
index 0000000..d85bec7
--- /dev/null
+++ b/ai/embedding_client.py
@@ -0,0 +1,207 @@
+# ai/embedding_client.py
+import os
+import time
+import uuid
+import json
+import logging
+import traceback
+from typing import List, Optional, Dict, Any
+
+import requests
+
+logger = logging.getLogger("ai.analyzer.embedding")
+
+# Configurable via environment
+EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", "https://example.com/v1/embeddings")
+EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", "")
+DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30"))  # seconds per request
+MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2"))
+BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5"))
+MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", "text-embedding-3-small")
+
+# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true
+if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"):
+    logging.getLogger("requests").setLevel(logging.DEBUG)
+    logging.getLogger("urllib3").setLevel(logging.DEBUG)
+
+
+class EmbeddingError(Exception):
+    pass
+
+
+class EmbeddingClient:
+    def __init__(self,
+                 api_url: str = EMBEDDING_API_URL,
+                 api_key: str = EMBEDDING_API_KEY,
+                 model: str = MODEL_NAME,
+                 timeout: float = DEFAULT_TIMEOUT,
+                 max_retries: int = MAX_RETRIES,
+                 backoff: float = BACKOFF_FACTOR):
+        self.api_url = api_url
+        self.api_key = api_key
+        self.model = model
+        self.timeout = timeout
+        self.max_retries = max_retries
+        self.backoff = backoff
+        self.session = requests.Session()
+        if api_key:
+            self.session.headers.update({"Authorization": f"Bearer {api_key}"})
+        self.session.headers.update({"Content-Type": "application/json"})
+
+    def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int):
+        logger.debug(
+            "Embedding request START",
+            extra={
+                "request_id": request_id,
+                "file": file_path,
+                "chunk_index": chunk_index,
+                "chunk_length": chunk_len,
+                "model": self.model,
+                "api_url": self.api_url,
+                "timeout": self.timeout,
+            },
+        )
+
+    def _log_request_end(self, request_id: str, elapsed: float, status: Optional[int], response_body_preview: str):
+        logger.debug(
+            "Embedding request END",
+            extra={
+                "request_id": request_id,
+                "elapsed_s": elapsed,
+                "status": status,
+                "response_preview": response_body_preview,
+            },
+        )
+
+    def embed_text(self, text: str, file_path: str = "", chunk_index: int = 0) -> List[float]:
+        """
+        Embed a single chunk of text. Returns the embedding vector.
+        Raises EmbeddingError on failure.
+        """
+        request_id = str(uuid.uuid4())
+        chunk_len = len(text)
+        self._log_request_start(request_id, file_path, chunk_index, chunk_len)
+
+        payload = {
+            "model": self.model,
+            "input": text,
+        }
+
+        attempt = 0
+        while True:
+            attempt += 1
+            start = time.perf_counter()
+            try:
+                resp = self.session.post(
+                    self.api_url,
+                    data=json.dumps(payload),
+                    timeout=self.timeout,
+                )
+                elapsed = time.perf_counter() - start
+
+                # Try to parse JSON safely
+                try:
+                    resp_json = resp.json()
+                except Exception:
+                    resp_json = None
+
+                preview = ""
+                if resp_json is not None:
+                    preview = json.dumps(resp_json)[:1000]
+                else:
+                    preview = (resp.text or "")[:1000]
+
+                self._log_request_end(request_id, elapsed, resp.status_code, preview)
+
+                if resp.status_code >= 200 and resp.status_code < 300:
+                    # expected format: {"data": [{"embedding": [...]}], ...}
+                    if not resp_json:
+                        raise EmbeddingError(f"Empty JSON response (status={resp.status_code})")
+                    try:
+                        # tolerant extraction
+                        data = resp_json.get("data") if isinstance(resp_json, dict) else None
+                        if data and isinstance(data, list) and len(data) > 0:
+                            emb = data[0].get("embedding")
+                            if emb and isinstance(emb, list):
+                                logger.info(
+                                    "Embedding succeeded",
+                                    extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index},
+                                )
+                                return emb
+                        # Fallback: maybe top-level "embedding" key
+                        if isinstance(resp_json, dict) and "embedding" in resp_json:
+                            emb = resp_json["embedding"]
+                            if isinstance(emb, list):
+                                return emb
+                        raise EmbeddingError(f"Unexpected embedding response shape: {resp_json}")
+                    except KeyError as e:
+                        raise EmbeddingError(f"Missing keys in embedding response: {e}")
+                else:
+                    # Non-2xx
+                    logger.warning(
+                        "Embedding API returned non-2xx",
+                        extra={
+                            "request_id": request_id,
+                            "status_code": resp.status_code,
+                            "file": file_path,
+                            "chunk_index": chunk_index,
+                            "attempt": attempt,
+                            "body_preview": preview,
+                        },
+                    )
+                    # fall through to retry logic
+                    err_msg = f"Status {resp.status_code}: {preview}"
+
+            except requests.Timeout as e:
+                elapsed = time.perf_counter() - start
+                err_msg = f"Timeout after {elapsed:.2f}s: {e}"
+                logger.error("Embedding API Timeout", extra={"request_id": request_id, "error": str(e)})
+            except requests.RequestException as e:
+                elapsed = time.perf_counter() - start
+                err_msg = f"RequestException after {elapsed:.2f}s: {e}\n{traceback.format_exc()}"
+                logger.error("Embedding request exception", extra={"request_id": request_id, "error": err_msg})
+            except Exception as e:
+                elapsed = time.perf_counter() - start
+                err_msg = f"Unexpected error after {elapsed:.2f}s: {e}\n{traceback.format_exc()}"
+                logger.exception("Unexpected embedding exception", extra={"request_id": request_id})
+
+            # Retry logic
+            if attempt > self.max_retries:
+                logger.error(
+                    "Max retries exceeded for embedding request",
+                    extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index, "attempts": attempt},
+                )
+                raise EmbeddingError(f"Failed to get embedding after {attempt} attempts. Last error: {err_msg}")
+
+            # Backoff and retry
+            sleep_for = self.backoff * (2 ** (attempt - 1))
+            logger.info(
+                "Retrying embedding request",
+                extra={
+                    "request_id": request_id,
+                    "file": file_path,
+                    "chunk_index": chunk_index,
+                    "attempt": attempt,
+                    "sleep_s": sleep_for,
+                },
+            )
+            time.sleep(sleep_for)
+
+    def embed_multiple(self, chunks: List[str], file_path: str = "") -> List[Dict[str, Any]]:
+        """
+        Embed a list of text chunks. Returns list of dicts: {"chunk_index": i, "embedding": [...]}.
+        This method logs progress and errors for each chunk.
+        """
+        results = []
+        for i, chunk in enumerate(chunks):
+            try:
+                emb = self.embed_text(chunk, file_path=file_path, chunk_index=i)
+                results.append({"chunk_index": i, "embedding": emb})
+            except EmbeddingError as e:
+                logger.error(
+                    "Failed to embed chunk",
+                    extra={"file": file_path, "chunk_index": i, "error": str(e)},
+                )
+                # append a failure marker or skip depending on desired behavior
+                results.append({"chunk_index": i, "embedding": None, "error": str(e)})
        return results
""" semaphore.acquire() try: - return get_embedding_for_text(text, model) + # Use the embedding client with enhanced logging + return _embedding_client.embed_text(text, file_path=file_path, chunk_index=chunk_index) finally: semaphore.release() @@ -192,7 +198,7 @@ def _process_file_sync( for idx, chunk_doc in batch: # Submit task to executor; semaphore will be acquired inside the worker embedding_start_time = time.time() - future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, embedding_model) + future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, rel_path, idx, embedding_model) embedding_futures.append((idx, chunk_doc, future, embedding_start_time)) # Wait for batch to complete and store results @@ -434,7 +440,7 @@ def search_semantic(query: str, database_path: str, top_k: int = 5): Uses sqlite-vector's vector_full_scan to retrieve best-matching chunks and returns a list of {file_id, path, chunk_index, score}. """ - q_emb = get_embedding_for_text(query) + q_emb = _embedding_client.embed_text(query, file_path="", chunk_index=0) if not q_emb: return [] diff --git a/ai/analyzer_embedding_usage_example.py b/ai/analyzer_embedding_usage_example.py deleted file mode 100644 index bc596fd..0000000 --- a/ai/analyzer_embedding_usage_example.py +++ /dev/null @@ -1,20 +0,0 @@ -# ai/analyzer_embedding_usage_example.py -import logging -from ai.embedding_client import EmbeddingClient - -logger = logging.getLogger("ai.analyzer") - -# create client (will pick up env vars) -client = EmbeddingClient() - -def process_file_and_embed(file_path: str, chunks: list[str]): - logger.info("Start embedding file", extra={"file": file_path, "num_chunks": len(chunks)}) - results = client.embed_multiple(chunks, file_path=file_path) - # Inspect results for None embeddings and act accordingly - for r in results: - if r.get("embedding") is None: - logger.warning("Chunk embedding failed", extra={"file": file_path, "chunk_index": r["chunk_index"], "error": r.get("error")}) - else: - # continue with storing the embedding - pass - return results From e73edcd6591956bc223a770e8db782b6d6053f1a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:34:06 +0000 Subject: [PATCH 4/7] Move EmbeddingClient into openai.py and remove separate file - Move EmbeddingClient class and EmbeddingError from embedding_client.py to openai.py - Update analyzer.py to import EmbeddingClient from openai module - Delete ai/embedding_client.py (no longer needed) - All embedding functionality now consolidated in openai.py - get_embedding_for_text remains in openai.py (can be used for backward compatibility) Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com> --- ai/analyzer.py | 3 +- ai/embedding_client.py | 207 ---------------------------------------- ai/openai.py | 211 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 211 insertions(+), 210 deletions(-) delete mode 100644 ai/embedding_client.py diff --git a/ai/analyzer.py b/ai/analyzer.py index ef630b1..ae02779 100644 --- a/ai/analyzer.py +++ b/ai/analyzer.py @@ -18,8 +18,7 @@ search_vectors as _search_vectors, get_chunk_text as _get_chunk_text, ) -from .openai import call_coding_api -from .embedding_client import EmbeddingClient +from .openai import call_coding_api, EmbeddingClient from llama_index.core import Document from utils.logger import get_logger from utils import compute_file_hash, chunk_text, norm, cosine diff --git a/ai/embedding_client.py 
b/ai/embedding_client.py deleted file mode 100644 index d85bec7..0000000 --- a/ai/embedding_client.py +++ /dev/null @@ -1,207 +0,0 @@ -# ai/embedding_client.py -import os -import time -import uuid -import json -import logging -import traceback -from typing import List, Optional, Dict, Any - -import requests - -logger = logging.getLogger("ai.analyzer.embedding") - -# Configurable via environment -EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", "https://example.com/v1/embeddings") -EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", "") -DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request -MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) -BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5")) -MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", "text-embedding-3-small") - -# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true -if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"): - logging.getLogger("requests").setLevel(logging.DEBUG) - logging.getLogger("urllib3").setLevel(logging.DEBUG) - - -class EmbeddingError(Exception): - pass - - -class EmbeddingClient: - def __init__(self, - api_url: str = EMBEDDING_API_URL, - api_key: str = EMBEDDING_API_KEY, - model: str = MODEL_NAME, - timeout: float = DEFAULT_TIMEOUT, - max_retries: int = MAX_RETRIES, - backoff: float = BACKOFF_FACTOR): - self.api_url = api_url - self.api_key = api_key - self.model = model - self.timeout = timeout - self.max_retries = max_retries - self.backoff = backoff - self.session = requests.Session() - if api_key: - self.session.headers.update({"Authorization": f"Bearer {api_key}"}) - self.session.headers.update({"Content-Type": "application/json"}) - - def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int): - logger.debug( - "Embedding request START", - extra={ - "request_id": request_id, - "file": file_path, - "chunk_index": chunk_index, - "chunk_length": chunk_len, - "model": self.model, - "api_url": self.api_url, - "timeout": self.timeout, - }, - ) - - def _log_request_end(self, request_id: str, elapsed: float, status: Optional[int], response_body_preview: str): - logger.debug( - "Embedding request END", - extra={ - "request_id": request_id, - "elapsed_s": elapsed, - "status": status, - "response_preview": response_body_preview, - }, - ) - - def embed_text(self, text: str, file_path: str = "", chunk_index: int = 0) -> List[float]: - """ - Embed a single chunk of text. Returns the embedding vector. - Raises EmbeddingError on failure. 
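Note on the submit pattern above: acquiring the semaphore inside the worker
thread, not in the submitter before _EXECUTOR.submit, means a full executor
queue cannot deadlock the submitting thread. A self-contained sketch of the
same pattern, with hypothetical names standing in for the analyzer internals:

    import threading
    from concurrent.futures import ThreadPoolExecutor

    executor = ThreadPoolExecutor(max_workers=8)
    semaphore = threading.Semaphore(4)  # cap concurrent embedding calls

    def worker(text: str) -> int:
        semaphore.acquire()  # acquired in the worker thread, not the submitter
        try:
            return len(text)  # stand-in for _embedding_client.embed_text(...)
        finally:
            semaphore.release()

    future = executor.submit(worker, "some chunk")
    print(future.result())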
- """ - request_id = str(uuid.uuid4()) - chunk_len = len(text) - self._log_request_start(request_id, file_path, chunk_index, chunk_len) - - payload = { - "model": self.model, - "input": text, - } - - attempt = 0 - while True: - attempt += 1 - start = time.perf_counter() - try: - resp = self.session.post( - self.api_url, - data=json.dumps(payload), - timeout=self.timeout, - ) - elapsed = time.perf_counter() - start - - # Try to parse JSON safely - try: - resp_json = resp.json() - except Exception: - resp_json = None - - preview = "" - if resp_json is not None: - preview = json.dumps(resp_json)[:1000] - else: - preview = (resp.text or "")[:1000] - - self._log_request_end(request_id, elapsed, resp.status_code, preview) - - if resp.status_code >= 200 and resp.status_code < 300: - # expected format: {"data": [{"embedding": [...]}], ...} - if not resp_json: - raise EmbeddingError(f"Empty JSON response (status={resp.status_code})") - try: - # tolerant extraction - data = resp_json.get("data") if isinstance(resp_json, dict) else None - if data and isinstance(data, list) and len(data) > 0: - emb = data[0].get("embedding") - if emb and isinstance(emb, list): - logger.info( - "Embedding succeeded", - extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index}, - ) - return emb - # Fallback: maybe top-level "embedding" key - if isinstance(resp_json, dict) and "embedding" in resp_json: - emb = resp_json["embedding"] - if isinstance(emb, list): - return emb - raise EmbeddingError(f"Unexpected embedding response shape: {resp_json}") - except KeyError as e: - raise EmbeddingError(f"Missing keys in embedding response: {e}") - else: - # Non-2xx - logger.warning( - "Embedding API returned non-2xx", - extra={ - "request_id": request_id, - "status_code": resp.status_code, - "file": file_path, - "chunk_index": chunk_index, - "attempt": attempt, - "body_preview": preview, - }, - ) - # fall through to retry logic - err_msg = f"Status {resp.status_code}: {preview}" - - except requests.Timeout as e: - elapsed = time.perf_counter() - start - err_msg = f"Timeout after {elapsed:.2f}s: {e}" - logger.error("Embedding API Timeout", extra={"request_id": request_id, "error": str(e)}) - except requests.RequestException as e: - elapsed = time.perf_counter() - start - err_msg = f"RequestException after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" - logger.error("Embedding request exception", extra={"request_id": request_id, "error": err_msg}) - except Exception as e: - elapsed = time.perf_counter() - start - err_msg = f"Unexpected error after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" - logger.exception("Unexpected embedding exception", extra={"request_id": request_id}) - - # Retry logic - if attempt > self.max_retries: - logger.error( - "Max retries exceeded for embedding request", - extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index, "attempts": attempt}, - ) - raise EmbeddingError(f"Failed to get embedding after {attempt} attempts. Last error: {err_msg}") - - # Backoff and retry - sleep_for = self.backoff * (2 ** (attempt - 1)) - logger.info( - "Retrying embedding request", - extra={ - "request_id": request_id, - "file": file_path, - "chunk_index": chunk_index, - "attempt": attempt, - "sleep_s": sleep_for, - }, - ) - time.sleep(sleep_for) - - def embed_multiple(self, chunks: List[str], file_path: str = "") -> List[Dict[str, Any]]: - """ - Embed a list of text chunks. Returns list of dicts: {"chunk_index": i, "embedding": [...]}. 
- This method logs progress and errors for each chunk. - """ - results = [] - for i, chunk in enumerate(chunks): - try: - emb = self.embed_text(chunk, file_path=file_path, chunk_index=i) - results.append({"chunk_index": i, "embedding": emb}) - except EmbeddingError as e: - logger.error( - "Failed to embed chunk", - extra={"file": file_path, "chunk_index": i, "error": str(e)}, - ) - # append a failure marker or skip depending on desired behavior - results.append({"chunk_index": i, "embedding": None, "error": str(e)}) - return results diff --git a/ai/openai.py b/ai/openai.py index f0be269..f3f8d51 100644 --- a/ai/openai.py +++ b/ai/openai.py @@ -1,8 +1,13 @@ -from typing import Optional +from typing import Optional, List, Dict, Any import os import time +import uuid +import json +import logging +import traceback import threading from openai import OpenAI +import requests from utils.config import CFG @@ -13,6 +18,22 @@ DEFAULT_EMBEDDING_MODEL = CFG.get("embedding_model") DEFAULT_CODING_MODEL = CFG.get("coding_model") +# Embedding client logger +_embedding_logger = logging.getLogger("ai.analyzer.embedding") + +# Embedding client configuration (can override via environment) +EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", CFG.get("api_url", "https://example.com/v1/embeddings")) +EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", CFG.get("api_key", "")) +DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request +MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) +BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5")) +EMBEDDING_MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small") + +# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true +if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"): + logging.getLogger("requests").setLevel(logging.DEBUG) + logging.getLogger("urllib3").setLevel(logging.DEBUG) + # Rate limiting configuration _RATE_LIMIT_CALLS = 100 # max calls per minute _RATE_LIMIT_WINDOW = 60.0 # seconds @@ -100,6 +121,194 @@ def _retry_with_backoff(func, *args, **kwargs): time.sleep(delay) +class EmbeddingError(Exception): + """Custom exception for embedding failures""" + pass + + +class EmbeddingClient: + """ + Embedding client with detailed logging, retry logic, and configurable timeouts. + Provides better debugging for embedding API failures. 
+ """ + def __init__(self, + api_url: str = EMBEDDING_API_URL, + api_key: str = EMBEDDING_API_KEY, + model: str = EMBEDDING_MODEL_NAME, + timeout: float = DEFAULT_TIMEOUT, + max_retries: int = MAX_RETRIES, + backoff: float = BACKOFF_FACTOR): + self.api_url = api_url + self.api_key = api_key + self.model = model + self.timeout = timeout + self.max_retries = max_retries + self.backoff = backoff + self.session = requests.Session() + if api_key: + self.session.headers.update({"Authorization": f"Bearer {api_key}"}) + self.session.headers.update({"Content-Type": "application/json"}) + + def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int): + _embedding_logger.debug( + "Embedding request START", + extra={ + "request_id": request_id, + "file": file_path, + "chunk_index": chunk_index, + "chunk_length": chunk_len, + "model": self.model, + "api_url": self.api_url, + "timeout": self.timeout, + }, + ) + + def _log_request_end(self, request_id: str, elapsed: float, status: Optional[int], response_body_preview: str): + _embedding_logger.debug( + "Embedding request END", + extra={ + "request_id": request_id, + "elapsed_s": elapsed, + "status": status, + "response_preview": response_body_preview, + }, + ) + + def embed_text(self, text: str, file_path: str = "", chunk_index: int = 0) -> List[float]: + """ + Embed a single chunk of text. Returns the embedding vector. + Raises EmbeddingError on failure. + """ + request_id = str(uuid.uuid4()) + chunk_len = len(text) + self._log_request_start(request_id, file_path, chunk_index, chunk_len) + + payload = { + "model": self.model, + "input": text, + } + + attempt = 0 + err_msg = "" + while True: + attempt += 1 + start = time.perf_counter() + try: + resp = self.session.post( + self.api_url, + data=json.dumps(payload), + timeout=self.timeout, + ) + elapsed = time.perf_counter() - start + + # Try to parse JSON safely + try: + resp_json = resp.json() + except Exception: + resp_json = None + + preview = "" + if resp_json is not None: + preview = json.dumps(resp_json)[:1000] + else: + preview = (resp.text or "")[:1000] + + self._log_request_end(request_id, elapsed, resp.status_code, preview) + + if resp.status_code >= 200 and resp.status_code < 300: + # expected format: {"data": [{"embedding": [...]}], ...} + if not resp_json: + raise EmbeddingError(f"Empty JSON response (status={resp.status_code})") + try: + # tolerant extraction + data = resp_json.get("data") if isinstance(resp_json, dict) else None + if data and isinstance(data, list) and len(data) > 0: + emb = data[0].get("embedding") + if emb and isinstance(emb, list): + _embedding_logger.info( + "Embedding succeeded", + extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index}, + ) + return emb + # Fallback: maybe top-level "embedding" key + if isinstance(resp_json, dict) and "embedding" in resp_json: + emb = resp_json["embedding"] + if isinstance(emb, list): + return emb + raise EmbeddingError(f"Unexpected embedding response shape: {resp_json}") + except KeyError as e: + raise EmbeddingError(f"Missing keys in embedding response: {e}") + else: + # Non-2xx + _embedding_logger.warning( + "Embedding API returned non-2xx", + extra={ + "request_id": request_id, + "status_code": resp.status_code, + "file": file_path, + "chunk_index": chunk_index, + "attempt": attempt, + "body_preview": preview, + }, + ) + # fall through to retry logic + err_msg = f"Status {resp.status_code}: {preview}" + + except requests.Timeout as e: + elapsed = time.perf_counter() - 
start + err_msg = f"Timeout after {elapsed:.2f}s: {e}" + _embedding_logger.error("Embedding API Timeout", extra={"request_id": request_id, "error": str(e)}) + except requests.RequestException as e: + elapsed = time.perf_counter() - start + err_msg = f"RequestException after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" + _embedding_logger.error("Embedding request exception", extra={"request_id": request_id, "error": err_msg}) + except Exception as e: + elapsed = time.perf_counter() - start + err_msg = f"Unexpected error after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" + _embedding_logger.exception("Unexpected embedding exception", extra={"request_id": request_id}) + + # Retry logic + if attempt > self.max_retries: + _embedding_logger.error( + "Max retries exceeded for embedding request", + extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index, "attempts": attempt}, + ) + raise EmbeddingError(f"Failed to get embedding after {attempt} attempts. Last error: {err_msg}") + + # Backoff and retry + sleep_for = self.backoff * (2 ** (attempt - 1)) + _embedding_logger.info( + "Retrying embedding request", + extra={ + "request_id": request_id, + "file": file_path, + "chunk_index": chunk_index, + "attempt": attempt, + "sleep_s": sleep_for, + }, + ) + time.sleep(sleep_for) + + def embed_multiple(self, chunks: List[str], file_path: str = "") -> List[Dict[str, Any]]: + """ + Embed a list of text chunks. Returns list of dicts: {"chunk_index": i, "embedding": [...]}. + This method logs progress and errors for each chunk. + """ + results = [] + for i, chunk in enumerate(chunks): + try: + emb = self.embed_text(chunk, file_path=file_path, chunk_index=i) + results.append({"chunk_index": i, "embedding": emb}) + except EmbeddingError as e: + _embedding_logger.error( + "Failed to embed chunk", + extra={"file": file_path, "chunk_index": i, "error": str(e)}, + ) + # append a failure marker or skip depending on desired behavior + results.append({"chunk_index": i, "embedding": None, "error": str(e)}) + return results + + def get_embedding_for_text(text: str, model: Optional[str] = None): """ Return embedding vector (list[float]) using the new OpenAI client. From c9310f1fcf49629b243e18e4a6032124290236ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:40:59 +0000 Subject: [PATCH 5/7] Clean up redundant config and comments - Remove duplicate config settings (EMBEDDING_API_URL, EMBEDDING_API_KEY, EMBEDDING_MODEL_NAME) - EmbeddingClient now uses CFG values directly for api_url, api_key, and model - Remove unnecessary comments in analyzer.py - Keep only timeout, retries, and backoff as environment-configurable overrides Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com> --- ai/analyzer.py | 2 -- ai/openai.py | 21 +++++++++------------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/ai/analyzer.py b/ai/analyzer.py index ae02779..9e32dea 100644 --- a/ai/analyzer.py +++ b/ai/analyzer.py @@ -67,11 +67,9 @@ def _get_embedding_with_semaphore(semaphore: threading.Semaphore, text: str, fil """ Wrapper to acquire semaphore inside executor task to avoid deadlock. The semaphore is acquired in the worker thread, not the main thread. - Now uses EmbeddingClient for better logging and error handling. 
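Note on the tolerant extraction above: the client accepts either the
OpenAI-style {"data": [{"embedding": [...]}]} envelope or a bare top-level
{"embedding": [...]}. A sketch of that fallback in isolation (the sample
responses are assumptions, not captured API output):

    def extract_embedding(resp_json: dict):
        data = resp_json.get("data")
        if isinstance(data, list) and data:
            emb = data[0].get("embedding")
            if isinstance(emb, list):
                return emb
        emb = resp_json.get("embedding")  # fallback: top-level key
        return emb if isinstance(emb, list) else None

    print(extract_embedding({"data": [{"embedding": [0.1, 0.2]}]}))  # [0.1, 0.2]
    print(extract_embedding({"embedding": [0.3]}))                   # [0.3]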
""" semaphore.acquire() try: - # Use the embedding client with enhanced logging return _embedding_client.embed_text(text, file_path=file_path, chunk_index=chunk_index) finally: semaphore.release() diff --git a/ai/openai.py b/ai/openai.py index f3f8d51..22f1d08 100644 --- a/ai/openai.py +++ b/ai/openai.py @@ -21,13 +21,10 @@ # Embedding client logger _embedding_logger = logging.getLogger("ai.analyzer.embedding") -# Embedding client configuration (can override via environment) -EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", CFG.get("api_url", "https://example.com/v1/embeddings")) -EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", CFG.get("api_key", "")) +# Embedding client configuration (uses CFG values, can override specific ones via environment) DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5")) -EMBEDDING_MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small") # Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"): @@ -132,21 +129,21 @@ class EmbeddingClient: Provides better debugging for embedding API failures. """ def __init__(self, - api_url: str = EMBEDDING_API_URL, - api_key: str = EMBEDDING_API_KEY, - model: str = EMBEDDING_MODEL_NAME, + api_url: Optional[str] = None, + api_key: Optional[str] = None, + model: Optional[str] = None, timeout: float = DEFAULT_TIMEOUT, max_retries: int = MAX_RETRIES, backoff: float = BACKOFF_FACTOR): - self.api_url = api_url - self.api_key = api_key - self.model = model + self.api_url = api_url or CFG.get("api_url") + self.api_key = api_key or CFG.get("api_key") + self.model = model or DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small" self.timeout = timeout self.max_retries = max_retries self.backoff = backoff self.session = requests.Session() - if api_key: - self.session.headers.update({"Authorization": f"Bearer {api_key}"}) + if self.api_key: + self.session.headers.update({"Authorization": f"Bearer {self.api_key}"}) self.session.headers.update({"Content-Type": "application/json"}) def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int): From f28a7efb165a0f0208aca36d2f3676d12336fe09 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:45:52 +0000 Subject: [PATCH 6/7] Remove module-level embedding config variables - Remove DEFAULT_TIMEOUT, MAX_RETRIES, BACKOFF_FACTOR constants - Remove PICOCODE_HTTP_DEBUG environment variable handling - Use hard-coded defaults directly in EmbeddingClient.__init__ (timeout=30.0, max_retries=2, backoff=1.5) - Simpler code with no extra configuration layer Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com> --- ai/openai.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/ai/openai.py b/ai/openai.py index 22f1d08..30b7909 100644 --- a/ai/openai.py +++ b/ai/openai.py @@ -21,16 +21,6 @@ # Embedding client logger _embedding_logger = logging.getLogger("ai.analyzer.embedding") -# Embedding client configuration (uses CFG values, can override specific ones via environment) -DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request -MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) 
From f28a7efb165a0f0208aca36d2f3676d12336fe09 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:45:52 +0000
Subject: [PATCH 6/7] Remove module-level embedding config variables

- Remove DEFAULT_TIMEOUT, MAX_RETRIES, BACKOFF_FACTOR constants
- Remove PICOCODE_HTTP_DEBUG environment variable handling
- Use hard-coded defaults directly in EmbeddingClient.__init__ (timeout=30.0, max_retries=2, backoff=1.5)
- Simpler code with no extra configuration layer

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
---
 ai/openai.py | 16 +++------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/ai/openai.py b/ai/openai.py
index 22f1d08..30b7909 100644
--- a/ai/openai.py
+++ b/ai/openai.py
@@ -21,16 +21,6 @@
 # Embedding client logger
 _embedding_logger = logging.getLogger("ai.analyzer.embedding")
 
-# Embedding client configuration (uses CFG values, can override specific ones via environment)
-DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30"))  # seconds per request
-MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2"))
-BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5"))
-
-# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true
-if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"):
-    logging.getLogger("requests").setLevel(logging.DEBUG)
-    logging.getLogger("urllib3").setLevel(logging.DEBUG)
-
 # Rate limiting configuration
 _RATE_LIMIT_CALLS = 100  # max calls per minute
 _RATE_LIMIT_WINDOW = 60.0  # seconds
@@ -132,9 +122,9 @@ def __init__(self,
                  api_url: Optional[str] = None,
                  api_key: Optional[str] = None,
                  model: Optional[str] = None,
-                 timeout: float = DEFAULT_TIMEOUT,
-                 max_retries: int = MAX_RETRIES,
-                 backoff: float = BACKOFF_FACTOR):
+                 timeout: float = 30.0,
+                 max_retries: int = 2,
+                 backoff: float = 1.5):
         self.api_url = api_url or CFG.get("api_url")
         self.api_key = api_key or CFG.get("api_key")
         self.model = model or DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small"

From d9ea51d02b7412555ec65d52f4ea11a5e6fd5865 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:58:36 +0000
Subject: [PATCH 7/7] Use EmbeddingClient in llama_integration and remove
 get_embedding_for_text

- Update llama_integration.py to use EmbeddingClient instead of get_embedding_for_text
- Create module-level _embedding_client instance in llama_integration.py
- Remove unused get_embedding_for_text function from openai.py
- All embedding operations now use EmbeddingClient for consistent logging and retry behavior

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
---
 ai/llama_integration.py |  7 +++++--
 ai/openai.py            | 20 --------------------
 2 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/ai/llama_integration.py b/ai/llama_integration.py
index 155fd23..bf8d18e 100644
--- a/ai/llama_integration.py
+++ b/ai/llama_integration.py
@@ -4,11 +4,14 @@
 from typing import List
 
 from llama_index.core import Document
-from .openai import get_embedding_for_text
+from .openai import EmbeddingClient
 from utils.logger import get_logger
 
 logger = get_logger(__name__)
 
+# Create a module-level embedding client instance
+_embedding_client = EmbeddingClient()
+
 
 def llama_index_retrieve_documents(query: str, database_path: str, top_k: int = 5,
                                    search_func=None, get_chunk_func=None) -> List[Document]:
@@ -28,7 +31,7 @@ def llama_index_retrieve_documents(query: str, database_path: str, top_k: int =
     if search_func is None or get_chunk_func is None:
         raise ValueError("search_func and get_chunk_func must be provided")
 
-    q_emb = get_embedding_for_text(query)
+    q_emb = _embedding_client.embed_text(query, file_path="", chunk_index=0)
     if not q_emb:
         return []
 
diff --git a/ai/openai.py b/ai/openai.py
index 30b7909..4a75bd2 100644
--- a/ai/openai.py
+++ b/ai/openai.py
@@ -296,26 +296,6 @@ def embed_multiple(self, chunks: List[str], file_path: str = "") -> Lis
         return results
 
 
-def get_embedding_for_text(text: str, model: Optional[str] = None):
-    """
-    Return embedding vector (list[float]) using the new OpenAI client.
-    Includes rate limiting, retry logic with exponential backoff, and circuit breaker.
-    model: optional model id; if not provided, uses DEFAULT_EMBEDDING_MODEL from CFG.
-    """
-    model_to_use = model or DEFAULT_EMBEDDING_MODEL
-    if not model_to_use:
-        raise RuntimeError("No embedding model configured. Set EMBEDDING_MODEL in .env or pass model argument.")
-
-    def _get_embedding():
-        resp = _client.embeddings.create(model=model_to_use, input=text)
-        return resp.data[0].embedding
-
-    try:
-        return _retry_with_backoff(_get_embedding)
-    except Exception as e:
-        raise RuntimeError(f"Failed to obtain embedding from OpenAI client: {e}") from e
-
-
 def call_coding_api(prompt: str, model: Optional[str] = None, max_tokens: int = 1024):
     """
     Call a generative/coding model via the new OpenAI client.
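After this series every embedding path goes through EmbeddingClient. A
minimal end-to-end sketch of the final surface (names taken from the patches;
the error-handling shape is an assumption, not project code):

    from ai.openai import EmbeddingClient, EmbeddingError

    client = EmbeddingClient()
    try:
        vector = client.embed_text("def hello(): pass", file_path="demo.py", chunk_index=0)
    except EmbeddingError:
        vector = None  # embed_multiple records this as {"embedding": None, "error": ...}

    results = client.embed_multiple(["chunk one", "chunk two"], file_path="demo.py")
    failed = [r for r in results if r["embedding"] is None]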