From 6950730f61b0ccef6198b8932689b7ec5f0245a5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:10:54 +0000
Subject: [PATCH 1/7] Initial plan

From d19ed3c071388bcb79760cf4eab22e5c5f320004 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:14:24 +0000
Subject: [PATCH 2/7] Add EmbeddingClient wrapper with detailed logging and
 retries

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
---
 ai/analyzer_embedding_usage_example.py |  20 +++
 ai/embedding_client.py                 | 207 +++++++++++++++++++++++++
 2 files changed, 227 insertions(+)
 create mode 100644 ai/analyzer_embedding_usage_example.py
 create mode 100644 ai/embedding_client.py

diff --git a/ai/analyzer_embedding_usage_example.py b/ai/analyzer_embedding_usage_example.py
new file mode 100644
index 0000000..bc596fd
--- /dev/null
+++ b/ai/analyzer_embedding_usage_example.py
@@ -0,0 +1,20 @@
+# ai/analyzer_embedding_usage_example.py
+import logging
+from ai.embedding_client import EmbeddingClient
+
+logger = logging.getLogger("ai.analyzer")
+
+# create client (will pick up env vars)
+client = EmbeddingClient()
+
+def process_file_and_embed(file_path: str, chunks: list[str]):
+    logger.info("Start embedding file", extra={"file": file_path, "num_chunks": len(chunks)})
+    results = client.embed_multiple(chunks, file_path=file_path)
+    # Inspect results for None embeddings and act accordingly
+    for r in results:
+        if r.get("embedding") is None:
+            logger.warning("Chunk embedding failed", extra={"file": file_path, "chunk_index": r["chunk_index"], "error": r.get("error")})
+        else:
+            # continue with storing the embedding
+            pass
+    return results
diff --git a/ai/embedding_client.py b/ai/embedding_client.py
new file mode 100644
index 0000000..d85bec7
--- /dev/null
+++ b/ai/embedding_client.py
@@ -0,0 +1,207 @@
+# ai/embedding_client.py
+import os
+import time
+import uuid
+import json
+import logging
+import traceback
+from typing import List, Optional, Dict, Any
+
+import requests
+
+logger = logging.getLogger("ai.analyzer.embedding")
+
+# Configurable via environment
+EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", "https://example.com/v1/embeddings")
+EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", "")
+DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30"))  # seconds per request
+MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2"))
+BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5"))
+MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", "text-embedding-3-small")
+
+# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true
+if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"):
+    logging.getLogger("requests").setLevel(logging.DEBUG)
+    logging.getLogger("urllib3").setLevel(logging.DEBUG)
+
+
+class EmbeddingError(Exception):
+    pass
+
+
+class EmbeddingClient:
+    def __init__(self,
+                 api_url: str = EMBEDDING_API_URL,
+                 api_key: str = EMBEDDING_API_KEY,
+                 model: str = MODEL_NAME,
+                 timeout: float = DEFAULT_TIMEOUT,
+                 max_retries: int = MAX_RETRIES,
+                 backoff: float = BACKOFF_FACTOR):
+        self.api_url = api_url
+        self.api_key = api_key
+        self.model = model
+        self.timeout = timeout
+        self.max_retries = max_retries
+        self.backoff = backoff
+        self.session = requests.Session()
+        if api_key:
+            self.session.headers.update({"Authorization": f"Bearer {api_key}"})
+        self.session.headers.update({"Content-Type": "application/json"})
+
+    def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int):
+        logger.debug(
+            "Embedding request START",
+            extra={
+                "request_id": request_id,
+                "file": file_path,
+                "chunk_index": chunk_index,
+                "chunk_length": chunk_len,
+                "model": self.model,
+                "api_url": self.api_url,
+                "timeout": self.timeout,
+            },
+        )
+
+    def _log_request_end(self, request_id: str, elapsed: float, status: Optional[int], response_body_preview: str):
+        logger.debug(
+            "Embedding request END",
+            extra={
+                "request_id": request_id,
+                "elapsed_s": elapsed,
+                "status": status,
+                "response_preview": response_body_preview,
+            },
+        )
+
+    def embed_text(self, text: str, file_path: str = "", chunk_index: int = 0) -> List[float]:
+        """
+        Embed a single chunk of text. Returns the embedding vector.
+        Raises EmbeddingError on failure.
+        """
+        request_id = str(uuid.uuid4())
+        chunk_len = len(text)
+        self._log_request_start(request_id, file_path, chunk_index, chunk_len)
+
+        payload = {
+            "model": self.model,
+            "input": text,
+        }
+
+        attempt = 0
+        while True:
+            attempt += 1
+            start = time.perf_counter()
+            try:
+                resp = self.session.post(
+                    self.api_url,
+                    data=json.dumps(payload),
+                    timeout=self.timeout,
+                )
+                elapsed = time.perf_counter() - start
+
+                # Try to parse JSON safely
+                try:
+                    resp_json = resp.json()
+                except Exception:
+                    resp_json = None
+
+                preview = ""
+                if resp_json is not None:
+                    preview = json.dumps(resp_json)[:1000]
+                else:
+                    preview = (resp.text or "")[:1000]
+
+                self._log_request_end(request_id, elapsed, resp.status_code, preview)
+
+                if resp.status_code >= 200 and resp.status_code < 300:
+                    # expected format: {"data": [{"embedding": [...]}], ...}
+                    if not resp_json:
+                        raise EmbeddingError(f"Empty JSON response (status={resp.status_code})")
+                    try:
+                        # tolerant extraction
+                        data = resp_json.get("data") if isinstance(resp_json, dict) else None
+                        if data and isinstance(data, list) and len(data) > 0:
+                            emb = data[0].get("embedding")
+                            if emb and isinstance(emb, list):
+                                logger.info(
+                                    "Embedding succeeded",
+                                    extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index},
+                                )
+                                return emb
+                        # Fallback: maybe top-level "embedding" key
+                        if isinstance(resp_json, dict) and "embedding" in resp_json:
+                            emb = resp_json["embedding"]
+                            if isinstance(emb, list):
+                                return emb
+                        raise EmbeddingError(f"Unexpected embedding response shape: {resp_json}")
+                    except KeyError as e:
+                        raise EmbeddingError(f"Missing keys in embedding response: {e}")
+                else:
+                    # Non-2xx
+                    logger.warning(
+                        "Embedding API returned non-2xx",
+                        extra={
+                            "request_id": request_id,
+                            "status_code": resp.status_code,
+                            "file": file_path,
+                            "chunk_index": chunk_index,
+                            "attempt": attempt,
+                            "body_preview": preview,
+                        },
+                    )
+                    # fall through to retry logic
+                    err_msg = f"Status {resp.status_code}: {preview}"
+
+            except requests.Timeout as e:
+                elapsed = time.perf_counter() - start
+                err_msg = f"Timeout after {elapsed:.2f}s: {e}"
+                logger.error("Embedding API Timeout", extra={"request_id": request_id, "error": str(e)})
+            except requests.RequestException as e:
+                elapsed = time.perf_counter() - start
+                err_msg = f"RequestException after {elapsed:.2f}s: {e}\n{traceback.format_exc()}"
+                logger.error("Embedding request exception", extra={"request_id": request_id, "error": err_msg})
+            except Exception as e:
+                elapsed = time.perf_counter() - start
+                err_msg = f"Unexpected error after {elapsed:.2f}s: {e}\n{traceback.format_exc()}"
+                logger.exception("Unexpected embedding exception", extra={"request_id": request_id})
+
+            # Retry logic
+            if attempt > self.max_retries:
+                logger.error(
+                    "Max retries exceeded for embedding request",
+                    extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index, "attempts": attempt},
+                )
+                raise EmbeddingError(f"Failed to get embedding after {attempt} attempts. Last error: {err_msg}")
+
+            # Backoff and retry
+            sleep_for = self.backoff * (2 ** (attempt - 1))
+            logger.info(
+                "Retrying embedding request",
+                extra={
+                    "request_id": request_id,
+                    "file": file_path,
+                    "chunk_index": chunk_index,
+                    "attempt": attempt,
+                    "sleep_s": sleep_for,
+                },
+            )
+            time.sleep(sleep_for)
+
+    def embed_multiple(self, chunks: List[str], file_path: str = "") -> List[Dict[str, Any]]:
+        """
+        Embed a list of text chunks. Returns list of dicts: {"chunk_index": i, "embedding": [...]}.
+        This method logs progress and errors for each chunk.
+        """
+        results = []
+        for i, chunk in enumerate(chunks):
+            try:
+                emb = self.embed_text(chunk, file_path=file_path, chunk_index=i)
+                results.append({"chunk_index": i, "embedding": emb})
+            except EmbeddingError as e:
+                logger.error(
+                    "Failed to embed chunk",
+                    extra={"file": file_path, "chunk_index": i, "error": str(e)},
+                )
+                # append a failure marker or skip depending on desired behavior
+                results.append({"chunk_index": i, "embedding": None, "error": str(e)})
        return results
""" semaphore.acquire() try: - return get_embedding_for_text(text, model) + # Use the embedding client with enhanced logging + return _embedding_client.embed_text(text, file_path=file_path, chunk_index=chunk_index) finally: semaphore.release() @@ -192,7 +198,7 @@ def _process_file_sync( for idx, chunk_doc in batch: # Submit task to executor; semaphore will be acquired inside the worker embedding_start_time = time.time() - future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, embedding_model) + future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, rel_path, idx, embedding_model) embedding_futures.append((idx, chunk_doc, future, embedding_start_time)) # Wait for batch to complete and store results @@ -434,7 +440,7 @@ def search_semantic(query: str, database_path: str, top_k: int = 5): Uses sqlite-vector's vector_full_scan to retrieve best-matching chunks and returns a list of {file_id, path, chunk_index, score}. """ - q_emb = get_embedding_for_text(query) + q_emb = _embedding_client.embed_text(query, file_path="", chunk_index=0) if not q_emb: return [] diff --git a/ai/analyzer_embedding_usage_example.py b/ai/analyzer_embedding_usage_example.py deleted file mode 100644 index bc596fd..0000000 --- a/ai/analyzer_embedding_usage_example.py +++ /dev/null @@ -1,20 +0,0 @@ -# ai/analyzer_embedding_usage_example.py -import logging -from ai.embedding_client import EmbeddingClient - -logger = logging.getLogger("ai.analyzer") - -# create client (will pick up env vars) -client = EmbeddingClient() - -def process_file_and_embed(file_path: str, chunks: list[str]): - logger.info("Start embedding file", extra={"file": file_path, "num_chunks": len(chunks)}) - results = client.embed_multiple(chunks, file_path=file_path) - # Inspect results for None embeddings and act accordingly - for r in results: - if r.get("embedding") is None: - logger.warning("Chunk embedding failed", extra={"file": file_path, "chunk_index": r["chunk_index"], "error": r.get("error")}) - else: - # continue with storing the embedding - pass - return results From e73edcd6591956bc223a770e8db782b6d6053f1a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:34:06 +0000 Subject: [PATCH 4/7] Move EmbeddingClient into openai.py and remove separate file - Move EmbeddingClient class and EmbeddingError from embedding_client.py to openai.py - Update analyzer.py to import EmbeddingClient from openai module - Delete ai/embedding_client.py (no longer needed) - All embedding functionality now consolidated in openai.py - get_embedding_for_text remains in openai.py (can be used for backward compatibility) Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com> --- ai/analyzer.py | 3 +- ai/embedding_client.py | 207 ---------------------------------------- ai/openai.py | 211 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 211 insertions(+), 210 deletions(-) delete mode 100644 ai/embedding_client.py diff --git a/ai/analyzer.py b/ai/analyzer.py index ef630b1..ae02779 100644 --- a/ai/analyzer.py +++ b/ai/analyzer.py @@ -18,8 +18,7 @@ search_vectors as _search_vectors, get_chunk_text as _get_chunk_text, ) -from .openai import call_coding_api -from .embedding_client import EmbeddingClient +from .openai import call_coding_api, EmbeddingClient from llama_index.core import Document from utils.logger import get_logger from utils import compute_file_hash, chunk_text, norm, cosine diff --git a/ai/embedding_client.py 
b/ai/embedding_client.py deleted file mode 100644 index d85bec7..0000000 --- a/ai/embedding_client.py +++ /dev/null @@ -1,207 +0,0 @@ -# ai/embedding_client.py -import os -import time -import uuid -import json -import logging -import traceback -from typing import List, Optional, Dict, Any - -import requests - -logger = logging.getLogger("ai.analyzer.embedding") - -# Configurable via environment -EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", "https://example.com/v1/embeddings") -EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", "") -DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request -MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) -BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5")) -MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", "text-embedding-3-small") - -# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true -if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"): - logging.getLogger("requests").setLevel(logging.DEBUG) - logging.getLogger("urllib3").setLevel(logging.DEBUG) - - -class EmbeddingError(Exception): - pass - - -class EmbeddingClient: - def __init__(self, - api_url: str = EMBEDDING_API_URL, - api_key: str = EMBEDDING_API_KEY, - model: str = MODEL_NAME, - timeout: float = DEFAULT_TIMEOUT, - max_retries: int = MAX_RETRIES, - backoff: float = BACKOFF_FACTOR): - self.api_url = api_url - self.api_key = api_key - self.model = model - self.timeout = timeout - self.max_retries = max_retries - self.backoff = backoff - self.session = requests.Session() - if api_key: - self.session.headers.update({"Authorization": f"Bearer {api_key}"}) - self.session.headers.update({"Content-Type": "application/json"}) - - def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int): - logger.debug( - "Embedding request START", - extra={ - "request_id": request_id, - "file": file_path, - "chunk_index": chunk_index, - "chunk_length": chunk_len, - "model": self.model, - "api_url": self.api_url, - "timeout": self.timeout, - }, - ) - - def _log_request_end(self, request_id: str, elapsed: float, status: Optional[int], response_body_preview: str): - logger.debug( - "Embedding request END", - extra={ - "request_id": request_id, - "elapsed_s": elapsed, - "status": status, - "response_preview": response_body_preview, - }, - ) - - def embed_text(self, text: str, file_path: str = "", chunk_index: int = 0) -> List[float]: - """ - Embed a single chunk of text. Returns the embedding vector. - Raises EmbeddingError on failure. 
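Note on the submit pattern above: acquiring the semaphore inside the worker
thread, not in the submitter before _EXECUTOR.submit, means a full executor
queue cannot deadlock the submitting thread. A self-contained sketch of the
same pattern, with hypothetical names standing in for the analyzer internals:

    import threading
    from concurrent.futures import ThreadPoolExecutor

    executor = ThreadPoolExecutor(max_workers=8)
    semaphore = threading.Semaphore(4)  # cap concurrent embedding calls

    def worker(text: str) -> int:
        semaphore.acquire()  # acquired in the worker thread, not the submitter
        try:
            return len(text)  # stand-in for _embedding_client.embed_text(...)
        finally:
            semaphore.release()

    future = executor.submit(worker, "some chunk")
    print(future.result())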
- """ - request_id = str(uuid.uuid4()) - chunk_len = len(text) - self._log_request_start(request_id, file_path, chunk_index, chunk_len) - - payload = { - "model": self.model, - "input": text, - } - - attempt = 0 - while True: - attempt += 1 - start = time.perf_counter() - try: - resp = self.session.post( - self.api_url, - data=json.dumps(payload), - timeout=self.timeout, - ) - elapsed = time.perf_counter() - start - - # Try to parse JSON safely - try: - resp_json = resp.json() - except Exception: - resp_json = None - - preview = "" - if resp_json is not None: - preview = json.dumps(resp_json)[:1000] - else: - preview = (resp.text or "")[:1000] - - self._log_request_end(request_id, elapsed, resp.status_code, preview) - - if resp.status_code >= 200 and resp.status_code < 300: - # expected format: {"data": [{"embedding": [...]}], ...} - if not resp_json: - raise EmbeddingError(f"Empty JSON response (status={resp.status_code})") - try: - # tolerant extraction - data = resp_json.get("data") if isinstance(resp_json, dict) else None - if data and isinstance(data, list) and len(data) > 0: - emb = data[0].get("embedding") - if emb and isinstance(emb, list): - logger.info( - "Embedding succeeded", - extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index}, - ) - return emb - # Fallback: maybe top-level "embedding" key - if isinstance(resp_json, dict) and "embedding" in resp_json: - emb = resp_json["embedding"] - if isinstance(emb, list): - return emb - raise EmbeddingError(f"Unexpected embedding response shape: {resp_json}") - except KeyError as e: - raise EmbeddingError(f"Missing keys in embedding response: {e}") - else: - # Non-2xx - logger.warning( - "Embedding API returned non-2xx", - extra={ - "request_id": request_id, - "status_code": resp.status_code, - "file": file_path, - "chunk_index": chunk_index, - "attempt": attempt, - "body_preview": preview, - }, - ) - # fall through to retry logic - err_msg = f"Status {resp.status_code}: {preview}" - - except requests.Timeout as e: - elapsed = time.perf_counter() - start - err_msg = f"Timeout after {elapsed:.2f}s: {e}" - logger.error("Embedding API Timeout", extra={"request_id": request_id, "error": str(e)}) - except requests.RequestException as e: - elapsed = time.perf_counter() - start - err_msg = f"RequestException after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" - logger.error("Embedding request exception", extra={"request_id": request_id, "error": err_msg}) - except Exception as e: - elapsed = time.perf_counter() - start - err_msg = f"Unexpected error after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" - logger.exception("Unexpected embedding exception", extra={"request_id": request_id}) - - # Retry logic - if attempt > self.max_retries: - logger.error( - "Max retries exceeded for embedding request", - extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index, "attempts": attempt}, - ) - raise EmbeddingError(f"Failed to get embedding after {attempt} attempts. Last error: {err_msg}") - - # Backoff and retry - sleep_for = self.backoff * (2 ** (attempt - 1)) - logger.info( - "Retrying embedding request", - extra={ - "request_id": request_id, - "file": file_path, - "chunk_index": chunk_index, - "attempt": attempt, - "sleep_s": sleep_for, - }, - ) - time.sleep(sleep_for) - - def embed_multiple(self, chunks: List[str], file_path: str = "") -> List[Dict[str, Any]]: - """ - Embed a list of text chunks. Returns list of dicts: {"chunk_index": i, "embedding": [...]}. 
- This method logs progress and errors for each chunk. - """ - results = [] - for i, chunk in enumerate(chunks): - try: - emb = self.embed_text(chunk, file_path=file_path, chunk_index=i) - results.append({"chunk_index": i, "embedding": emb}) - except EmbeddingError as e: - logger.error( - "Failed to embed chunk", - extra={"file": file_path, "chunk_index": i, "error": str(e)}, - ) - # append a failure marker or skip depending on desired behavior - results.append({"chunk_index": i, "embedding": None, "error": str(e)}) - return results diff --git a/ai/openai.py b/ai/openai.py index f0be269..f3f8d51 100644 --- a/ai/openai.py +++ b/ai/openai.py @@ -1,8 +1,13 @@ -from typing import Optional +from typing import Optional, List, Dict, Any import os import time +import uuid +import json +import logging +import traceback import threading from openai import OpenAI +import requests from utils.config import CFG @@ -13,6 +18,22 @@ DEFAULT_EMBEDDING_MODEL = CFG.get("embedding_model") DEFAULT_CODING_MODEL = CFG.get("coding_model") +# Embedding client logger +_embedding_logger = logging.getLogger("ai.analyzer.embedding") + +# Embedding client configuration (can override via environment) +EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", CFG.get("api_url", "https://example.com/v1/embeddings")) +EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", CFG.get("api_key", "")) +DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request +MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) +BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5")) +EMBEDDING_MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small") + +# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true +if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"): + logging.getLogger("requests").setLevel(logging.DEBUG) + logging.getLogger("urllib3").setLevel(logging.DEBUG) + # Rate limiting configuration _RATE_LIMIT_CALLS = 100 # max calls per minute _RATE_LIMIT_WINDOW = 60.0 # seconds @@ -100,6 +121,194 @@ def _retry_with_backoff(func, *args, **kwargs): time.sleep(delay) +class EmbeddingError(Exception): + """Custom exception for embedding failures""" + pass + + +class EmbeddingClient: + """ + Embedding client with detailed logging, retry logic, and configurable timeouts. + Provides better debugging for embedding API failures. 
+ """ + def __init__(self, + api_url: str = EMBEDDING_API_URL, + api_key: str = EMBEDDING_API_KEY, + model: str = EMBEDDING_MODEL_NAME, + timeout: float = DEFAULT_TIMEOUT, + max_retries: int = MAX_RETRIES, + backoff: float = BACKOFF_FACTOR): + self.api_url = api_url + self.api_key = api_key + self.model = model + self.timeout = timeout + self.max_retries = max_retries + self.backoff = backoff + self.session = requests.Session() + if api_key: + self.session.headers.update({"Authorization": f"Bearer {api_key}"}) + self.session.headers.update({"Content-Type": "application/json"}) + + def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int): + _embedding_logger.debug( + "Embedding request START", + extra={ + "request_id": request_id, + "file": file_path, + "chunk_index": chunk_index, + "chunk_length": chunk_len, + "model": self.model, + "api_url": self.api_url, + "timeout": self.timeout, + }, + ) + + def _log_request_end(self, request_id: str, elapsed: float, status: Optional[int], response_body_preview: str): + _embedding_logger.debug( + "Embedding request END", + extra={ + "request_id": request_id, + "elapsed_s": elapsed, + "status": status, + "response_preview": response_body_preview, + }, + ) + + def embed_text(self, text: str, file_path: str = "", chunk_index: int = 0) -> List[float]: + """ + Embed a single chunk of text. Returns the embedding vector. + Raises EmbeddingError on failure. + """ + request_id = str(uuid.uuid4()) + chunk_len = len(text) + self._log_request_start(request_id, file_path, chunk_index, chunk_len) + + payload = { + "model": self.model, + "input": text, + } + + attempt = 0 + err_msg = "" + while True: + attempt += 1 + start = time.perf_counter() + try: + resp = self.session.post( + self.api_url, + data=json.dumps(payload), + timeout=self.timeout, + ) + elapsed = time.perf_counter() - start + + # Try to parse JSON safely + try: + resp_json = resp.json() + except Exception: + resp_json = None + + preview = "" + if resp_json is not None: + preview = json.dumps(resp_json)[:1000] + else: + preview = (resp.text or "")[:1000] + + self._log_request_end(request_id, elapsed, resp.status_code, preview) + + if resp.status_code >= 200 and resp.status_code < 300: + # expected format: {"data": [{"embedding": [...]}], ...} + if not resp_json: + raise EmbeddingError(f"Empty JSON response (status={resp.status_code})") + try: + # tolerant extraction + data = resp_json.get("data") if isinstance(resp_json, dict) else None + if data and isinstance(data, list) and len(data) > 0: + emb = data[0].get("embedding") + if emb and isinstance(emb, list): + _embedding_logger.info( + "Embedding succeeded", + extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index}, + ) + return emb + # Fallback: maybe top-level "embedding" key + if isinstance(resp_json, dict) and "embedding" in resp_json: + emb = resp_json["embedding"] + if isinstance(emb, list): + return emb + raise EmbeddingError(f"Unexpected embedding response shape: {resp_json}") + except KeyError as e: + raise EmbeddingError(f"Missing keys in embedding response: {e}") + else: + # Non-2xx + _embedding_logger.warning( + "Embedding API returned non-2xx", + extra={ + "request_id": request_id, + "status_code": resp.status_code, + "file": file_path, + "chunk_index": chunk_index, + "attempt": attempt, + "body_preview": preview, + }, + ) + # fall through to retry logic + err_msg = f"Status {resp.status_code}: {preview}" + + except requests.Timeout as e: + elapsed = time.perf_counter() - 
start + err_msg = f"Timeout after {elapsed:.2f}s: {e}" + _embedding_logger.error("Embedding API Timeout", extra={"request_id": request_id, "error": str(e)}) + except requests.RequestException as e: + elapsed = time.perf_counter() - start + err_msg = f"RequestException after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" + _embedding_logger.error("Embedding request exception", extra={"request_id": request_id, "error": err_msg}) + except Exception as e: + elapsed = time.perf_counter() - start + err_msg = f"Unexpected error after {elapsed:.2f}s: {e}\n{traceback.format_exc()}" + _embedding_logger.exception("Unexpected embedding exception", extra={"request_id": request_id}) + + # Retry logic + if attempt > self.max_retries: + _embedding_logger.error( + "Max retries exceeded for embedding request", + extra={"request_id": request_id, "file": file_path, "chunk_index": chunk_index, "attempts": attempt}, + ) + raise EmbeddingError(f"Failed to get embedding after {attempt} attempts. Last error: {err_msg}") + + # Backoff and retry + sleep_for = self.backoff * (2 ** (attempt - 1)) + _embedding_logger.info( + "Retrying embedding request", + extra={ + "request_id": request_id, + "file": file_path, + "chunk_index": chunk_index, + "attempt": attempt, + "sleep_s": sleep_for, + }, + ) + time.sleep(sleep_for) + + def embed_multiple(self, chunks: List[str], file_path: str = "") -> List[Dict[str, Any]]: + """ + Embed a list of text chunks. Returns list of dicts: {"chunk_index": i, "embedding": [...]}. + This method logs progress and errors for each chunk. + """ + results = [] + for i, chunk in enumerate(chunks): + try: + emb = self.embed_text(chunk, file_path=file_path, chunk_index=i) + results.append({"chunk_index": i, "embedding": emb}) + except EmbeddingError as e: + _embedding_logger.error( + "Failed to embed chunk", + extra={"file": file_path, "chunk_index": i, "error": str(e)}, + ) + # append a failure marker or skip depending on desired behavior + results.append({"chunk_index": i, "embedding": None, "error": str(e)}) + return results + + def get_embedding_for_text(text: str, model: Optional[str] = None): """ Return embedding vector (list[float]) using the new OpenAI client. From c9310f1fcf49629b243e18e4a6032124290236ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:40:59 +0000 Subject: [PATCH 5/7] Clean up redundant config and comments - Remove duplicate config settings (EMBEDDING_API_URL, EMBEDDING_API_KEY, EMBEDDING_MODEL_NAME) - EmbeddingClient now uses CFG values directly for api_url, api_key, and model - Remove unnecessary comments in analyzer.py - Keep only timeout, retries, and backoff as environment-configurable overrides Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com> --- ai/analyzer.py | 2 -- ai/openai.py | 21 +++++++++------------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/ai/analyzer.py b/ai/analyzer.py index ae02779..9e32dea 100644 --- a/ai/analyzer.py +++ b/ai/analyzer.py @@ -67,11 +67,9 @@ def _get_embedding_with_semaphore(semaphore: threading.Semaphore, text: str, fil """ Wrapper to acquire semaphore inside executor task to avoid deadlock. The semaphore is acquired in the worker thread, not the main thread. - Now uses EmbeddingClient for better logging and error handling. 
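Note on the tolerant extraction above: the client accepts either the
OpenAI-style {"data": [{"embedding": [...]}]} envelope or a bare top-level
{"embedding": [...]}. A sketch of that fallback in isolation (the sample
responses are assumptions, not captured API output):

    def extract_embedding(resp_json: dict):
        data = resp_json.get("data")
        if isinstance(data, list) and data:
            emb = data[0].get("embedding")
            if isinstance(emb, list):
                return emb
        emb = resp_json.get("embedding")  # fallback: top-level key
        return emb if isinstance(emb, list) else None

    print(extract_embedding({"data": [{"embedding": [0.1, 0.2]}]}))  # [0.1, 0.2]
    print(extract_embedding({"embedding": [0.3]}))                   # [0.3]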
""" semaphore.acquire() try: - # Use the embedding client with enhanced logging return _embedding_client.embed_text(text, file_path=file_path, chunk_index=chunk_index) finally: semaphore.release() diff --git a/ai/openai.py b/ai/openai.py index f3f8d51..22f1d08 100644 --- a/ai/openai.py +++ b/ai/openai.py @@ -21,13 +21,10 @@ # Embedding client logger _embedding_logger = logging.getLogger("ai.analyzer.embedding") -# Embedding client configuration (can override via environment) -EMBEDDING_API_URL = os.getenv("PICOCODE_EMBEDDING_URL", CFG.get("api_url", "https://example.com/v1/embeddings")) -EMBEDDING_API_KEY = os.getenv("PICOCODE_EMBEDDING_API_KEY", CFG.get("api_key", "")) +# Embedding client configuration (uses CFG values, can override specific ones via environment) DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5")) -EMBEDDING_MODEL_NAME = os.getenv("PICOCODE_EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small") # Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"): @@ -132,21 +129,21 @@ class EmbeddingClient: Provides better debugging for embedding API failures. """ def __init__(self, - api_url: str = EMBEDDING_API_URL, - api_key: str = EMBEDDING_API_KEY, - model: str = EMBEDDING_MODEL_NAME, + api_url: Optional[str] = None, + api_key: Optional[str] = None, + model: Optional[str] = None, timeout: float = DEFAULT_TIMEOUT, max_retries: int = MAX_RETRIES, backoff: float = BACKOFF_FACTOR): - self.api_url = api_url - self.api_key = api_key - self.model = model + self.api_url = api_url or CFG.get("api_url") + self.api_key = api_key or CFG.get("api_key") + self.model = model or DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small" self.timeout = timeout self.max_retries = max_retries self.backoff = backoff self.session = requests.Session() - if api_key: - self.session.headers.update({"Authorization": f"Bearer {api_key}"}) + if self.api_key: + self.session.headers.update({"Authorization": f"Bearer {self.api_key}"}) self.session.headers.update({"Content-Type": "application/json"}) def _log_request_start(self, request_id: str, file_path: str, chunk_index: int, chunk_len: int): From f28a7efb165a0f0208aca36d2f3676d12336fe09 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:45:52 +0000 Subject: [PATCH 6/7] Remove module-level embedding config variables - Remove DEFAULT_TIMEOUT, MAX_RETRIES, BACKOFF_FACTOR constants - Remove PICOCODE_HTTP_DEBUG environment variable handling - Use hard-coded defaults directly in EmbeddingClient.__init__ (timeout=30.0, max_retries=2, backoff=1.5) - Simpler code with no extra configuration layer Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com> --- ai/openai.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/ai/openai.py b/ai/openai.py index 22f1d08..30b7909 100644 --- a/ai/openai.py +++ b/ai/openai.py @@ -21,16 +21,6 @@ # Embedding client logger _embedding_logger = logging.getLogger("ai.analyzer.embedding") -# Embedding client configuration (uses CFG values, can override specific ones via environment) -DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30")) # seconds per request -MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2")) 
From f28a7efb165a0f0208aca36d2f3676d12336fe09 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:45:52 +0000
Subject: [PATCH 6/7] Remove module-level embedding config variables

- Remove DEFAULT_TIMEOUT, MAX_RETRIES, BACKOFF_FACTOR constants
- Remove PICOCODE_HTTP_DEBUG environment variable handling
- Use hard-coded defaults directly in EmbeddingClient.__init__ (timeout=30.0, max_retries=2, backoff=1.5)
- Simpler code with no extra configuration layer

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
---
 ai/openai.py | 16 +++------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/ai/openai.py b/ai/openai.py
index 22f1d08..30b7909 100644
--- a/ai/openai.py
+++ b/ai/openai.py
@@ -21,16 +21,6 @@
 # Embedding client logger
 _embedding_logger = logging.getLogger("ai.analyzer.embedding")
 
-# Embedding client configuration (uses CFG values, can override specific ones via environment)
-DEFAULT_TIMEOUT = float(os.getenv("PICOCODE_EMBEDDING_TIMEOUT", "30"))  # seconds per request
-MAX_RETRIES = int(os.getenv("PICOCODE_EMBEDDING_RETRIES", "2"))
-BACKOFF_FACTOR = float(os.getenv("PICOCODE_EMBEDDING_BACKOFF", "1.5"))
-
-# Optionally enable requests debug logging by setting PICOCODE_HTTP_DEBUG=true
-if os.getenv("PICOCODE_HTTP_DEBUG", "").lower() in ("1", "true", "yes"):
-    logging.getLogger("requests").setLevel(logging.DEBUG)
-    logging.getLogger("urllib3").setLevel(logging.DEBUG)
-
 # Rate limiting configuration
 _RATE_LIMIT_CALLS = 100  # max calls per minute
 _RATE_LIMIT_WINDOW = 60.0  # seconds
@@ -132,9 +122,9 @@ def __init__(self,
                  api_url: Optional[str] = None,
                  api_key: Optional[str] = None,
                  model: Optional[str] = None,
-                 timeout: float = DEFAULT_TIMEOUT,
-                 max_retries: int = MAX_RETRIES,
-                 backoff: float = BACKOFF_FACTOR):
+                 timeout: float = 30.0,
+                 max_retries: int = 2,
+                 backoff: float = 1.5):
         self.api_url = api_url or CFG.get("api_url")
         self.api_key = api_key or CFG.get("api_key")
         self.model = model or DEFAULT_EMBEDDING_MODEL or "text-embedding-3-small"

From d9ea51d02b7412555ec65d52f4ea11a5e6fd5865 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 10 Nov 2025 17:58:36 +0000
Subject: [PATCH 7/7] Use EmbeddingClient in llama_integration and remove
 get_embedding_for_text

- Update llama_integration.py to use EmbeddingClient instead of get_embedding_for_text
- Create module-level _embedding_client instance in llama_integration.py
- Remove unused get_embedding_for_text function from openai.py
- All embedding operations now use EmbeddingClient for consistent logging and retry behavior

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
---
 ai/llama_integration.py |  7 +++++--
 ai/openai.py            | 20 --------------------
 2 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/ai/llama_integration.py b/ai/llama_integration.py
index 155fd23..bf8d18e 100644
--- a/ai/llama_integration.py
+++ b/ai/llama_integration.py
@@ -4,11 +4,14 @@
 from typing import List
 
 from llama_index.core import Document
-from .openai import get_embedding_for_text
+from .openai import EmbeddingClient
 from utils.logger import get_logger
 
 logger = get_logger(__name__)
 
+# Create a module-level embedding client instance
+_embedding_client = EmbeddingClient()
+
 
 def llama_index_retrieve_documents(query: str, database_path: str, top_k: int = 5,
                                    search_func=None, get_chunk_func=None) -> List[Document]:
@@ -28,7 +31,7 @@ def llama_index_retrieve_documents(query: str, database_path: str, top_k: int =
     if search_func is None or get_chunk_func is None:
         raise ValueError("search_func and get_chunk_func must be provided")
 
-    q_emb = get_embedding_for_text(query)
+    q_emb = _embedding_client.embed_text(query, file_path="", chunk_index=0)
     if not q_emb:
         return []
 
diff --git a/ai/openai.py b/ai/openai.py
index 30b7909..4a75bd2 100644
--- a/ai/openai.py
+++ b/ai/openai.py
@@ -296,26 +296,6 @@ def embed_multiple(self, chunks: List[str], file_path: str = "") -> Lis
         return results
 
 
-def get_embedding_for_text(text: str, model: Optional[str] = None):
-    """
-    Return embedding vector (list[float]) using the new OpenAI client.
-    Includes rate limiting, retry logic with exponential backoff, and circuit breaker.
-    model: optional model id; if not provided, uses DEFAULT_EMBEDDING_MODEL from CFG.
-    """
-    model_to_use = model or DEFAULT_EMBEDDING_MODEL
-    if not model_to_use:
-        raise RuntimeError("No embedding model configured. Set EMBEDDING_MODEL in .env or pass model argument.")
-
-    def _get_embedding():
-        resp = _client.embeddings.create(model=model_to_use, input=text)
-        return resp.data[0].embedding
-
-    try:
-        return _retry_with_backoff(_get_embedding)
-    except Exception as e:
-        raise RuntimeError(f"Failed to obtain embedding from OpenAI client: {e}") from e
-
-
 def call_coding_api(prompt: str, model: Optional[str] = None, max_tokens: int = 1024):
     """
     Call a generative/coding model via the new OpenAI client.
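After this series every embedding path goes through EmbeddingClient. A
minimal end-to-end sketch of the final surface (names taken from the patches;
the error-handling shape is an assumption, not project code):

    from ai.openai import EmbeddingClient, EmbeddingError

    client = EmbeddingClient()
    try:
        vector = client.embed_text("def hello(): pass", file_path="demo.py", chunk_index=0)
    except EmbeddingError:
        vector = None  # embed_multiple records this as {"embedding": None, "error": ...}

    results = client.embed_multiple(["chunk one", "chunk two"], file_path="demo.py")
    failed = [r for r in results if r["embedding"] is None]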