Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion generate_presigned_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

# List of files to process
files_to_process: List[Dict[str, str]] = [
{"bucket": "ckb", "key": "sm_someuuid/sm_someuuid.zip"},
{"bucket": "ckb", "key": "ID.ee/ID.ee.zip"},
]

# Generate presigned URLs
Expand Down
10 changes: 5 additions & 5 deletions src/contextual_retrieval/bm25_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,19 +141,19 @@ async def search_bm25(

logger.info(f"BM25 search found {len(results)} chunks")

# Debug logging for BM25 results
logger.info("=== BM25 SEARCH RESULTS BREAKDOWN ===")
# Detailed results at DEBUG level (loguru filters based on log level config)
logger.debug("=== BM25 SEARCH RESULTS BREAKDOWN ===")
for i, chunk in enumerate(results[:10]): # Show top 10 results
content_preview = (
(chunk.get("original_content", "")[:150] + "...")
if len(chunk.get("original_content", "")) > 150
else chunk.get("original_content", "")
)
logger.info(
logger.debug(
f" Rank {i + 1}: BM25_score={chunk['score']:.4f}, id={chunk.get('chunk_id', 'unknown')}"
)
logger.info(f" content: '{content_preview}'")
logger.info("=== END BM25 SEARCH RESULTS ===")
logger.debug(f" content: '{content_preview}'")
logger.debug("=== END BM25 SEARCH RESULTS ===")

return results

Expand Down
10 changes: 5 additions & 5 deletions src/contextual_retrieval/qdrant_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,19 +148,19 @@ async def search_contextual_embeddings_direct(
f"Semantic search found {len(all_results)} chunks across {len(collections)} collections"
)

# Debug logging for final sorted results
logger.info("=== SEMANTIC SEARCH RESULTS BREAKDOWN ===")
# Detailed results at DEBUG level (loguru filters based on log level config)
logger.debug("=== SEMANTIC SEARCH RESULTS BREAKDOWN ===")
for i, chunk in enumerate(all_results[:10]): # Show top 10 results
content_preview = (
(chunk.get("original_content", "")[:150] + "...")
if len(chunk.get("original_content", "")) > 150
else chunk.get("original_content", "")
)
logger.info(
logger.debug(
f" Rank {i + 1}: score={chunk['score']:.4f}, collection={chunk.get('source_collection', 'unknown')}, id={chunk['chunk_id']}"
)
logger.info(f" content: '{content_preview}'")
logger.info("=== END SEMANTIC SEARCH RESULTS ===")
logger.debug(f" content: '{content_preview}'")
logger.debug("=== END SEMANTIC SEARCH RESULTS ===")

return all_results

Expand Down
10 changes: 5 additions & 5 deletions src/contextual_retrieval/rank_fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def fuse_results(

logger.info(f"Fusion completed: {len(final_results)} final results")

# Debug logging for final fused results
logger.info("=== RANK FUSION FINAL RESULTS ===")
# Detailed results at DEBUG level (loguru filters based on log level config)
logger.debug("=== RANK FUSION FINAL RESULTS ===")
for i, chunk in enumerate(final_results):
content_preview_len = self._config.rank_fusion.content_preview_length
content_preview = (
Expand All @@ -78,13 +78,13 @@ def fuse_results(
bm25_score = chunk.get("bm25_score", 0)
fused_score = chunk.get("fused_score", 0)
search_type = chunk.get("search_type", QueryTypeConstants.UNKNOWN)
logger.info(
logger.debug(
f" Final Rank {i + 1}: fused_score={fused_score:.4f}, semantic={sem_score:.4f}, bm25={bm25_score:.4f}, type={search_type}"
)
logger.info(
logger.debug(
f" id={chunk.get('chunk_id', QueryTypeConstants.UNKNOWN)}, content: '{content_preview}'"
)
logger.info("=== END RANK FUSION RESULTS ===")
logger.debug("=== END RANK FUSION RESULTS ===")

return final_results

Expand Down
Loading
Loading