diff --git a/env.example b/env.example index f77f0f8..0da9d89 100644 --- a/env.example +++ b/env.example @@ -69,4 +69,7 @@ AZURE_OPENAI_API_KEY=your_azure_openai_api_key_here AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o-mini AWS_REGION=us-east-1 AWS_ACCESS_KEY_ID=your_aws_access_key_here -AWS_SECRET_ACCESS_KEY=your_aws_secret_key_here \ No newline at end of file +AWS_SECRET_ACCESS_KEY=your_aws_secret_key_here +LANGFUSE_PUBLIC_KEY=changeme +LANGFUSE_SECRET_KEY=changeme +LANGFUSE_HOST=http://langfuse-web:3000 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 1319835..8eb164c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "deepteam>=0.2.5", "anthropic>=0.69.0", "nemoguardrails>=0.16.0", + "langfuse>=3.8.0", ] [tool.pyright] diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 7743c27..bfeb802 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -24,6 +24,38 @@ from src.utils.cost_utils import calculate_total_costs from src.guardrails import NeMoRailsAdapter, GuardrailCheckResult +from langfuse import Langfuse, observe + + +class LangfuseConfig: + """Configuration for Langfuse integration.""" + + def __init__(self): + self.langfuse_client: Optional[Langfuse] = None + self._initialize_langfuse() + + def _initialize_langfuse(self): + """Initialize Langfuse client with Vault secrets.""" + try: + from llm_orchestrator_config.vault.vault_client import VaultAgentClient + + vault = VaultAgentClient() + if vault.is_vault_available(): + langfuse_secrets = vault.get_secret("langfuse/config") + if langfuse_secrets: + self.langfuse_client = Langfuse( + public_key=langfuse_secrets.get("public_key"), + secret_key=langfuse_secrets.get("secret_key"), + host=langfuse_secrets.get("host", "http://langfuse-web:3000"), + ) + logger.info("Langfuse client initialized successfully") + else: + logger.warning("Langfuse secrets not found in Vault") + else: + logger.warning("Vault not available, Langfuse tracing disabled") + except Exception as e: + logger.warning(f"Failed to initialize Langfuse: {e}") + class LLMOrchestrationService: """ @@ -36,8 +68,9 @@ class LLMOrchestrationService: def __init__(self) -> None: """Initialize the orchestration service.""" - pass + self.langfuse_config = LangfuseConfig() + @observe(name="orchestration_request", as_type="agent") def process_orchestration_request( self, request: OrchestrationRequest ) -> OrchestrationResponse: @@ -79,6 +112,38 @@ def process_orchestration_request( # Log final costs and return response self._log_costs(costs_dict) + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + total_costs = calculate_total_costs(costs_dict) + + total_input_tokens = sum( + c.get("total_prompt_tokens", 0) for c in costs_dict.values() + ) + total_output_tokens = sum( + c.get("total_completion_tokens", 0) for c in costs_dict.values() + ) + + langfuse.update_current_generation( + model=components["llm_manager"] + .get_provider_info() + .get("model", "unknown"), + usage_details={ + "input": total_input_tokens, + "output": total_output_tokens, + "total": total_costs.get("total_tokens", 0), + }, + cost_details={ + "total": total_costs.get("total_cost", 0.0), + }, + metadata={ + "total_calls": total_costs.get("total_calls", 0), + "cost_breakdown": costs_dict, + "chat_id": request.chatId, + "author_id": request.authorId, + "environment": request.environment, + }, + ) + langfuse.flush() return response except Exception as e: @@ -86,9 +151,20 @@ def process_orchestration_request( f"Error processing orchestration request for chatId: {request.chatId}, " f"error: {str(e)}" ) + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + metadata={ + "error": str(e), + "error_type": type(e).__name__, + "response_type": "technical_issue", + } + ) + langfuse.flush() self._log_costs(costs_dict) return self._create_error_response(request) + @observe(name="initialize_service_components", as_type="span") def _initialize_service_components( self, request: OrchestrationRequest ) -> Dict[str, Any]: @@ -115,6 +191,7 @@ def _initialize_service_components( return components + @observe(name="execute_orchestration_pipeline", as_type="span") def _execute_orchestration_pipeline( self, request: OrchestrationRequest, @@ -160,6 +237,7 @@ def _execute_orchestration_pipeline( components["guardrails_adapter"], generated_response, request, costs_dict ) + @observe(name="safe_initialize_guardrails", as_type="span") def _safe_initialize_guardrails( self, environment: str, connection_id: Optional[str] ) -> Optional[NeMoRailsAdapter]: @@ -173,6 +251,7 @@ def _safe_initialize_guardrails( logger.warning("Continuing without guardrails protection") return None + @observe(name="safe_initialize_contextual_retriever", as_type="span") def _safe_initialize_hybrid_retriever(self) -> Optional[HybridRetriever]: """Safely initialize hybrid retriever with error handling.""" try: @@ -186,6 +265,7 @@ def _safe_initialize_hybrid_retriever(self) -> Optional[HybridRetriever]: logger.warning("Continuing without chunk retrieval capabilities") return None + @observe(name="safe_initialize_response_generator", as_type="span") def _safe_initialize_response_generator( self, llm_manager: LLMManager ) -> Optional[ResponseGeneratorAgent]: @@ -309,6 +389,7 @@ def _create_out_of_scope_response( content=OUT_OF_SCOPE_MESSAGE, ) + @observe(name="initialize_guardrails", as_type="span") def _initialize_guardrails( self, environment: str, connection_id: Optional[str] ) -> NeMoRailsAdapter: @@ -339,6 +420,7 @@ def _initialize_guardrails( logger.error(f"Failed to initialize Guardrails adapter: {str(e)}") raise + @observe(name="check_input_guardrails", as_type="span") def _check_input_guardrails( self, guardrails_adapter: NeMoRailsAdapter, @@ -363,6 +445,26 @@ def _check_input_guardrails( # Store guardrail costs costs_dict["input_guardrails"] = result.usage + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + input=user_message, + metadata={ + "guardrail_type": "input", + "allowed": result.allowed, + "verdict": result.verdict, + "blocked_reason": result.reason if not result.allowed else None, + "error": result.error if result.error else None, + }, + usage_details={ + "input": result.usage.get("total_prompt_tokens", 0), + "output": result.usage.get("total_completion_tokens", 0), + "total": result.usage.get("total_tokens", 0), + }, # type: ignore + cost_details={ + "total": result.usage.get("total_cost", 0.0), + }, + ) logger.info( f"Input guardrails check completed: allowed={result.allowed}, " @@ -373,6 +475,15 @@ def _check_input_guardrails( except Exception as e: logger.error(f"Input guardrails check failed: {str(e)}") + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + metadata={ + "error": str(e), + "error_type": type(e).__name__, + "guardrail_type": "input", + } + ) # Return conservative result on error return GuardrailCheckResult( allowed=False, @@ -382,6 +493,7 @@ def _check_input_guardrails( usage={}, ) + @observe(name="check_output_guardrails", as_type="span") def _check_output_guardrails( self, guardrails_adapter: NeMoRailsAdapter, @@ -406,7 +518,28 @@ def _check_output_guardrails( # Store guardrail costs costs_dict["output_guardrails"] = result.usage - + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + input=assistant_message[:500], # Truncate for readability + output=result.verdict, + metadata={ + "guardrail_type": "output", + "allowed": result.allowed, + "verdict": result.verdict, + "reason": result.reason if not result.allowed else None, + "error": result.error if result.error else None, + "response_length": len(assistant_message), + }, + usage_details={ + "input": result.usage.get("total_prompt_tokens", 0), + "output": result.usage.get("total_completion_tokens", 0), + "total": result.usage.get("total_tokens", 0), + }, # type: ignore + cost_details={ + "total": result.usage.get("total_cost", 0.0), + }, + ) logger.info( f"Output guardrails check completed: allowed={result.allowed}, " f"cost=${result.usage.get('total_cost', 0):.6f}" @@ -416,6 +549,15 @@ def _check_output_guardrails( except Exception as e: logger.error(f"Output guardrails check failed: {str(e)}") + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + metadata={ + "error": str(e), + "error_type": type(e).__name__, + "guardrail_type": "output", + } + ) # Return conservative result on error return GuardrailCheckResult( allowed=False, @@ -456,6 +598,7 @@ def _log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: except Exception as e: logger.warning(f"Failed to log costs: {str(e)}") + @observe(name="initialize_llm_manager", as_type="span") def _initialize_llm_manager( self, environment: str, connection_id: Optional[str] ) -> LLMManager: @@ -485,6 +628,7 @@ def _initialize_llm_manager( logger.error(f"Failed to initialize LLM Manager: {str(e)}") raise + @observe(name="refine_user_prompt", as_type="chain") def _refine_user_prompt( self, llm_manager: LLMManager, @@ -550,7 +694,32 @@ def _refine_user_prompt( raise ValueError( f"Prompt refinement validation failed: {str(validation_error)}" ) from validation_error - + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + refinement_applied = ( + original_message.strip() + != validated_output.original_question.strip() + ) + langfuse.update_current_generation( + model=llm_manager.get_provider_info().get("model", "unknown"), + input=original_message, + usage_details={ + "input": usage_info.get("total_prompt_tokens", 0), + "output": usage_info.get("total_completion_tokens", 0), + "total": usage_info.get("total_tokens", 0), + }, + cost_details={ + "total": usage_info.get("total_cost", 0.0), + }, + metadata={ + "num_calls": usage_info.get("num_calls", 0), + "num_refined_questions": len( + validated_output.refined_questions + ), + "refinement_applied": refinement_applied, + "conversation_history_length": len(history), + }, # type: ignore + ) output_json = validated_output.model_dump() logger.info( f"Prompt refinement output: {json.dumps(output_json, indent=2)}" @@ -563,9 +732,19 @@ def _refine_user_prompt( raise except Exception as e: logger.error(f"Prompt refinement failed: {str(e)}") + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + metadata={ + "error": str(e), + "error_type": type(e).__name__, + "refinement_failed": True, + } + ) logger.error(f"Failed to refine message: {original_message}") raise RuntimeError(f"Prompt refinement process failed: {str(e)}") from e + @observe(name="initialize_contextual_retriever", as_type="span") def _initialize_hybrid_retriever(self) -> HybridRetriever: """ Initialize hybrid retriever for document retrieval. @@ -587,6 +766,7 @@ def _initialize_hybrid_retriever(self) -> HybridRetriever: logger.error(f"Failed to initialize hybrid retriever: {str(e)}") raise + @observe(name="initialize_response_generator", as_type="span") def _initialize_response_generator( self, llm_manager: LLMManager ) -> ResponseGeneratorAgent: @@ -613,6 +793,7 @@ def _initialize_response_generator( logger.error(f"Failed to initialize response generator: {str(e)}") raise + @observe(name="retrieve_relevant_chunks", as_type="retriever") def _retrieve_relevant_chunks( self, hybrid_retriever: HybridRetriever, refined_output: PromptRefinerOutput ) -> List[Dict[str, Union[str, float, Dict[str, Any]]]]: @@ -669,6 +850,7 @@ def _retrieve_relevant_chunks( ) raise RuntimeError(f"Chunk retrieval process failed: {str(e)}") from e + @observe(name="generate_rag_response", as_type="generation") def _generate_rag_response( self, llm_manager: LLMManager, @@ -692,6 +874,15 @@ def _generate_rag_response( logger.warning( "Response generator unavailable – returning technical issue message." ) + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + metadata={ + "error": "Response generator unavailable", + "error_type": "technical_issue", + "retrieval_failed": True, + } + ) return OrchestrationResponse( chatId=request.chatId, llmServiceActive=False, @@ -725,7 +916,27 @@ def _generate_rag_response( }, ) costs_dict["response_generator"] = generator_usage - + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + model=llm_manager.get_provider_info().get("model", "unknown"), + usage_details={ + "input": generator_usage.get("total_prompt_tokens", 0), + "output": generator_usage.get("total_completion_tokens", 0), + "total": generator_usage.get("total_tokens", 0), + }, + cost_details={ + "total": generator_usage.get("total_cost", 0.0), + }, + metadata={ + "num_calls": generator_usage.get("num_calls", 0), + "question_out_of_scope": question_out_of_scope, + "num_chunks_used": len(relevant_chunks) + if relevant_chunks + else 0, + }, + output=answer, + ) if question_out_of_scope: logger.info("Question determined out-of-scope – sending fixed message.") return OrchestrationResponse( @@ -748,6 +959,16 @@ def _generate_rag_response( except Exception as e: logger.error(f"RAG Response generation failed: {str(e)}") + if self.langfuse_config.langfuse_client: + langfuse = self.langfuse_config.langfuse_client + langfuse.update_current_generation( + metadata={ + "error": str(e), + "error_type": type(e).__name__, + "response_type": "technical_issue", + "refinement_failed": False, + } + ) # Standardized technical issue; no second LLM call, no citations return OrchestrationResponse( chatId=request.chatId, diff --git a/uv.lock b/uv.lock index 636fa54..cb3743e 100644 --- a/uv.lock +++ b/uv.lock @@ -1079,6 +1079,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/0d/41a51b40d24ff0384ec4f7ab8dd3dcea8353c05c973836b5e289f1465d4f/langchain_text_splitters-0.3.11-py3-none-any.whl", hash = "sha256:cf079131166a487f1372c8ab5d0bfaa6c0a4291733d9c43a34a16ac9bcd6a393", size = 33845, upload-time = "2025-08-31T23:02:57.195Z" }, ] +[[package]] +name = "langfuse" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "httpx" }, + { name = "openai" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/4c/3b35002cfd055f16fec759fe063a1692fde297f6dccdab33bc32647a8734/langfuse-3.8.0.tar.gz", hash = "sha256:f10ecd76a02d89368b41568e386f2bde8744729209a1ca0838b1209703eb7455", size = 191282, upload-time = "2025-10-20T13:45:00.561Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/d9/43a9b3d64cf65f62ccd21046991c72ce4e5b2d851b66a00ce7faca38ffdd/langfuse-3.8.0-py3-none-any.whl", hash = "sha256:9b7e786e7ae8ad895af479b8ad5d094e600f2c7ec1b3dc8bbcd225b1bc7e320a", size = 351985, upload-time = "2025-10-20T13:44:58.473Z" }, +] + [[package]] name = "langsmith" version = "0.4.32" @@ -1349,6 +1370,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, ] +[[package]] +name = "networkx" +version = "3.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -1377,6 +1407,140 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/cd/7b5f49d5d78db7badab22d8323c1b6ae458fbf86c4fdfa194ab3cd4eb39b/numpy-2.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:ee807923782faaf60d0d7331f5e86da7d5e3079e28b291973c545476c2b00d07", size = 10194071, upload-time = "2025-07-24T20:42:36.657Z" }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.10.2.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cusparse-cu12" }, + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.27.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.20" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, +] + [[package]] name = "ollama" version = "0.6.0" @@ -1410,6 +1574,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/90/d5b4ea0bd6805f3f21aac2fe549a5b58ee10d1c99c499d867539620a002b/onnxruntime-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:e100f3869da4c12b17a9b942934a96a542406f860eb8beb74a68342ea43aaa55", size = 13392437, upload-time = "2025-09-25T19:16:36.066Z" }, ] +[[package]] +name = "openai" +version = "1.106.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d6/47/f9ee32467fe92744474a8c72e138113f3b529fc266eea76abfdec9a33f3b/ollama-0.6.0.tar.gz", hash = "sha256:da2b2d846b5944cfbcee1ca1e6ee0585f6c9d45a2fe9467cbcd096a37383da2f", size = 50811, upload-time = "2025-09-24T22:46:02.417Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/c1/edc9f41b425ca40b26b7c104c5f6841a4537bb2552bfa6ca66e81405bb95/ollama-0.6.0-py3-none-any.whl", hash = "sha256:534511b3ccea2dff419ae06c3b58d7f217c55be7897c8ce5868dfb6b219cf7a0", size = 14130, upload-time = "2025-09-24T22:46:01.19Z" }, +] + +[[package]] +name = "onnxruntime" +version = "1.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coloredlogs" }, + { name = "flatbuffers" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "sympy" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/33/ec5395c9539423246e4976d6ec7c4e7a4624ad8bcbe783fea5c629d7980a/onnxruntime-1.23.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:5921f2e106f5faf2b32095b2ecdfae047e445c3bce063e439dadc75c212e7be7", size = 17081368, upload-time = "2025-09-25T19:16:46.585Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3c/d1976a9933e075291a3d67f4e949c667ff36a3e3a4a0cbd883af3c4eae5a/onnxruntime-1.23.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:053df2f9c6522b258055bce4b776aa9ea3adb4b28d2530ab07b204a3d4b04bf9", size = 19028636, upload-time = "2025-09-25T18:56:34.457Z" }, + { url = "https://files.pythonhosted.org/packages/1a/1f/5b76864a970a23dc85f8745d045b81a9151aa101bbb426af6fa489f59364/onnxruntime-1.23.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:974e327ca3b6d43da404b9a45df1f61e2503667fde46843ee7ad1567a98f3f0b", size = 15140544, upload-time = "2025-09-25T18:56:15.9Z" }, + { url = "https://files.pythonhosted.org/packages/0b/62/84f23952d01e07ce8aa02e657e3a0c8fa40aba0d5e11a0e9904a9063af76/onnxruntime-1.23.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f67edb93678cab5cd77eda89b65bb1b58f3d4c0742058742cfad8b172cfa83", size = 17274126, upload-time = "2025-09-25T19:16:11.21Z" }, + { url = "https://files.pythonhosted.org/packages/19/90/d5b4ea0bd6805f3f21aac2fe549a5b58ee10d1c99c499d867539620a002b/onnxruntime-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:e100f3869da4c12b17a9b942934a96a542406f860eb8beb74a68342ea43aaa55", size = 13392437, upload-time = "2025-09-25T19:16:36.066Z" }, +] + [[package]] name = "openai" version = "1.106.1" @@ -1472,6 +1669,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/17/46630b74751031a658706bef23ac99cdc2953cd3b2d28ec90590a0766b3e/opentelemetry_exporter_otlp_proto_grpc-1.37.0-py3-none-any.whl", hash = "sha256:aee5104835bf7993b7ddaaf380b6467472abaedb1f1dbfcc54a52a7d781a3890", size = 19305, upload-time = "2025-09-11T10:28:45.776Z" }, ] +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/e3/6e320aeb24f951449e73867e53c55542bebbaf24faeee7623ef677d66736/opentelemetry_exporter_otlp_proto_http-1.37.0.tar.gz", hash = "sha256:e52e8600f1720d6de298419a802108a8f5afa63c96809ff83becb03f874e44ac", size = 17281, upload-time = "2025-09-11T10:29:04.844Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/e9/70d74a664d83976556cec395d6bfedd9b85ec1498b778367d5f93e373397/opentelemetry_exporter_otlp_proto_http-1.37.0-py3-none-any.whl", hash = "sha256:54c42b39945a6cc9d9a2a33decb876eabb9547e0dcb49df090122773447f1aef", size = 19576, upload-time = "2025-09-11T10:28:46.726Z" }, +] + [[package]] name = "opentelemetry-proto" version = "1.37.0" @@ -2051,6 +2266,7 @@ dependencies = [ { name = "dspy" }, { name = "fastapi" }, { name = "hvac" }, + { name = "langfuse" }, { name = "loguru" }, { name = "nemoguardrails" }, { name = "numpy" }, @@ -2065,6 +2281,7 @@ dependencies = [ { name = "qdrant-client" }, { name = "rank-bm25" }, { name = "requests" }, + { name = "rerankers", extra = ["transformers"] }, { name = "ruff" }, { name = "testcontainers" }, { name = "uvicorn" }, @@ -2075,11 +2292,12 @@ requires-dist = [ { name = "anthropic", specifier = ">=0.69.0" }, { name = "azure-identity", specifier = ">=1.24.0" }, { name = "boto3", specifier = ">=1.40.25" }, - { name = "deepeval", specifier = ">=3.6.6" }, + { name = "deepeval", specifier = ">=3.6.0" }, { name = "deepteam", specifier = ">=0.2.5" }, { name = "dspy", specifier = ">=3.0.3" }, { name = "fastapi", specifier = ">=0.116.1" }, { name = "hvac", specifier = ">=2.3.0" }, + { name = "langfuse", specifier = ">=3.8.0" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "nemoguardrails", specifier = ">=0.16.0" }, { name = "numpy", specifier = ">=2.3.2" }, @@ -2094,6 +2312,7 @@ requires-dist = [ { name = "qdrant-client", specifier = ">=1.15.1" }, { name = "rank-bm25", specifier = ">=0.2.2" }, { name = "requests", specifier = ">=2.32.5" }, + { name = "rerankers", extras = ["transformers"], specifier = ">=0.10.0" }, { name = "ruff", specifier = ">=0.12.12" }, { name = "testcontainers", specifier = ">=4.13.0" }, { name = "uvicorn", specifier = ">=0.35.0" }, @@ -2174,6 +2393,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, ] +[[package]] +name = "rerankers" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/1e/3ed2026be7c135939905eac4f50d1bf8339180821c6757b2e91b83de2fa5/rerankers-0.10.0.tar.gz", hash = "sha256:b8e8b363abc4e9757151956949c27b197993c0a774437287a932f855afc17a73", size = 49679, upload-time = "2025-05-22T08:22:53.396Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/ed/f3b81ca8743d69b95d679b95e6e1d22cb7cc678ae77c6a57827303a7e48c/rerankers-0.10.0-py3-none-any.whl", hash = "sha256:634a6befa130a245ed46022ade217ee482869448f01aae2051ed54d7d5bd2791", size = 53084, upload-time = "2025-05-22T08:22:52.022Z" }, +] + +[package.optional-dependencies] +transformers = [ + { name = "protobuf" }, + { name = "sentencepiece" }, + { name = "torch" }, + { name = "transformers" }, +] + [[package]] name = "rich" version = "14.1.0" @@ -2260,6 +2496,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/4f/d073e09df851cfa251ef7840007d04db3293a0482ce607d2b993926089be/s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724", size = 85308, upload-time = "2025-07-18T19:22:40.947Z" }, ] +[[package]] +name = "safetensors" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/cc/738f3011628920e027a11754d9cae9abec1aed00f7ae860abbf843755233/safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9", size = 197968, upload-time = "2025-08-08T13:13:58.654Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/b1/3f5fd73c039fc87dba3ff8b5d528bfc5a32b597fea8e7a6a4800343a17c7/safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba", size = 454797, upload-time = "2025-08-08T13:13:52.066Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c9/bb114c158540ee17907ec470d01980957fdaf87b4aa07914c24eba87b9c6/safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b", size = 432206, upload-time = "2025-08-08T13:13:50.931Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8e/f70c34e47df3110e8e0bb268d90db8d4be8958a54ab0336c9be4fe86dac8/safetensors-0.6.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d2d2b3ce1e2509c68932ca03ab8f20570920cd9754b05063d4368ee52833ecd", size = 473261, upload-time = "2025-08-08T13:13:41.259Z" }, + { url = "https://files.pythonhosted.org/packages/2a/f5/be9c6a7c7ef773e1996dc214e73485286df1836dbd063e8085ee1976f9cb/safetensors-0.6.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:93de35a18f46b0f5a6a1f9e26d91b442094f2df02e9fd7acf224cfec4238821a", size = 485117, upload-time = "2025-08-08T13:13:43.506Z" }, + { url = "https://files.pythonhosted.org/packages/c9/55/23f2d0a2c96ed8665bf17a30ab4ce5270413f4d74b6d87dd663258b9af31/safetensors-0.6.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89a89b505f335640f9120fac65ddeb83e40f1fd081cb8ed88b505bdccec8d0a1", size = 616154, upload-time = "2025-08-08T13:13:45.096Z" }, + { url = "https://files.pythonhosted.org/packages/98/c6/affb0bd9ce02aa46e7acddbe087912a04d953d7a4d74b708c91b5806ef3f/safetensors-0.6.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc4d0d0b937e04bdf2ae6f70cd3ad51328635fe0e6214aa1fc811f3b576b3bda", size = 520713, upload-time = "2025-08-08T13:13:46.25Z" }, + { url = "https://files.pythonhosted.org/packages/fe/5d/5a514d7b88e310c8b146e2404e0dc161282e78634d9358975fd56dfd14be/safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8045db2c872db8f4cbe3faa0495932d89c38c899c603f21e9b6486951a5ecb8f", size = 485835, upload-time = "2025-08-08T13:13:49.373Z" }, + { url = "https://files.pythonhosted.org/packages/7a/7b/4fc3b2ba62c352b2071bea9cfbad330fadda70579f617506ae1a2f129cab/safetensors-0.6.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81e67e8bab9878bb568cffbc5f5e655adb38d2418351dc0859ccac158f753e19", size = 521503, upload-time = "2025-08-08T13:13:47.651Z" }, + { url = "https://files.pythonhosted.org/packages/5a/50/0057e11fe1f3cead9254315a6c106a16dd4b1a19cd247f7cc6414f6b7866/safetensors-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0e4d029ab0a0e0e4fdf142b194514695b1d7d3735503ba700cf36d0fc7136ce", size = 652256, upload-time = "2025-08-08T13:13:53.167Z" }, + { url = "https://files.pythonhosted.org/packages/e9/29/473f789e4ac242593ac1656fbece6e1ecd860bb289e635e963667807afe3/safetensors-0.6.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:fa48268185c52bfe8771e46325a1e21d317207bcabcb72e65c6e28e9ffeb29c7", size = 747281, upload-time = "2025-08-08T13:13:54.656Z" }, + { url = "https://files.pythonhosted.org/packages/68/52/f7324aad7f2df99e05525c84d352dc217e0fa637a4f603e9f2eedfbe2c67/safetensors-0.6.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:d83c20c12c2d2f465997c51b7ecb00e407e5f94d7dec3ea0cc11d86f60d3fde5", size = 692286, upload-time = "2025-08-08T13:13:55.884Z" }, + { url = "https://files.pythonhosted.org/packages/ad/fe/cad1d9762868c7c5dc70c8620074df28ebb1a8e4c17d4c0cb031889c457e/safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac", size = 655957, upload-time = "2025-08-08T13:13:57.029Z" }, + { url = "https://files.pythonhosted.org/packages/59/a7/e2158e17bbe57d104f0abbd95dff60dda916cf277c9f9663b4bf9bad8b6e/safetensors-0.6.2-cp38-abi3-win32.whl", hash = "sha256:cab75ca7c064d3911411461151cb69380c9225798a20e712b102edda2542ddb1", size = 308926, upload-time = "2025-08-08T13:14:01.095Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c3/c0be1135726618dc1e28d181b8c442403d8dbb9e273fd791de2d4384bcdd/safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c", size = 320192, upload-time = "2025-08-08T13:13:59.467Z" }, +] + +[[package]] +name = "sentencepiece" +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/15/2e7a025fc62d764b151ae6d0f2a92f8081755ebe8d4a64099accc6f77ba6/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad", size = 3228515, upload-time = "2025-08-12T07:00:51.718Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/be/32ce495aa1d0e0c323dcb1ba87096037358edee539cac5baf8755a6bd396/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57cae326c8727de58c85977b175af132a7138d84c764635d7e71bbee7e774133", size = 1943152, upload-time = "2025-08-12T06:59:40.048Z" }, + { url = "https://files.pythonhosted.org/packages/88/7e/ff23008899a58678e98c6ff592bf4d368eee5a71af96d0df6b38a039dd4f/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:56dd39a3c4d6493db3cdca7e8cc68c6b633f0d4195495cbadfcf5af8a22d05a6", size = 1325651, upload-time = "2025-08-12T06:59:41.536Z" }, + { url = "https://files.pythonhosted.org/packages/19/84/42eb3ce4796777a1b5d3699dfd4dca85113e68b637f194a6c8d786f16a04/sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9381351182ff9888cc80e41c632e7e274b106f450de33d67a9e8f6043da6f76", size = 1253645, upload-time = "2025-08-12T06:59:42.903Z" }, + { url = "https://files.pythonhosted.org/packages/89/fa/d3d5ebcba3cb9e6d3775a096251860c41a6bc53a1b9461151df83fe93255/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99f955df238021bf11f0fc37cdb54fd5e5b5f7fd30ecc3d93fb48b6815437167", size = 1316273, upload-time = "2025-08-12T06:59:44.476Z" }, + { url = "https://files.pythonhosted.org/packages/04/88/14f2f4a2b922d8b39be45bf63d79e6cd3a9b2f248b2fcb98a69b12af12f5/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdfecef430d985f1c2bcbfff3defd1d95dae876fbd0173376012d2d7d24044b", size = 1387881, upload-time = "2025-08-12T06:59:46.09Z" }, + { url = "https://files.pythonhosted.org/packages/fd/b8/903e5ccb77b4ef140605d5d71b4f9e0ad95d456d6184688073ed11712809/sentencepiece-0.2.1-cp312-cp312-win32.whl", hash = "sha256:a483fd29a34c3e34c39ac5556b0a90942bec253d260235729e50976f5dba1068", size = 999540, upload-time = "2025-08-12T06:59:48.023Z" }, + { url = "https://files.pythonhosted.org/packages/2d/81/92df5673c067148c2545b1bfe49adfd775bcc3a169a047f5a0e6575ddaca/sentencepiece-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4cdc7c36234fda305e85c32949c5211faaf8dd886096c7cea289ddc12a2d02de", size = 1054671, upload-time = "2025-08-12T06:59:49.895Z" }, + { url = "https://files.pythonhosted.org/packages/fe/02/c5e3bc518655d714622bec87d83db9cdba1cd0619a4a04e2109751c4f47f/sentencepiece-0.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:daeb5e9e9fcad012324807856113708614d534f596d5008638eb9b40112cd9e4", size = 1033923, upload-time = "2025-08-12T06:59:51.952Z" }, +] + [[package]] name = "sentry-sdk" version = "2.40.0" @@ -2441,6 +2715,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/9b/0e0bf82214ee20231845b127aa4a8015936ad5a46779f30865d10e404167/tokenizers-0.22.0-cp39-abi3-win_amd64.whl", hash = "sha256:c78174859eeaee96021f248a56c801e36bfb6bd5b067f2e95aa82445ca324f00", size = 2680494, upload-time = "2025-08-29T10:25:35.14Z" }, ] +[[package]] +name = "torch" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "setuptools" }, + { name = "sympy" }, + { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" }, + { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" }, + { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" }, + { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -2453,6 +2763,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] +[[package]] +name = "transformers" +version = "4.57.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "requests" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d6/68/a39307bcc4116a30b2106f2e689130a48de8bd8a1e635b5e1030e46fcd9e/transformers-4.57.1.tar.gz", hash = "sha256:f06c837959196c75039809636cd964b959f6604b75b8eeec6fdfc0440b89cc55", size = 10142511, upload-time = "2025-10-14T15:39:26.18Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267", size = 11990925, upload-time = "2025-10-14T15:39:23.085Z" }, +] + +[[package]] +name = "triton" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" }, +] + [[package]] name = "typer" version = "0.19.2"