292 changes: 153 additions & 139 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -16,7 +16,7 @@ repository = "https://github.com/msamsami/clonellm"

[tool.poetry.dependencies]
python = ">=3.10,<3.14"
litellm = "^1.36.0"
litellm = "^1.42.1"
langchain = "^0.1.17"
pydantic = {version = ">=2.8.0", python = ">=3.12.4"}
legacy-cgi = {version = ">=2.6.2", python = ">=3.13"}
@@ -45,7 +45,7 @@ strict = true

[[tool.mypy.overrides]]
module = [
"litellm.*",
"litellm.litellm_core_utils.*",
"openai.lib.streaming",
"opentelemetry.*",
"google",
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-litellm>=1.36.0,<2.0.0
+litellm>=1.42.1,<2.0.0
langchain>=0.1.17,<1.0.0
pydantic>=2.8.0; python_version >= '3.12.4'
legacy-cgi>=2.6.2; python_version >= '3.13'
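Note that the caret constraint `^1.42.1` in pyproject.toml and the range `>=1.42.1,<2.0.0` in requirements.txt pin the same interval. A minimal sketch, not part of the PR, that checks the installed litellm against the new floor at runtime (naive X.Y.Z parse; pre-release suffixes would need `packaging.version` instead):

```python
from importlib.metadata import version

# Parse the installed litellm version into a comparable tuple, e.g. "1.42.1" -> (1, 42, 1).
installed = tuple(int(part) for part in version("litellm").split(".")[:3])
assert (1, 42, 1) <= installed < (2, 0, 0), f"litellm {installed} outside supported range"
```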
8 changes: 4 additions & 4 deletions src/clonellm/_base.py
@@ -1,5 +1,5 @@
from abc import ABCMeta
-from typing import Any, Optional, cast
+from typing import Any, cast

from litellm.utils import get_api_key

@@ -13,7 +13,7 @@


class LiteLLMMixin(metaclass=ABCMeta):
-def __init__(self, model: str, api_key: Optional[str] = None, **kwargs: Any) -> None:
+def __init__(self, model: str, api_key: str | None = None, **kwargs: Any) -> None:
self.model = model
self.api_key = api_key
self._litellm_kwargs: dict[str, Any] = kwargs
@@ -23,8 +23,8 @@ def _llm_provider(self) -> str:
return cast(str, get_llm_provider(model=self.model)[1])

@property
-def _api_key(self) -> Optional[str]:
+def _api_key(self) -> str | None:
if self.api_key:
return self.api_key
else:
-return cast(Optional[str], get_api_key(llm_provider=self._llm_provider, dynamic_api_key=self.api_key))
+return cast(str | None, get_api_key(llm_provider=self._llm_provider, dynamic_api_key=self.api_key))
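The `Optional[X]` to `X | None` migration throughout this PR relies on PEP 604 union syntax, which is only valid at runtime on Python 3.10+, consistent with the `python = ">=3.10,<3.14"` constraint in pyproject.toml. A hedged sketch with a hypothetical class (not from the PR) showing the equivalent annotation style:

```python
from typing import Any


class ClientStub:
    def __init__(self, model: str, api_key: str | None = None, **kwargs: Any) -> None:
        self.model = model
        self.api_key = api_key  # None defers the key lookup to provider env vars
        self.extra = kwargs

    @property
    def resolved_key(self) -> str | None:
        # Mirrors the pattern in LiteLLMMixin._api_key: an explicit key wins over discovery.
        return self.api_key
```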
4 changes: 1 addition & 3 deletions src/clonellm/_prompt.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

__all__ = ("summarize_context_prompt", "get_context_prompt", "user_profile_prompt", "history_prompt", "question_prompt")
@@ -15,7 +13,7 @@
)


-def get_context_prompt(additional_system_prompts: Optional[list[str]] = None) -> ChatPromptTemplate:
+def get_context_prompt(additional_system_prompts: list[str] | None = None) -> ChatPromptTemplate:
prompts = [
("system", "You are my human clone. Act as I would. Do not ask questions about me. If anyone asks, you are me."),
("system", "You are having a conversation with someone who wants to learn more about you."),
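For context, a short usage sketch of the function whose signature changed here; based on the parameter name, the extra strings are presumably appended as additional system messages, which is an assumption since the full body is not shown in this diff:

```python
from clonellm._prompt import get_context_prompt

# None keeps only the built-in system prompts; extra strings are layered on top.
prompt = get_context_prompt(additional_system_prompts=["Keep answers under two sentences."])
print(type(prompt).__name__)  # ChatPromptTemplate
```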
70 changes: 35 additions & 35 deletions src/clonellm/core.py
@@ -5,7 +5,7 @@
import uuid
from importlib.util import find_spec
from operator import itemgetter
-from typing import Any, AsyncIterator, Iterator, Optional, cast
+from typing import Any, AsyncIterator, Iterator, cast

from langchain.text_splitter import CharacterTextSplitter, TextSplitter
from langchain_community.chat_models import ChatLiteLLM
@@ -47,12 +47,12 @@ class CloneLLM(LiteLLMMixin):
Args:
model (str): Name of the language model.
documents (list[Document | str]): List of documents or strings related to cloning user to use for LLM context.
-embedding (Optional[Embeddings]): The embedding function to use for RAG. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
-vector_store (Optional[str | RagVectorStore]): The vector store to use for embedding-based retrieval. Defaults to None for "in-memory" vector store.
-user_profile (Optional[UserProfile | dict[str, Any] | str]): The profile of the user to be cloned by the language model. Defaults to None.
-memory (Optional[bool | int]): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
-api_key (Optional[str]): The API key to use. Defaults to None.
-system_prompts (Optional[list[str]]): Additional system prompts (instructions) for the language model. Defaults to None.
+embedding (Embeddings | None): The embedding function to use for RAG. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
+vector_store (str | RagVectorStore | None): The vector store to use for embedding-based retrieval. Defaults to None for "in-memory" vector store.
+user_profile (UserProfile | dict[str, Any] | str | None): The profile of the user to be cloned by the language model. Defaults to None.
+memory (bool | int | None): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
+api_key (str | None): The API key to use. Defaults to None.
+system_prompts (list[str] | None): Additional system prompts (instructions) for the language model. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `langchain_community.chat_models.ChatLiteLLM` class.

"""
@@ -66,12 +66,12 @@ def __init__(
self,
model: str,
documents: list[Document | str],
-embedding: Optional[Embeddings] = None,
-vector_store: Optional[str | RagVectorStore] = None,
-user_profile: Optional[UserProfile | dict[str, Any] | str] = None,
-memory: Optional[bool | int] = None,
-api_key: Optional[str] = None,
-system_prompts: Optional[list[str]] = None,
+embedding: Embeddings | None = None,
+vector_store: str | RagVectorStore | None = None,
+user_profile: UserProfile | dict[str, Any] | str | None = None,
+memory: bool | int | None = None,
+api_key: str | None = None,
+system_prompts: list[str] | None = None,
**kwargs: Any,
) -> None:
self.embedding = embedding
@@ -81,7 +81,7 @@ def __init__(
self.memory = memory
self.system_prompts = system_prompts

-from_class_method: Optional[dict[str, Any]] = kwargs.pop(self._FROM_CLASS_METHOD_KWARG, None)
+from_class_method: dict[str, Any] | None = kwargs.pop(self._FROM_CLASS_METHOD_KWARG, None)
super().__init__(model, api_key, **kwargs)
self._internal_init(from_class_method)

@@ -110,7 +110,7 @@ def _check_dependencies(self) -> None:
else:
raise ValueError(f"Unsupported vector store '{self.vector_store}' provided.")

-def _internal_init(self, from_class_method: Optional[dict[str, Any]] = None) -> None:
+def _internal_init(self, from_class_method: dict[str, Any] | None = None) -> None:
self._splitter: TextSplitter
self.context: str
self.db: VectorStore
@@ -133,23 +133,23 @@ def from_persist_directory(
cls,
model: str,
chroma_persist_directory: str,
-embedding: Optional[Embeddings] = None,
-user_profile: Optional[UserProfile | dict[str, Any] | str] = None,
-memory: Optional[bool | int] = None,
-api_key: Optional[str] = None,
-system_prompts: Optional[list[str]] = None,
+embedding: Embeddings | None = None,
+user_profile: UserProfile | dict[str, Any] | str | None = None,
+memory: bool | int | None = None,
+api_key: str | None = None,
+system_prompts: list[str] | None = None,
**kwargs: Any,
) -> Self:
"""Creates an instance of CloneLLM by loading a Chroma vector store from a persistent directory.

Args:
model (str): Name of the language model.
chroma_persist_directory (str): Directory path to the persisted Chroma vector store.
-embedding (Optional[Embeddings]): The embedding function to use for Chroma store. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
-user_profile (Optional[UserProfile | dict[str, Any] | str]): The profile of the user to be cloned by the language model. Defaults to None.
-memory (Optional[bool | int]): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
-api_key (Optional[str]): The API key to use. Defaults to None.
-system_prompts (Optional[list[str]]): Additional system prompts (instructions) for the language model. Defaults to None.
+embedding (Embeddings | None): The embedding function to use for Chroma store. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
+user_profile (UserProfile | dict[str, Any] | str | None): The profile of the user to be cloned by the language model. Defaults to None.
+memory (bool | int | None): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
+api_key (str | None): The API key to use. Defaults to None.
+system_prompts (list[str] | None): Additional system prompts (instructions) for the language model. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `langchain_community.chat_models.ChatLiteLLM` class.

Returns:
@@ -184,21 +184,21 @@ def from_context(
cls,
model: str,
context: str,
-user_profile: Optional[UserProfile | dict[str, Any] | str] = None,
-memory: Optional[bool | int] = None,
-api_key: Optional[str] = None,
-system_prompts: Optional[list[str]] = None,
+user_profile: UserProfile | dict[str, Any] | str | None = None,
+memory: bool | int | None = None,
+api_key: str | None = None,
+system_prompts: list[str] | None = None,
**kwargs: Any,
) -> Self:
"""Creates an instance of CloneLLM using a summarized context string instead of documents.

Args:
model (str): Name of the language model.
context (str): Pre-summarized context string for the language model.
-user_profile (Optional[UserProfile | dict[str, Any] | str]): The profile of the user to be cloned by the language model. Defaults to None.
-memory (Optional[bool | int]): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
-api_key (Optional[str]): The API key to use. Defaults to None.
-system_prompts (Optional[list[str]]): Additional system prompts (instructions) for the language model. Defaults to None.
+user_profile (UserProfile | dict[str, Any] | str | None): The profile of the user to be cloned by the language model. Defaults to None.
+memory (bool | int | None): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
+api_key (str | None): The API key to use. Defaults to None.
+system_prompts (list[str] | None): Additional system prompts (instructions) for the language model. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `langchain_community.chat_models.ChatLiteLLM` class.

Returns:
@@ -215,7 +215,7 @@ def from_context(
**kwargs,
)

-def _get_documents(self, documents: Optional[list[Document | str]] = None) -> list[Document]:
+def _get_documents(self, documents: list[Document | str] | None = None) -> list[Document]:
if not (documents or self.documents):
raise ValueError("No documents provided")
documents_ = []
@@ -491,7 +491,7 @@ def models_by_provider(self) -> dict[str, list[str]]:
"""
Returns the available models grouped by their providers.
"""
-return cast(dict[str, list[str]], models_by_provider)
+return models_by_provider

def __repr__(self) -> str:
return f"CloneLLM<(model='{self.model}'" + (f", memory={self.memory}" * (self.memory is not None)) + ")>"
10 changes: 5 additions & 5 deletions src/clonellm/embed.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional, cast
+from typing import Any

from langchain_core.embeddings import Embeddings
from litellm import aembedding, all_embedding_models, embedding
@@ -13,13 +13,13 @@ class LiteLLMEmbeddings(LiteLLMMixin, Embeddings):

Args:
model (str): The embedding model to use.
-api_key (Optional[str]): The API key to use. Defaults to None.
-dimensions (Optional[int]): The number of dimensions the resulting output embeddings should have. Defaults to None.
+api_key (str | None): The API key to use. Defaults to None.
+dimensions (int | None): The number of dimensions the resulting output embeddings should have. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `litellm.embedding` and `litellm.aembedding` functions.

"""

-def __init__(self, model: str, api_key: Optional[str] = None, dimensions: Optional[int] = None, **kwargs: Any) -> None:
+def __init__(self, model: str, api_key: str | None = None, dimensions: int | None = None, **kwargs: Any) -> None:
super().__init__(model, api_key, **kwargs)
self.dimensions = dimensions

@@ -79,7 +79,7 @@ def all_embedding_models(self) -> list[str]:
"""
Returns the names of supported embedding models.
"""
-return cast(list[str], all_embedding_models)
+return all_embedding_models

def __repr__(self) -> str:
return (
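Since LiteLLMEmbeddings subclasses langchain_core's Embeddings, it exposes the standard `embed_documents`/`embed_query` interface; a sketch, where the import path and model name are assumptions:

```python
from clonellm import LiteLLMEmbeddings  # assumed public re-export

emb = LiteLLMEmbeddings(model="text-embedding-3-small", dimensions=256)  # illustrative model
vectors = emb.embed_documents(["first document", "second document"])
query_vector = emb.embed_query("a search query")
assert len(vectors) == 2 and len(vectors[0]) == 256
```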
40 changes: 20 additions & 20 deletions src/clonellm/models.py
@@ -1,5 +1,5 @@
import datetime
-from typing import Any, Optional
+from typing import Any

from pydantic import BaseModel, Field

@@ -48,30 +48,30 @@ class CommunicationSample(BaseModel):

class UserProfile(BaseModel):
first_name: str
-middle_name: Optional[str] = None
+middle_name: str | None = None
last_name: str
-preferred_name: Optional[str] = None
-prefix: Optional[str] = None
-birth_date: Optional[datetime.date | str] = None
-gender: Optional[str] = None
-city: Optional[str] = None
-state: Optional[str] = None
-country: Optional[str] = None
-phone_number: Optional[str] = None
-email: Optional[str] = None
-personality_traits: Optional[PersonalityTraits] = None
-education_experience: Optional[Any] = None
-work_experience: Optional[Any] = None
-expertise: Optional[Any] = None
-communication_samples: Optional[list[CommunicationSample]] = None
-home_page: Optional[str] = None
-github_page: Optional[str] = None
-linkedin_page: Optional[str] = None
+preferred_name: str | None = None
+prefix: str | None = None
+birth_date: datetime.date | str | None = None
+gender: str | None = None
+city: str | None = None
+state: str | None = None
+country: str | None = None
+phone_number: str | None = None
+email: str | None = None
+personality_traits: PersonalityTraits | None = None
+education_experience: Any = None
+work_experience: Any = None
+expertise: Any = None
+communication_samples: list[CommunicationSample] | None = None
+home_page: str | None = None
+github_page: str | None = None
+linkedin_page: str | None = None

@property
def full_name(self) -> str:
return " ".join([self.first_name, self.middle_name or "", self.last_name])

@property
-def age(self) -> Optional[int]:
+def age(self) -> int | None:
return (datetime.date.today() - self.birth_date).days // 365 if isinstance(self.birth_date, datetime.date) else None
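A quick sketch of the migrated model in use (values are illustrative). Two quirks of the unchanged context lines are worth noting: `full_name` joins an empty string when `middle_name` is None, which yields a doubled space, and `age` is a 365-day approximation:

```python
import datetime

from clonellm.models import UserProfile  # module path as in this diff

profile = UserProfile(
    first_name="Ada",
    last_name="Lovelace",
    birth_date=datetime.date(1815, 12, 10),
    city="London",
    country="UK",
)
print(profile.full_name)  # "Ada  Lovelace" -- doubled space while middle_name is None
print(profile.age)        # days-based approximation: (today - birth_date).days // 365
```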
2 changes: 2 additions & 0 deletions src/clonellm/py.typed
@@ -0,0 +1,2 @@
+# Marker file to instruct type checkers to look for inline type annotations in this package.
+# See PEP 561 for more information.
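Shipping the py.typed marker is what lets downstream type checkers see clonellm's inline annotations (PEP 561). A hedged sketch of what a consumer gains, assuming CloneLLM is re-exported at package level:

```python
# downstream_app.py -- with py.typed shipped, mypy resolves clonellm's real
# signatures instead of treating every import from the package as Any.
from clonellm import CloneLLM  # assumed public re-export


def make_clone(model: str, key: str | None = None) -> CloneLLM:
    return CloneLLM(model=model, documents=["..."], api_key=key)
```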