292 changes: 153 additions & 139 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -16,7 +16,7 @@ repository = "https://github.com/msamsami/clonellm"

[tool.poetry.dependencies]
python = ">=3.10,<3.14"
litellm = "^1.36.0"
litellm = "^1.42.1"
langchain = "^0.1.17"
pydantic = {version = ">=2.8.0", python = ">=3.12.4"}
legacy-cgi = {version = ">=2.6.2", python = ">=3.13"}
@@ -45,7 +45,7 @@ strict = true

[[tool.mypy.overrides]]
module = [
"litellm.*",
"litellm.litellm_core_utils.*",
"openai.lib.streaming",
"opentelemetry.*",
"google",
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-litellm>=1.36.0,<2.0.0
+litellm>=1.42.1,<2.0.0
langchain>=0.1.17,<1.0.0
pydantic>=2.8.0; python_version >= '3.12.4'
legacy-cgi>=2.6.2; python_version >= '3.13'
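Note that the caret constraint `^1.42.1` in pyproject.toml and the range `>=1.42.1,<2.0.0` in requirements.txt pin the same interval. A minimal sketch, not part of the PR, that checks the installed litellm against the new floor at runtime (naive X.Y.Z parse; pre-release suffixes would need `packaging.version` instead):

```python
from importlib.metadata import version

# Parse the installed litellm version into a comparable tuple, e.g. "1.42.1" -> (1, 42, 1).
installed = tuple(int(part) for part in version("litellm").split(".")[:3])
assert (1, 42, 1) <= installed < (2, 0, 0), f"litellm {installed} outside supported range"
```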
8 changes: 4 additions & 4 deletions src/clonellm/_base.py
@@ -1,5 +1,5 @@
from abc import ABCMeta
-from typing import Any, Optional, cast
+from typing import Any, cast

from litellm.utils import get_api_key

@@ -13,7 +13,7 @@


class LiteLLMMixin(metaclass=ABCMeta):
-def __init__(self, model: str, api_key: Optional[str] = None, **kwargs: Any) -> None:
+def __init__(self, model: str, api_key: str | None = None, **kwargs: Any) -> None:
self.model = model
self.api_key = api_key
self._litellm_kwargs: dict[str, Any] = kwargs
@@ -23,8 +23,8 @@ def _llm_provider(self) -> str:
return cast(str, get_llm_provider(model=self.model)[1])

@property
-def _api_key(self) -> Optional[str]:
+def _api_key(self) -> str | None:
if self.api_key:
return self.api_key
else:
-return cast(Optional[str], get_api_key(llm_provider=self._llm_provider, dynamic_api_key=self.api_key))
+return cast(str | None, get_api_key(llm_provider=self._llm_provider, dynamic_api_key=self.api_key))
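The `Optional[X]` to `X | None` migration throughout this PR relies on PEP 604 union syntax, which is only valid at runtime on Python 3.10+, consistent with the `python = ">=3.10,<3.14"` constraint in pyproject.toml. A hedged sketch with a hypothetical class (not from the PR) showing the equivalent annotation style:

```python
from typing import Any


class ClientStub:
    def __init__(self, model: str, api_key: str | None = None, **kwargs: Any) -> None:
        self.model = model
        self.api_key = api_key  # None defers the key lookup to provider env vars
        self.extra = kwargs

    @property
    def resolved_key(self) -> str | None:
        # Mirrors the pattern in LiteLLMMixin._api_key: an explicit key wins over discovery.
        return self.api_key
```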
4 changes: 1 addition & 3 deletions src/clonellm/_prompt.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

__all__ = ("summarize_context_prompt", "get_context_prompt", "user_profile_prompt", "history_prompt", "question_prompt")
@@ -15,7 +13,7 @@
)


-def get_context_prompt(additional_system_prompts: Optional[list[str]] = None) -> ChatPromptTemplate:
+def get_context_prompt(additional_system_prompts: list[str] | None = None) -> ChatPromptTemplate:
prompts = [
("system", "You are my human clone. Act as I would. Do not ask questions about me. If anyone asks, you are me."),
("system", "You are having a conversation with someone who wants to learn more about you."),
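For context, a short usage sketch of the function whose signature changed here; based on the parameter name, the extra strings are presumably appended as additional system messages, which is an assumption since the full body is not shown in this diff:

```python
from clonellm._prompt import get_context_prompt

# None keeps only the built-in system prompts; extra strings are layered on top.
prompt = get_context_prompt(additional_system_prompts=["Keep answers under two sentences."])
print(type(prompt).__name__)  # ChatPromptTemplate
```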
70 changes: 35 additions & 35 deletions src/clonellm/core.py
@@ -5,7 +5,7 @@
import uuid
from importlib.util import find_spec
from operator import itemgetter
-from typing import Any, AsyncIterator, Iterator, Optional, cast
+from typing import Any, AsyncIterator, Iterator, cast

from langchain.text_splitter import CharacterTextSplitter, TextSplitter
from langchain_community.chat_models import ChatLiteLLM
@@ -47,12 +47,12 @@ class CloneLLM(LiteLLMMixin):
Args:
model (str): Name of the language model.
documents (list[Document | str]): List of documents or strings related to cloning user to use for LLM context.
-embedding (Optional[Embeddings]): The embedding function to use for RAG. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
-vector_store (Optional[str | RagVectorStore]): The vector store to use for embedding-based retrieval. Defaults to None for "in-memory" vector store.
-user_profile (Optional[UserProfile | dict[str, Any] | str]): The profile of the user to be cloned by the language model. Defaults to None.
-memory (Optional[bool | int]): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
-api_key (Optional[str]): The API key to use. Defaults to None.
-system_prompts (Optional[list[str]]): Additional system prompts (instructions) for the language model. Defaults to None.
+embedding (Embeddings | None): The embedding function to use for RAG. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
+vector_store (str | RagVectorStore | None): The vector store to use for embedding-based retrieval. Defaults to None for "in-memory" vector store.
+user_profile (UserProfile | dict[str, Any] | str | None): The profile of the user to be cloned by the language model. Defaults to None.
+memory (bool | int | None): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
+api_key (str | None): The API key to use. Defaults to None.
+system_prompts (list[str] | None): Additional system prompts (instructions) for the language model. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `langchain_community.chat_models.ChatLiteLLM` class.

"""
@@ -66,12 +66,12 @@ def __init__(
self,
model: str,
documents: list[Document | str],
-embedding: Optional[Embeddings] = None,
-vector_store: Optional[str | RagVectorStore] = None,
-user_profile: Optional[UserProfile | dict[str, Any] | str] = None,
-memory: Optional[bool | int] = None,
-api_key: Optional[str] = None,
-system_prompts: Optional[list[str]] = None,
+embedding: Embeddings | None = None,
+vector_store: str | RagVectorStore | None = None,
+user_profile: UserProfile | dict[str, Any] | str | None = None,
+memory: bool | int | None = None,
+api_key: str | None = None,
+system_prompts: list[str] | None = None,
**kwargs: Any,
) -> None:
self.embedding = embedding
@@ -81,7 +81,7 @@ def __init__(
self.memory = memory
self.system_prompts = system_prompts

-from_class_method: Optional[dict[str, Any]] = kwargs.pop(self._FROM_CLASS_METHOD_KWARG, None)
+from_class_method: dict[str, Any] | None = kwargs.pop(self._FROM_CLASS_METHOD_KWARG, None)
super().__init__(model, api_key, **kwargs)
self._internal_init(from_class_method)

@@ -110,7 +110,7 @@ def _check_dependencies(self) -> None:
else:
raise ValueError(f"Unsupported vector store '{self.vector_store}' provided.")

-def _internal_init(self, from_class_method: Optional[dict[str, Any]] = None) -> None:
+def _internal_init(self, from_class_method: dict[str, Any] | None = None) -> None:
self._splitter: TextSplitter
self.context: str
self.db: VectorStore
@@ -133,23 +133,23 @@ def from_persist_directory(
cls,
model: str,
chroma_persist_directory: str,
-embedding: Optional[Embeddings] = None,
-user_profile: Optional[UserProfile | dict[str, Any] | str] = None,
-memory: Optional[bool | int] = None,
-api_key: Optional[str] = None,
-system_prompts: Optional[list[str]] = None,
+embedding: Embeddings | None = None,
+user_profile: UserProfile | dict[str, Any] | str | None = None,
+memory: bool | int | None = None,
+api_key: str | None = None,
+system_prompts: list[str] | None = None,
**kwargs: Any,
) -> Self:
"""Creates an instance of CloneLLM by loading a Chroma vector store from a persistent directory.

Args:
model (str): Name of the language model.
chroma_persist_directory (str): Directory path to the persisted Chroma vector store.
-embedding (Optional[Embeddings]): The embedding function to use for Chroma store. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
-user_profile (Optional[UserProfile | dict[str, Any] | str]): The profile of the user to be cloned by the language model. Defaults to None.
-memory (Optional[bool | int]): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
-api_key (Optional[str]): The API key to use. Defaults to None.
-system_prompts (Optional[list[str]]): Additional system prompts (instructions) for the language model. Defaults to None.
+embedding (Embeddings | None): The embedding function to use for Chroma store. Defaults to None for no embedding, i.e., a summary of `documents` is used for RAG.
+user_profile (UserProfile | dict[str, Any] | str | None): The profile of the user to be cloned by the language model. Defaults to None.
+memory (bool | int | None): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
+api_key (str | None): The API key to use. Defaults to None.
+system_prompts (list[str] | None): Additional system prompts (instructions) for the language model. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `langchain_community.chat_models.ChatLiteLLM` class.

Returns:
@@ -184,21 +184,21 @@ def from_context(
cls,
model: str,
context: str,
-user_profile: Optional[UserProfile | dict[str, Any] | str] = None,
-memory: Optional[bool | int] = None,
-api_key: Optional[str] = None,
-system_prompts: Optional[list[str]] = None,
+user_profile: UserProfile | dict[str, Any] | str | None = None,
+memory: bool | int | None = None,
+api_key: str | None = None,
+system_prompts: list[str] | None = None,
**kwargs: Any,
) -> Self:
"""Creates an instance of CloneLLM using a summarized context string instead of documents.

Args:
model (str): Name of the language model.
context (str): Pre-summarized context string for the language model.
-user_profile (Optional[UserProfile | dict[str, Any] | str]): The profile of the user to be cloned by the language model. Defaults to None.
-memory (Optional[bool | int]): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
-api_key (Optional[str]): The API key to use. Defaults to None.
-system_prompts (Optional[list[str]]): Additional system prompts (instructions) for the language model. Defaults to None.
+user_profile (UserProfile | dict[str, Any] | str | None): The profile of the user to be cloned by the language model. Defaults to None.
+memory (bool | int | None): Maximum number of messages in conversation memory. Defaults to None (or 0) for no memory. -1 or `True` means infinite memory.
+api_key (str | None): The API key to use. Defaults to None.
+system_prompts (list[str] | None): Additional system prompts (instructions) for the language model. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `langchain_community.chat_models.ChatLiteLLM` class.

Returns:
@@ -215,7 +215,7 @@ def from_context(
**kwargs,
)

-def _get_documents(self, documents: Optional[list[Document | str]] = None) -> list[Document]:
+def _get_documents(self, documents: list[Document | str] | None = None) -> list[Document]:
if not (documents or self.documents):
raise ValueError("No documents provided")
documents_ = []
@@ -491,7 +491,7 @@ def models_by_provider(self) -> dict[str, list[str]]:
"""
Returns the available models grouped by their providers.
"""
-return cast(dict[str, list[str]], models_by_provider)
+return models_by_provider

def __repr__(self) -> str:
return f"CloneLLM<(model='{self.model}'" + (f", memory={self.memory}" * (self.memory is not None)) + ")>"
10 changes: 5 additions & 5 deletions src/clonellm/embed.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional, cast
+from typing import Any

from langchain_core.embeddings import Embeddings
from litellm import aembedding, all_embedding_models, embedding
@@ -13,13 +13,13 @@ class LiteLLMEmbeddings(LiteLLMMixin, Embeddings):

Args:
model (str): The embedding model to use.
-api_key (Optional[str]): The API key to use. Defaults to None.
-dimensions (Optional[int]): The number of dimensions the resulting output embeddings should have. Defaults to None.
+api_key (str | None): The API key to use. Defaults to None.
+dimensions (int | None): The number of dimensions the resulting output embeddings should have. Defaults to None.
**kwargs (Any): Additional keyword arguments supported by the `litellm.embedding` and `litellm.aembedding` functions.

"""

-def __init__(self, model: str, api_key: Optional[str] = None, dimensions: Optional[int] = None, **kwargs: Any) -> None:
+def __init__(self, model: str, api_key: str | None = None, dimensions: int | None = None, **kwargs: Any) -> None:
super().__init__(model, api_key, **kwargs)
self.dimensions = dimensions

@@ -79,7 +79,7 @@ def all_embedding_models(self) -> list[str]:
"""
Returns the names of supported embedding models.
"""
-return cast(list[str], all_embedding_models)
+return all_embedding_models

def __repr__(self) -> str:
return (
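Since LiteLLMEmbeddings subclasses langchain_core's Embeddings, it exposes the standard `embed_documents`/`embed_query` interface; a sketch, where the import path and model name are assumptions:

```python
from clonellm import LiteLLMEmbeddings  # assumed public re-export

emb = LiteLLMEmbeddings(model="text-embedding-3-small", dimensions=256)  # illustrative model
vectors = emb.embed_documents(["first document", "second document"])
query_vector = emb.embed_query("a search query")
assert len(vectors) == 2 and len(vectors[0]) == 256
```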
40 changes: 20 additions & 20 deletions src/clonellm/models.py
@@ -1,5 +1,5 @@
import datetime
-from typing import Any, Optional
+from typing import Any

from pydantic import BaseModel, Field

@@ -48,30 +48,30 @@ class CommunicationSample(BaseModel):

class UserProfile(BaseModel):
first_name: str
-middle_name: Optional[str] = None
+middle_name: str | None = None
last_name: str
-preferred_name: Optional[str] = None
-prefix: Optional[str] = None
-birth_date: Optional[datetime.date | str] = None
-gender: Optional[str] = None
-city: Optional[str] = None
-state: Optional[str] = None
-country: Optional[str] = None
-phone_number: Optional[str] = None
-email: Optional[str] = None
-personality_traits: Optional[PersonalityTraits] = None
-education_experience: Optional[Any] = None
-work_experience: Optional[Any] = None
-expertise: Optional[Any] = None
-communication_samples: Optional[list[CommunicationSample]] = None
-home_page: Optional[str] = None
-github_page: Optional[str] = None
-linkedin_page: Optional[str] = None
+preferred_name: str | None = None
+prefix: str | None = None
+birth_date: datetime.date | str | None = None
+gender: str | None = None
+city: str | None = None
+state: str | None = None
+country: str | None = None
+phone_number: str | None = None
+email: str | None = None
+personality_traits: PersonalityTraits | None = None
+education_experience: Any = None
+work_experience: Any = None
+expertise: Any = None
+communication_samples: list[CommunicationSample] | None = None
+home_page: str | None = None
+github_page: str | None = None
+linkedin_page: str | None = None

@property
def full_name(self) -> str:
return " ".join([self.first_name, self.middle_name or "", self.last_name])

@property
-def age(self) -> Optional[int]:
+def age(self) -> int | None:
return (datetime.date.today() - self.birth_date).days // 365 if isinstance(self.birth_date, datetime.date) else None
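A quick sketch of the migrated model in use (values are illustrative). Two quirks of the unchanged context lines are worth noting: `full_name` joins an empty string when `middle_name` is None, which yields a doubled space, and `age` is a 365-day approximation:

```python
import datetime

from clonellm.models import UserProfile  # module path as in this diff

profile = UserProfile(
    first_name="Ada",
    last_name="Lovelace",
    birth_date=datetime.date(1815, 12, 10),
    city="London",
    country="UK",
)
print(profile.full_name)  # "Ada  Lovelace" -- doubled space while middle_name is None
print(profile.age)        # days-based approximation: (today - birth_date).days // 365
```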
2 changes: 2 additions & 0 deletions src/clonellm/py.typed
@@ -0,0 +1,2 @@
+# Marker file to instruct type checkers to look for inline type annotations in this package.
+# See PEP 561 for more information.
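Shipping the py.typed marker is what lets downstream type checkers see clonellm's inline annotations (PEP 561). A hedged sketch of what a consumer gains, assuming CloneLLM is re-exported at package level:

```python
# downstream_app.py -- with py.typed shipped, mypy resolves clonellm's real
# signatures instead of treating every import from the package as Any.
from clonellm import CloneLLM  # assumed public re-export


def make_clone(model: str, key: str | None = None) -> CloneLLM:
    return CloneLLM(model=model, documents=["..."], api_key=key)
```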