32 commits
94992e4
feat: enable Gemini and GKE support
Jun 6, 2025
bb06d6f
chore: change env var names
Jun 6, 2025
d59ccba
chore: update references to Claude and Anthropic
Jun 6, 2025
cfbbc0a
chore: enable gke cluster credentials in kubernetes MCP server
Jun 6, 2025
ac35913
chore: update variables and add gcp compose file
Jun 6, 2025
464b102
fix: use correct host for MCP Prompt Server
Jun 6, 2025
447bb11
chore: fix licensecheck
Jun 6, 2025
c34d659
chore: cache HF model on host and update slack_channel_id ref
Jun 7, 2025
41d8982
Merge remote-tracking branch 'gawbul/spm/add-gemini-and-gke-support' …
alanbijuthomas Jun 26, 2025
f6322b2
Add Gemini key to LLM Server
alanbijuthomas Jun 26, 2025
e677825
Update Docker compose.gcp.yaml
alanbijuthomas Jun 27, 2025
cf3dd6b
feature: added single setup and run script
YazIbrahim Jun 27, 2025
2555bd7
docs: updating credentials setup
YazIbrahim Jun 27, 2025
00973c0
fix: healthcheck dependency
YazIbrahim Jun 27, 2025
964541d
Merge pull request #79 from fuzzylabs/add-gemini-and-gke-support-dev-1
YazIbrahim Jun 27, 2025
254892b
Add adapters and client to enable Gemini support
alanbijuthomas Jun 27, 2025
3067c4b
docs: added supported LLMs
YazIbrahim Jun 27, 2025
35843cb
Update adapter test with dummy tool name for Anthropic models
alanbijuthomas Jun 27, 2025
d5b6c1c
Fix pre-commit issues and improve code quality
alanbijuthomas Jun 27, 2025
ba96127
Add Gemini API key to compose.tests.yaml
alanbijuthomas Jun 28, 2025
01d1bf4
Log Gemini token usage
alanbijuthomas Jun 28, 2025
41a535d
fix: removed comment
YazIbrahim Jun 30, 2025
64b02ea
fix: added healthchecks
YazIbrahim Jun 30, 2025
1a0b665
refactor: making max tokens an environment variable
YazIbrahim Jun 30, 2025
ff973b5
feature: added caching to gemini model
YazIbrahim Jul 1, 2025
94d1cc3
docs: added link to security blog
YazIbrahim Jul 1, 2025
27d9620
Merge branch 'main' into feature/gemini-caching
YazIbrahim Jul 1, 2025
4b68aa6
chore: revert change
YazIbrahim Jul 1, 2025
a360170
refactor: reducing max output tokens
YazIbrahim Jul 1, 2025
a20f7b3
refactor: changed models in docs to ones that have been tested
YazIbrahim Jul 1, 2025
2cc82cc
chore: typing
YazIbrahim Jul 1, 2025
ea2c83c
chore: revert change
YazIbrahim Jul 1, 2025
3 changes: 2 additions & 1 deletion README.md
@@ -33,7 +33,7 @@ We've been writing blogs and sharing our learnings along the way. Check out our
The SRE Agent supports the following LLM providers:

### Anthropic
- **Models**: e.g. "claude-4-0-sonnet-latest"
- **Models**: e.g. "claude-3-7-sonnet-latest"
- **Setup**: Requires `ANTHROPIC_API_KEY`

### Google Gemini
@@ -233,3 +233,4 @@ Check out our blog posts for insights and updates:

- [Bringing Agentic AI into the Real World](https://www.fuzzylabs.ai/blog-post/bringing-agentic-ai-into-the-real-world)
- [How We're Building an Autonomous SRE with FastMCP](https://www.fuzzylabs.ai/blog-post/how-were-building-an-autonomous-sre-with-fastmcp)
- [Can we trust an agent in Prod?](https://www.fuzzylabs.ai/blog-post/can-we-trust-an-agent-in-prod)
2 changes: 1 addition & 1 deletion docs/credentials.md
@@ -21,7 +21,7 @@ The following credentials must be retrieved prior to running the agent. These cr
> **Note**: You only need to configure **one** LLM provider. Choose either Anthropic or Google Gemini and provide the corresponding API key.

- **PROVIDER**: The LLM provider name (e.g., "anthropic", "google").
- **MODEL**: The specific model name to use (e.g., "claude-3-5-sonnet-20241022", "gemini-1.5-pro").
- **MODEL**: The specific model name to use (e.g., "claude-3-7-sonnet-latest", "gemini-2.5-flash").

**Choose one of the following:**
- **ANTHROPIC_API_KEY**: An API key for Anthropic Claude models *(required if using Anthropic provider)*.
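For context, the "choose one provider" rule above amounts to a branch on `PROVIDER`. A minimal sketch of that selection, assuming a simple helper function (the env var names come from these docs; the function itself is illustrative, not the agent's actual code):

```python
import os

def resolve_api_key() -> str:
    """Pick the API key for the configured LLM provider (illustrative only)."""
    provider = os.getenv("PROVIDER", "anthropic")
    if provider == "anthropic":
        return os.environ["ANTHROPIC_API_KEY"]
    if provider == "google":
        return os.environ["GEMINI_API_KEY"]
    raise ValueError(f"Unsupported provider: {provider}")
```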
2 changes: 1 addition & 1 deletion setup_credentials.py
@@ -91,7 +91,7 @@ def get_credential_config(platform: str) -> dict[str, dict[str, Any]]:
},
"MAX_TOKENS": {
"prompt": "Controls the maximum number of tokens the LLM can generate in "
"its response e.g. 10000: ",
"its response e.g. 8000: ",
"mask_value": False,
},
"DEV_BEARER_TOKEN": {
2 changes: 1 addition & 1 deletion sre_agent/client/utils/schemas.py
@@ -68,7 +68,7 @@ class ClientConfig:
default_factory=lambda: json.loads(os.getenv("TOOLS", "[]"))
)
model: str = os.getenv("LLM_MODEL", "claude-3-7-sonnet-latest")
max_tokens: int = 1000
max_tokens: int = 8000
max_tool_retries: int = 3
query_timeout: int = int(
os.getenv("QUERY_TIMEOUT", DEFAULT_QUERY_TIMEOUT) or DEFAULT_QUERY_TIMEOUT
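The hard-coded default above pairs with the "making max tokens an environment variable" commit in this PR, which suggests the limit is meant to be overridable the same way `QUERY_TIMEOUT` is. A hedged sketch of that pattern (reading `MAX_TOKENS` here is an assumption based on the commit message and `setup_credentials.py`, not the merged code):

```python
import os
from dataclasses import dataclass

@dataclass
class ClientConfig:
    # Assumed: mirrors the QUERY_TIMEOUT env-var pattern in the same class,
    # falling back to the 8000 default introduced by this PR.
    max_tokens: int = int(os.getenv("MAX_TOKENS", "8000"))
```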
42 changes: 34 additions & 8 deletions sre_agent/llm/utils/clients.py
@@ -9,6 +9,7 @@
from anthropic.types import ToolParam
from google import genai
from google.genai import types
from google.genai.types import CachedContent
from pydantic import BaseModel
from shared.logger import logger # type: ignore
from shared.schemas import ( # type: ignore
@@ -177,26 +178,49 @@ def __init__(self, settings: LLMSettings = LLMSettings()) -> None:
"""The constructor for the Gemini client."""
super().__init__(settings)
self.client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
self._cache: CachedContent | None = None

def cache_tools(self, tools: list[Any]) -> list[Any]:
"""A method for adding a cache block to tools."""
if tools:
try:
from google.genai import types

config = types.CreateCachedContentConfig(
tools=tools,
ttl="600s",
)
self._cache = self.client.caches.create(
model=self.settings.model, config=config
)
except Exception as e:
logger.warning(f"Failed to create Gemini cache: {e}")
return tools

def generate(self, payload: TextGenerationPayload) -> Message:
"""A method for generating text using the Gemini API."""
adapter = GeminiTextGenerationPayloadAdapter(payload)

messages, tools = adapter.adapt()

if not self.settings.max_tokens:
raise ValueError("Max tokens configuration has not been set.")
cached_tools = self.cache_tools(tools)

# Use cache if available
config_kwargs = {"max_output_tokens": self.settings.max_tokens}
if self._cache:
config_kwargs["cached_content"] = self._cache.name
messages = [messages[-1]] if messages else []
else:
config_kwargs["tools"] = cached_tools

response = self.client.models.generate_content(
model=self.settings.model,
contents=messages,
config=types.GenerateContentConfig(
tools=tools,
max_output_tokens=self.settings.max_tokens,
),
config=types.GenerateContentConfig(**config_kwargs),
)

if response.usage_metadata:
# Log with cache information

logger.info(
f"Token usage - Input: {response.usage_metadata.prompt_token_count}, "
f"Output: {response.usage_metadata.candidates_token_count}, "
@@ -219,7 +243,9 @@ def generate(self, payload: TextGenerationPayload) -> Message:
usage=Usage(
input_tokens=response.usage_metadata.prompt_token_count,
output_tokens=response.usage_metadata.candidates_token_count,
cache_creation_input_tokens=None,
cache_creation_input_tokens=response.usage_metadata.cache_creation_token_count
if hasattr(response.usage_metadata, "cache_creation_token_count")
else None,
cache_read_input_tokens=response.usage_metadata.cached_content_token_count,
)
if response.usage_metadata
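Taken together, the diff implements Gemini explicit caching: the tool declarations are uploaded once as a `CachedContent` with a 10-minute TTL, and later calls pass `cached_content=cache.name` instead of re-sending the tools, trimming each request down to the newest message. A condensed sketch of that flow (the API calls mirror the diff above; the model name, tool list, and message are placeholders):

```python
from google import genai
from google.genai import types

client = genai.Client(api_key="...")           # placeholder key
my_tools: list = []                            # placeholder: the MCP tool declarations
latest_message = "Diagnose the failing pod"    # placeholder: newest user turn

# Upload the large, stable tool definitions once, cached for 10 minutes.
cache = client.caches.create(
    model="gemini-2.5-flash",
    config=types.CreateCachedContentConfig(tools=my_tools, ttl="600s"),
)

# Later calls reference the cache by name rather than re-sending the tools,
# so only the newest message travels with the request.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[latest_message],
    config=types.GenerateContentConfig(
        cached_content=cache.name,
        max_output_tokens=8000,
    ),
)
```

The `try/except` around cache creation in the diff means a cache failure degrades gracefully: the client falls back to passing `tools` directly in the generation config.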