59 changes: 56 additions & 3 deletions README.md
@@ -52,6 +52,21 @@ gcloud config set run/region europe-west1

# Enable APIs
gcloud services enable run.googleapis.com cloudbuild.googleapis.com aiplatform.googleapis.com

# Ensure the default permissions that `gcloud run deploy` needs (primarily for Cloud Build) are set in the project
export PROJECT_NUMBER=$(gcloud projects describe $(gcloud config get-value project) --format="value(projectNumber)")
gcloud projects add-iam-policy-binding $(gcloud config get-value project) \
--role roles/run.viewer \
--member "serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com"
gcloud projects add-iam-policy-binding $(gcloud config get-value project) \
--role roles/storage.objectAdmin \
--member "serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com"
gcloud projects add-iam-policy-binding $(gcloud config get-value project) \
--role roles/artifactregistry.createOnPushRepoAdmin \
--member "serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com"
gcloud projects add-iam-policy-binding $(gcloud config get-value project) \
--role roles/logging.logWriter \
--member "serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com"
```

## Deploy Gemma Backend
@@ -73,10 +88,16 @@ gcloud run deploy ollama-gemma3-4b-gpu \
--no-gpu-zonal-redundancy \
--timeout=600

# Disable Invoker IAM check
gcloud run services update ollama-gemma3-4b-gpu --no-invoker-iam-check

## Download the ollama utility and test the Cloud Run GPU service created above
curl -fsSL https://ollama.com/install.sh
OLLAMA_HOST=<Cloud Run SERVICE URL generated above> ollama run gemma3:4b
#curl -fsSL https://ollama.com/install.sh
#OLLAMA_HOST=<Cloud Run SERVICE URL generated above> ollama run gemma3:4b
export OLLAMA_URL=$(gcloud run services describe ollama-gemma3-4b-gpu \
--region europe-west1 \
--format='value(status.url)')
curl "$OLLAMA_URL"
```

## Deploy ADK Cloud Run Agent that calls the Gemma Backend
@@ -112,9 +133,41 @@ gcloud run deploy production-adk-agent \
--set-env-vars GOOGLE_CLOUD_PROJECT=$PROJECT_ID \
--set-env-vars GOOGLE_CLOUD_LOCATION=europe-west1 \
--set-env-vars GEMMA_MODEL_NAME=gemma3:4b \
--set-env-vars OLLAMA_API_BASE=$OLLAMA_URL
--set-env-vars OLLAMA_API_BASE=$OLLAMA_URL \
--set-env-vars USE_OPENAI_FAKE=True

gcloud run services update production-adk-agent --no-invoker-iam-check
```

### Environment Variables

The application's behavior can be configured through the following environment variables.

| Variable | Description | Default Value |
| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- |
| `GEMMA_MODEL_NAME` | Specifies the name of the Gemma model to be used. | `gemma3:4b` |
| `OLLAMA_API_BASE` | The base URL for the Ollama API endpoint. | `http://localhost:10010` |
| `USE_OPENAI_FAKE` | Set to `true` to use an OpenAI-compatible API wrapper around Ollama. This enables context-aware, multi-modal conversations. | `False` |
| `USE_OLLAMA_NO_CONTEXT` | Set to `true` to use the direct Ollama API for multi-modal input. Note: This mode may not retain conversational context. | `False` |

#### Connection Modes

The agent connects to the Ollama model in one of three ways, controlled by the boolean flags `USE_OPENAI_FAKE` and `USE_OLLAMA_NO_CONTEXT`:

1. **Default (Context-aware Chat):**
- **Configuration:** `USE_OPENAI_FAKE` and `USE_OLLAMA_NO_CONTEXT` are both `False`.
- **Behavior:** Uses the `ollama_chat` provider for standard, context-aware chat. According to code comments, this mode may have issues with multi-modal inputs.

2. **OpenAI Fake (Context-aware, Multi-modal):**
- **Configuration:** `USE_OPENAI_FAKE=true`
- **Behavior:** Routes requests through an OpenAI-compatible endpoint (`/v1`) on the Ollama server. This is the recommended mode for achieving context-aware, multi-modal chat.

3. **Ollama Direct (Multi-modal, No Context):**
- **Configuration:** `USE_OLLAMA_NO_CONTEXT=true`
- **Behavior:** Uses the standard `ollama` provider. This mode supports multi-modal inputs directly but may fail to retain conversation history.
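
For example, to switch an already-deployed agent between these modes without rebuilding the container, the flags can be toggled with `gcloud run services update`. This is a minimal sketch, assuming the `production-adk-agent` service name and `europe-west1` region used in the deploy step above:

```bash
# Switch the deployed agent to the OpenAI-compatible mode (context-aware, multi-modal)
gcloud run services update production-adk-agent \
    --region europe-west1 \
    --update-env-vars USE_OPENAI_FAKE=True,USE_OLLAMA_NO_CONTEXT=False
```

Cloud Run rolls out a new revision with the updated environment variables, so the mode change takes effect without a redeploy.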

## Test Your Agent's Health

```bash
20 changes: 14 additions & 6 deletions adk-agent/production_agent/agent.py
@@ -14,14 +14,14 @@

import os
from pathlib import Path
from typing import Dict, Any
from typing import Any, Dict

import google.auth
from dotenv import load_dotenv
from google.adk.agents import LlmAgent, Agent
from google.adk.tools import google_search
from google.adk.agents import Agent, LlmAgent
from google.adk.models.lite_llm import LiteLlm
from google.adk.tools import google_search
from google.cloud import logging as google_cloud_logging
import google.auth

# Load environment variables from .env file in root directory
root_dir = Path(__file__).parent.parent
@@ -46,14 +46,22 @@

# Configure the deployed model endpoint
gemma_model_name = os.getenv("GEMMA_MODEL_NAME", "gemma3:4b") # Gemma model name
api_base = os.getenv("OLLAMA_API_BASE", "http://localhost:10010") # Location of Ollama server
os.environ["OLLAMA_API_BASE"] = os.getenv("OLLAMA_API_BASE", "http://localhost:10010")
if os.getenv("USE_OPENAI_FAKE", "False").lower() in ('true', '1', 't'): # See: Allows context based chat with multi-modal: https://github.com/google/adk-python/issues/49
os.environ["OPENAI_API_BASE"] = os.getenv("OLLAMA_API_BASE", "") + "/v1"
os.environ["OPENAI_API_KEY"] = "undefined"
model=LiteLlm(model=f"openai/{gemma_model_name}")
elif os.getenv("USE_OLLAMA_NO_CONTEXT", "False").lower() in ('true', '1', 't'): # Allows multi-modal via direct ollama API, but might loose context
model=LiteLlm(model=f"ollama/{gemma_model_name}")
else: # Direct via ollama with context, but current has issues with multi modal
model=LiteLlm(model=f"ollama_chat/{gemma_model_name}")

# Production Gemma Agent - GPU-accelerated conversational assistant
# 1. Connects to your deployed Gemma backend via LiteLlm
# 2. Creates a simple conversational agent
# 3. Configures Google Cloud integration
production_agent = Agent(
model=LiteLlm(model=f"ollama_chat/{gemma_model_name}", api_base=api_base),
model=model,
name="production_agent",
description="A production-ready conversational assistant powered by GPU-accelerated Gemma.",
instruction="""You are 'Gem', a friendly, knowledgeable, and enthusiastic zoo tour guide.
5 changes: 3 additions & 2 deletions adk-agent/pyproject.toml
@@ -5,16 +5,17 @@ description = "Production-ready ADK agent for Lab 3 - Prototype to Production"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"google-adk==1.12.0",
"google-adk==1.16.0",
"python-dotenv==1.1.0",
"httpx>=0.25.0",
"google-auth>=2.23.0",
"locust==2.37.10",
"litellm>=1.0.0",
"pillow>=11.3.0",
]

[dependency-groups]
dev = [
"pytest==8.4.0",
"ruff==0.11.13",
]
]