From 86f55b3a4fc6dd78fda937d4a39664798be4e001 Mon Sep 17 00:00:00 2001
From: Paul Zabalegui
Date: Wed, 10 Dec 2025 14:32:01 +0100
Subject: [PATCH 1/4] feat: Add Ollama Cloud integration with ollama_cloud/ prefix

- Added support for Ollama Cloud models via AsyncOpenAI
- Models use ollama_cloud/ prefix (e.g., ollama_cloud/gpt-oss:120b)
- Reads OLLAMA_API_KEY and OLLAMA_API_BASE for cloud authentication
- Added predefined Ollama Cloud models to /model-show
- Filtered obsolete ollama/*-cloud models from LiteLLM database
- Updated authentication headers in completer, banner, and toolbar
- Added concise English documentation in docs/providers/
- Adapted to new global model cache architecture from #371

Modified files:
- src/cai/sdk/agents/models/openai_chatcompletions.py
- src/cai/repl/commands/model.py (adapted to global cache)
- src/cai/util.py
- src/cai/repl/commands/completer.py
- src/cai/repl/ui/banner.py
- src/cai/repl/ui/toolbar.py
- docs/providers/ollama.md
- docs/providers/ollama_cloud.md

All existing functionality preserved (backward compatible).
---
 docs/providers/ollama.md                      | 27 +++
 docs/providers/ollama_cloud.md                | 79 +++++++++++++++++++
 src/cai/repl/commands/completer.py            | 12 ++-
 src/cai/repl/commands/model.py                | 18 ++++-
 src/cai/repl/ui/banner.py                     | 15 +++-
 src/cai/repl/ui/toolbar.py                    | 17 +++-
 .../agents/models/openai_chatcompletions.py   | 63 ++++++++++++++-
 src/cai/util.py                               | 20 ++++-
 8 files changed, 237 insertions(+), 14 deletions(-)
 create mode 100644 docs/providers/ollama_cloud.md

diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index 1ff39758..34f32aec 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -1,5 +1,7 @@
 # Ollama Configuration
 
+## Ollama Local (Self-hosted)
+
 #### [Ollama Integration](https://ollama.com/)
 
 For local models using Ollama, add the following to your .env:
@@ -9,3 +11,28 @@ OLLAMA_API_BASE=http://localhost:8000/v1 # note, maybe you have a different endp
 ```
 
 Make sure that the Ollama server is running and accessible at the specified base URL. You can swap the model with any other supported by your local Ollama instance.
+
+## Ollama Cloud
+
+For cloud models using Ollama Cloud (no GPU required), add the following to your .env:
+
+```bash
+# API Key from ollama.com
+OLLAMA_API_KEY=your_api_key_here
+OLLAMA_API_BASE=https://ollama.com
+
+# Cloud model (note the ollama_cloud/ prefix)
+CAI_MODEL=ollama_cloud/gpt-oss:120b
+```
+
+**Requirements:**
+1. Create an account at [ollama.com](https://ollama.com)
+2. Generate an API key from your profile
+3. Use models with `ollama_cloud/` prefix (e.g., `ollama_cloud/gpt-oss:120b`)
+
+**Key differences:**
+- Prefix: `ollama_cloud/` (cloud) vs `ollama/` (local)
+- API Key: Required for cloud, not needed for local
+- Endpoint: `https://ollama.com/v1` (cloud) vs `http://localhost:8000/v1` (local)
+
+See [Ollama Cloud documentation](ollama_cloud.md) for detailed setup instructions.
diff --git a/docs/providers/ollama_cloud.md b/docs/providers/ollama_cloud.md
new file mode 100644
index 00000000..6498731e
--- /dev/null
+++ b/docs/providers/ollama_cloud.md
@@ -0,0 +1,79 @@
+# Ollama Cloud
+
+Run large language models without local GPU using Ollama's cloud service.
+
+## Quick Start
+
+### 1. Get API Key
+
+- Create account at [ollama.com](https://ollama.com)
+- Generate API key from your profile
+
+### 2. Configure `.env`
+
+```bash
+OLLAMA_API_KEY=your_api_key_here
+OLLAMA_API_BASE=https://ollama.com
+CAI_MODEL=ollama_cloud/gpt-oss:120b
+```
+
+### 3. Run
+
+```bash
+cai
+```
+
+## Available Models
+
+View in CAI with `/model-show` under "Ollama Cloud" category:
+
+- `ollama_cloud/gpt-oss:120b` - General purpose 120B model
+- `ollama_cloud/llama3.3:70b` - Llama 3.3 70B
+- `ollama_cloud/qwen2.5:72b` - Qwen 2.5 72B
+- `ollama_cloud/deepseek-v3:671b` - DeepSeek V3 671B
+
+More models at [ollama.com/library](https://ollama.com/library).
+
+## Model Selection
+
+```bash
+# By name
+CAI> /model ollama_cloud/gpt-oss:120b
+
+# By number (after /model-show)
+CAI> /model 3
+```
+
+## Local vs Cloud
+
+| Feature | Local | Cloud |
+|---------|-------|-------|
+| Prefix | `ollama/` | `ollama_cloud/` |
+| API Key | Not required | Required |
+| Endpoint | `http://localhost:8000/v1` | `https://ollama.com/v1` |
+| GPU | Required | Not required |
+
+## Troubleshooting
+
+**Unauthorized error**: Verify `OLLAMA_API_KEY` is set correctly
+
+**Path not found**: Ensure `OLLAMA_API_BASE=https://ollama.com` (without `/v1`)
+
+**Model not listed**: Check model prefix is `ollama_cloud/`, not `ollama/`
+
+## Validation
+
+Test connection with curl:
+
+```bash
+curl https://ollama.com/v1/chat/completions \
+  -H "Authorization: Bearer $OLLAMA_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"model": "gpt-oss:120b", "messages": [{"role": "user", "content": "test"}]}'
+```
+
+## References
+
+- [Ollama Cloud Docs](https://ollama.com/docs/cloud)
+- [Model Library](https://ollama.com/library)
+- [Get API Key](https://ollama.com/settings/keys)
diff --git a/src/cai/repl/commands/completer.py b/src/cai/repl/commands/completer.py
index f655d9c7..0c313335 100644
--- a/src/cai/repl/commands/completer.py
+++ b/src/cai/repl/commands/completer.py
@@ -184,8 +184,18 @@ def fetch_all_models(self):  # pylint: disable=too-many-branches,too-many-statem
         try:
             # Get Ollama models with a short timeout to prevent hanging
             api_base = get_ollama_api_base()
+
+            # Add authentication headers when the endpoint is Ollama Cloud
+            headers = {}
+            if "ollama.com" in api_base:
+                api_key = os.getenv("OLLAMA_API_KEY") or os.getenv("OPENAI_API_KEY")
+                if api_key:
+                    headers["Authorization"] = f"Bearer {api_key}"
+
             response = requests.get(
-                f"{api_base.replace('/v1', '')}/api/tags", timeout=0.5)
+                f"{api_base.replace('/v1', '')}/api/tags",
+                headers=headers,
+                timeout=0.5)
 
             if response.status_code == 200:
                 data = response.json()
diff --git a/src/cai/repl/commands/model.py b/src/cai/repl/commands/model.py
index a16bc5d4..5981eee9 100644
--- a/src/cai/repl/commands/model.py
+++ b/src/cai/repl/commands/model.py
@@ -175,7 +175,11 @@ def load_all_available_models() -> tuple[List[str], List[Dict[str, Any]]]:
     try:
         response = requests.get(LITELLM_URL, timeout=5)
         if response.status_code == 200:
-            litellm_names = sorted(response.json().keys())
+            # Filter out obsolete Ollama Cloud models (replaced by ollama_cloud/ prefix)
+            litellm_names = [
+                model_name for model_name in sorted(response.json().keys())
+                if not (model_name.startswith("ollama/") and "-cloud" in model_name)
+            ]
     except Exception:  # pylint: disable=broad-except
         pass
 
@@ -184,7 +188,17 @@ def load_all_available_models() -> tuple[List[str], List[Dict[str, Any]]]:
     ollama_names = []
     try:
         api_base = get_ollama_api_base()
-        response = requests.get(f"{api_base.replace('/v1', '')}/api/tags", timeout=1)
+        ollama_base = api_base.replace('/v1', '')
+
+        # Add authentication headers for Ollama Cloud if needed
+        headers = {}
+        is_cloud = "ollama.com" in api_base
+        timeout = 5 if is_cloud else 1  # Cloud needs more time
+
+        if is_cloud:
+            headers = get_ollama_auth_headers()
+
+        response = requests.get(f"{ollama_base}/api/tags", headers=headers, timeout=timeout)
         if response.status_code == 200:
             data = response.json()
             ollama_data = data.get('models', data.get('items', []))
diff --git a/src/cai/repl/ui/banner.py b/src/cai/repl/ui/banner.py
index 0050b992..a5e40140 100644
--- a/src/cai/repl/ui/banner.py
+++ b/src/cai/repl/ui/banner.py
@@ -76,12 +76,19 @@ def get_supported_models_count():
 
     # Try to get Ollama models count
     try:
-        ollama_api_base = os.getenv(
-            "OLLAMA_API_BASE",
-            "http://host.docker.internal:8000/v1"
-        )
+        from cai.util import get_ollama_api_base
+        ollama_api_base = get_ollama_api_base()
+
+        # Add authentication headers when the endpoint is Ollama Cloud
+        headers = {}
+        if "ollama.com" in ollama_api_base:
+            api_key = os.getenv("OLLAMA_API_KEY") or os.getenv("OPENAI_API_KEY")
+            if api_key:
+                headers["Authorization"] = f"Bearer {api_key}"
+
         ollama_response = requests.get(
             f"{ollama_api_base.replace('/v1', '')}/api/tags",
+            headers=headers,
             timeout=1
         )
 
diff --git a/src/cai/repl/ui/toolbar.py b/src/cai/repl/ui/toolbar.py
index 06b1e70f..fd6fd282 100644
--- a/src/cai/repl/ui/toolbar.py
+++ b/src/cai/repl/ui/toolbar.py
@@ -96,11 +96,20 @@ def update_toolbar_in_background():
     ollama_status = "unavailable"
     try:
         # Get Ollama models with a short timeout to prevent hanging
-        api_base = os.getenv(
-            "OLLAMA_API_BASE",
-            "http://host.docker.internal:8000/v1")
+        from cai.util import get_ollama_api_base
+        api_base = get_ollama_api_base()
+
+        # Add authentication headers when the endpoint is Ollama Cloud
+        headers = {}
+        if "ollama.com" in api_base:
+            api_key = os.getenv("OLLAMA_API_KEY") or os.getenv("OPENAI_API_KEY")
+            if api_key:
+                headers["Authorization"] = f"Bearer {api_key}"
+
         response = requests.get(
-            f"{api_base.replace('/v1', '')}/api/tags", timeout=0.5)
+            f"{api_base.replace('/v1', '')}/api/tags",
+            headers=headers,
+            timeout=0.5)
 
         if response.status_code == 200:
             data = response.json()
diff --git a/src/cai/sdk/agents/models/openai_chatcompletions.py b/src/cai/sdk/agents/models/openai_chatcompletions.py
index 7e88f438..8931edd6 100644
--- a/src/cai/sdk/agents/models/openai_chatcompletions.py
+++ b/src/cai/sdk/agents/models/openai_chatcompletions.py
@@ -2708,7 +2708,23 @@ async def _fetch_response(
         provider = model_str.split("/")[0]
 
         # Apply provider-specific configurations
-        if provider == "deepseek":
+        if provider == "ollama_cloud":
+            # Ollama Cloud configuration
+            ollama_api_key = os.getenv("OLLAMA_API_KEY")
+            ollama_api_base = os.getenv("OLLAMA_API_BASE", "https://ollama.com")
+
+            if ollama_api_key:
+                kwargs["api_key"] = ollama_api_key
+            if ollama_api_base:
+                kwargs["api_base"] = ollama_api_base
+
+            # Drop params not supported by Ollama
+            litellm.drop_params = True
+            kwargs.pop("parallel_tool_calls", None)
+            kwargs.pop("store", None)
+            if not converted_tools:
+                kwargs.pop("tool_choice", None)
+        elif provider == "deepseek":
             litellm.drop_params = True
             kwargs.pop("parallel_tool_calls", None)
             kwargs.pop("store", None)  # DeepSeek doesn't support store parameter
@@ -2846,6 +2862,51 @@ async def _fetch_response(
         max_retries = 3
         retry_count = 0
 
+        # Check if this is Ollama Cloud (ollama_cloud/ prefix)
+        # Ollama Cloud is OpenAI-compatible, so we bypass LiteLLM to avoid parsing issues
+        is_ollama_cloud = "ollama_cloud/" in model_str
+
+        if is_ollama_cloud:
+            # Use AsyncOpenAI client directly for Ollama Cloud
+            # Ollama Cloud is fully OpenAI-compatible at /v1/chat/completions
+            try:
+                # Configure the client with Ollama Cloud settings
+                ollama_api_key = os.getenv("OLLAMA_API_KEY") or os.getenv("OPENAI_API_KEY")
+                ollama_base_url = os.getenv("OLLAMA_API_BASE", "https://ollama.com")
+
+                # Ensure the URL has /v1 for OpenAI compatibility
+                if not ollama_base_url.endswith("/v1"):
+                    ollama_base_url = f"{ollama_base_url}/v1"
+
+                # Create a temporary client configured for Ollama Cloud
+                ollama_client = AsyncOpenAI(
+                    api_key=ollama_api_key,
+                    base_url=ollama_base_url
+                )
+
+                # Remove the ollama_cloud/ prefix from the model name
+                clean_model = kwargs["model"].replace("ollama_cloud/", "")
+                kwargs["model"] = clean_model
+
+                # Remove LiteLLM-specific parameters
+                kwargs.pop("extra_headers", None)
+                kwargs.pop("api_key", None)
+                kwargs.pop("api_base", None)
+                kwargs.pop("custom_llm_provider", None)
+
+                # Call Ollama Cloud using the OpenAI-compatible API; the same
+                # call covers both modes (with stream=True the client returns
+                # an async stream, otherwise a regular completion object)
+                response = await ollama_client.chat.completions.create(**kwargs)
+                return response
+
+            except Exception as e:
+                # If Ollama Cloud fails, raise with helpful message
+                raise Exception(
+                    f"Error connecting to Ollama Cloud: {str(e)}\n"
+                    f"Verify OLLAMA_API_KEY and OLLAMA_API_BASE are configured correctly."
+                ) from e
+
         while retry_count < max_retries:
             try:
                 if self.is_ollama:
diff --git a/src/cai/util.py b/src/cai/util.py
index a935e944..c587ffd7 100644
--- a/src/cai/util.py
+++ b/src/cai/util.py
@@ -744,8 +744,24 @@ def process_total_cost(
 
 
 def get_ollama_api_base():
-    """Get the Ollama API base URL from environment variable or default to localhost:8000."""
-    return os.environ.get("OLLAMA_API_BASE", "http://localhost:8000/v1")
+    """Get the Ollama API base URL from environment variables, defaulting to localhost:8000/v1.
+
+    Supports both:
+    - OLLAMA_API_BASE: For local Ollama instances (e.g., http://localhost:8000/v1)
+    - OPENAI_BASE_URL: For Ollama Cloud or other OpenAI-compatible services (e.g., https://ollama.com/v1)
+    """
+    # First check OLLAMA_API_BASE for local Ollama
+    ollama_base = os.environ.get("OLLAMA_API_BASE")
+    if ollama_base:
+        return ollama_base
+
+    # Then check OPENAI_BASE_URL for Ollama Cloud or other services
+    openai_base = os.environ.get("OPENAI_BASE_URL")
+    if openai_base and "ollama.com" in openai_base:
+        return openai_base
+
+    # Default to local Ollama
+    return "http://localhost:8000/v1"
 
 
 def load_prompt_template(template_path):

From ab4e055c6c0702e86719af8b356f520aa3a6e1ad Mon Sep 17 00:00:00 2001
From: Paul Zabalegui
Date: Wed, 10 Dec 2025 14:36:03 +0100
Subject: [PATCH 2/4] fix: Add missing Ollama Cloud models and get_ollama_auth_headers

- Added Ollama Cloud models to get_predefined_model_categories()
- Added get_ollama_auth_headers() function to util.py
- Added Ollama Cloud to category_to_provider mapping
- Imported get_ollama_auth_headers in model.py

This fixes the issue where predefined models weren't showing in /model-show.
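For reference, a minimal sketch of the lookup pattern these helpers are meant
to support (the /api/tags path mirrors the calls updated in this series; the
timeout value is illustrative and the snippet assumes the cai package is
importable):

```python
import requests

from cai.util import get_ollama_api_base, get_ollama_auth_headers

# Resolve the Ollama endpoint: OLLAMA_API_BASE, then OPENAI_BASE_URL
# (ollama.com), then the local default.
api_base = get_ollama_api_base()
# {} for local Ollama, {"Authorization": "Bearer <key>"} for Ollama Cloud.
headers = get_ollama_auth_headers()
tags = requests.get(
    f"{api_base.replace('/v1', '')}/api/tags",
    headers=headers,
    timeout=5,
)
```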
---
 src/cai/repl/commands/model.py | 31 +++++++++++++++++++++++++++++--
 src/cai/util.py                | 12 ++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/cai/repl/commands/model.py b/src/cai/repl/commands/model.py
index 5981eee9..1015fb84 100644
--- a/src/cai/repl/commands/model.py
+++ b/src/cai/repl/commands/model.py
@@ -12,7 +12,7 @@
 from rich.console import Console  # pylint: disable=import-error
 from rich.table import Table  # pylint: disable=import-error
 from rich.panel import Panel  # pylint: disable=import-error
-from cai.util import get_ollama_api_base, COST_TRACKER
+from cai.util import get_ollama_api_base, get_ollama_auth_headers, COST_TRACKER
 from cai.repl.commands.base import Command, register_command
 
 console = Console()
@@ -99,6 +99,32 @@ def get_predefined_model_categories() -> Dict[str, List[Dict[str, str]]]:
                 "name": "deepseek-r1",
                 "description": "DeepSeek's specialized reasoning model"
             }
+        ],
+        "Ollama Cloud": [
+            {
+                "name": "ollama_cloud/gpt-oss:120b",
+                "description": (
+                    "Ollama Cloud - Large 120B parameter model (no GPU required)"
+                )
+            },
+            {
+                "name": "ollama_cloud/llama3.3:70b",
+                "description": (
+                    "Ollama Cloud - Llama 3.3 70B model (no GPU required)"
+                )
+            },
+            {
+                "name": "ollama_cloud/qwen2.5:72b",
+                "description": (
+                    "Ollama Cloud - Qwen 2.5 72B model (no GPU required)"
+                )
+            },
+            {
+                "name": "ollama_cloud/deepseek-v3:671b",
+                "description": (
+                    "Ollama Cloud - DeepSeek V3 671B model (no GPU required)"
+                )
+            }
         ]
     }
 
@@ -117,7 +143,8 @@ def get_all_predefined_models() -> List[Dict[str, Any]]:
         "Alias": "OpenAI",  # Alias models use OpenAI as base
         "Anthropic Claude": "Anthropic",
         "OpenAI": "OpenAI",
-        "DeepSeek": "DeepSeek"
+        "DeepSeek": "DeepSeek",
+        "Ollama Cloud": "Ollama Cloud"
     }
 
     for category, models in model_categories.items():
diff --git a/src/cai/util.py b/src/cai/util.py
index c587ffd7..e693813d 100644
--- a/src/cai/util.py
+++ b/src/cai/util.py
@@ -764,6 +764,18 @@ def get_ollama_api_base():
     return "http://localhost:8000/v1"
 
 
+def get_ollama_auth_headers():
+    """Get authentication headers for Ollama Cloud if API key is set.
+
+    Returns:
+        Dictionary with Authorization header if API key exists, empty dict otherwise
+    """
+    api_key = os.getenv("OLLAMA_API_KEY") or os.getenv("OPENAI_API_KEY")
+    if api_key:
+        return {"Authorization": f"Bearer {api_key}"}
+    return {}
+
+
 def load_prompt_template(template_path):
     """
     Load a prompt template from the package resources.

From bd88b3cd23af12e3e7168e83d2c3d9b296364041 Mon Sep 17 00:00:00 2001
From: Paul Zabalegui
Date: Wed, 10 Dec 2025 14:42:46 +0100
Subject: [PATCH 3/4] fix: Display predefined models first in /model-show

- Added loop to display predefined models (Alias, Claude, OpenAI, DeepSeek,
  Ollama Cloud) before LiteLLM models
- Models #1-14 now correctly show predefined models
- LiteLLM models start from #15+ as expected
- Added get_ollama_auth_headers() function in util.py for Ollama Cloud auth
- Fixed model numbering to be consistent with global cache

This resolves the issue where predefined models were skipped and only LiteLLM
models appeared starting from #15.
---
 src/cai/repl/commands/model.py | 41 +++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/cai/repl/commands/model.py b/src/cai/repl/commands/model.py
index 1015fb84..3afdf6fc 100644
--- a/src/cai/repl/commands/model.py
+++ b/src/cai/repl/commands/model.py
@@ -540,7 +540,46 @@ def handle(self, args: Optional[List[str]] = None) -> bool:  # pylint: disable=t
         total_models = 0
         displayed_models = 0
 
-        # Process and display models (use global cache for numbering)
+        # First, add predefined models (Alias, Claude, OpenAI, DeepSeek, Ollama Cloud)
+        predefined_models = get_all_predefined_models()
+        for model in predefined_models:
+            model_name = model["name"]
+
+            # Skip if search term provided and not in model name
+            if search_term and search_term not in model_name.lower():
+                continue
+
+            # Find index from global cache (skip entries that are not cached)
+            try:
+                model_index = _GLOBAL_MODEL_CACHE.index(model_name) + 1
+            except ValueError:
+                continue
+
+            displayed_models += 1
+            total_models += 1
+
+            # Format pricing info
+            input_cost_str = (
+                f"${model['input_cost']:.2f}"
+                if model['input_cost'] is not None else "Unknown"
+            )
+            output_cost_str = (
+                f"${model['output_cost']:.2f}"
+                if model['output_cost'] is not None else "Unknown"
+            )
+
+            # Add row to table
+            model_table.add_row(
+                str(model_index),
+                model_name,
+                model["provider"],
+                "N/A",  # max_tokens
+                input_cost_str,
+                output_cost_str,
+                model.get("description", "")
+            )
+
+        # Process and display LiteLLM models (use global cache for numbering)
         for model_name, model_info in sorted(model_data.items()):
             # Find the model index from global cache
             try:

From 2f884743388c9be0071e022f5124590df48725a3 Mon Sep 17 00:00:00 2001
From: Paul Zabalegui
Date: Wed, 10 Dec 2025 15:22:34 +0100
Subject: [PATCH 4/4] fix: Restore correct import of cai.caibench instead of pentestperf

During rebase, the import was incorrectly changed from 'import cai.caibench
as ptt' to 'import pentestperf as ptt'. This commit restores the correct
import. The original code uses cai.caibench, not an external pentestperf
module.
---
 src/cai/util.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/cai/util.py b/src/cai/util.py
index e693813d..b5acca68 100644
--- a/src/cai/util.py
+++ b/src/cai/util.py
@@ -31,8 +31,14 @@
 
 from cai import is_pentestperf_available
 
+# Import caibench (pentestperf) if available
 if is_pentestperf_available():
-    import pentestperf as ptt
+    import cai.caibench as ptt
+    PTT_AVAILABLE = True
+else:
+    ptt = None
+    PTT_AVAILABLE = False
+
 import signal
 
 # Global timing variables for tracking active and idle time
@@ -4368,6 +4374,10 @@ def setup_ctf():
         print(
             color("CTF name not provided, necessary to run CTF", fg="white", bg="red"))
         sys.exit(1)
+    if not PTT_AVAILABLE or ptt is None:
+        print(color("pentestperf module not available, cannot setup CTF", fg="white", bg="red"))
+        sys.exit(1)
+
     print(
         color("Setting up CTF: ", fg="black", bg="yellow")
         + color(ctf_name, fg="black", bg="yellow")