MemTensor · CaralHsi · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/src/memos/api/config.py b/src/memos/api/config.py
@@ -335,7 +335,7 @@ def get_memreader_config() -> dict[str, Any]:
                 # validation requirements during tests/import.
                 "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"),
                 "remove_think_prefix": True,
-                "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
+                "extra_body": {"enable_thinking": False},
             },
         }
 
@@ -531,7 +531,7 @@ def get_internet_config() -> dict[str, Any]:
                                 "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"),
                                 "api_base": os.getenv("MEMRADER_API_BASE"),
                                 "remove_think_prefix": True,
-                                "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
+                                "extra_body": {"enable_thinking": False},
                             },
                         },
                         "embedder": APIConfig.get_embedder_config(),

diff --git a/src/memos/api/handlers/config_builders.py b/src/memos/api/handlers/config_builders.py
@@ -105,6 +105,7 @@ def build_chat_llm_config() -> list[dict[str, Any]]:
                 }
             ),
             "support_models": cfg.get("support_models", None),
+            "extra_body": cfg.get("extra_body", None),
         }
         for cfg in configs
     ]

diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py
@@ -116,6 +116,7 @@ class VLLMLLMConfig(BaseLLMConfig):
         default=False,
         description="Enable reasoning outputs from vLLM",
     )
+    extra_body: Any = Field(default=None, description="Extra options for API")
 
 
 class LLMConfigFactory(BaseConfig):

diff --git a/src/memos/llms/vllm.py b/src/memos/llms/vllm.py
@@ -111,13 +111,7 @@ def _generate_with_api_client(self, messages: list[MessageDict], **kwargs) -> st
                 "temperature": kwargs.get("temperature", self.config.temperature),
                 "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
                 "top_p": kwargs.get("top_p", self.config.top_p),
-                "extra_body": {
-                    "chat_template_kwargs": {
-                        "enable_thinking": kwargs.get(
-                            "enable_thinking", self.config.enable_thinking
-                        )
-                    }
-                },
+                "extra_body": kwargs.get("extra_body", self.config.extra_body),
             }
             if kwargs.get("tools"):
                 completion_kwargs["tools"] = kwargs.get("tools")
@@ -175,13 +169,7 @@ def generate_stream(self, messages: list[MessageDict], **kwargs):
                 "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
                 "top_p": kwargs.get("top_p", self.config.top_p),
                 "stream": True,
-                "extra_body": {
-                    "chat_template_kwargs": {
-                        "enable_thinking": kwargs.get(
-                            "enable_thinking", self.config.enable_thinking
-                        )
-                    }
-                },
+                "extra_body": kwargs.get("extra_body", self.config.extra_body),
             }
 
             stream = self.client.chat.completions.create(**completion_kwargs)
diff --git a/src/memos/api/handlers/config_builders.py b/src/memos/api/handlers/config_builders.py
@@ -105,6 +105,7 @@ def build_chat_llm_config() -> list[dict[str, Any]]:
                     }
                 ),
                 "support_models": cfg.get("support_models", None),
+                "extra_body": cfg.get("extra_body", None),
             }
             for cfg in configs
         ]
@@ Expand Down @@