From 8915ed40c9b1cf5b5bdc45581252df5f7227a2bf Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 14 Jan 2026 17:11:53 +0800 Subject: [PATCH 1/3] modify implicit preference --- src/memos/templates/prefer_complete_prompt.py | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/memos/templates/prefer_complete_prompt.py b/src/memos/templates/prefer_complete_prompt.py index a67f0c12c..d5c73d11a 100644 --- a/src/memos/templates/prefer_complete_prompt.py +++ b/src/memos/templates/prefer_complete_prompt.py @@ -69,6 +69,28 @@ (preferences that the user did not explicitly state but can be reasonably inferred from their underlying motivations, behavioral patterns, decision-making logic, and latent needs). Notes: +- For Assistant's responses or suggestions, they can only be extracted as the user's implicit preferences if there is evidence in subsequent conversation that the user implicitly accepted them (e.g., adoption, agreement, acting on the suggestion, etc.). Assistant suggestions alone do not constitute user preferences. +- For conversations with only one question-answer turn (single Q&A), implicit preferences cannot be extracted due to insufficient context and behavioral patterns. Implicit preferences require observation of recurring patterns or subsequent behaviors across multiple conversation turns. + +Counter-examples: +【Counter-example 1 - Assistant suggestion not accepted by user】 +Conversation: +User: I want to buy a phone, any recommendations? +Assistant: I suggest considering the iPhone 15 Pro, it has powerful performance and great camera quality. +User: What about the iPhone 16? +Assistant: The iPhone 16 is expected to be released in September 2026, it will have a new design and features. + +Analysis: Although the Assistant recommended the iPhone 15 Pro, the user showed no acceptance of that suggestion (e.g., "okay", "I'll consider it", or acting on it); asking about the iPhone 16 is a new comparison question rather than adoption of the recommendation, so this cannot be extracted as the user's implicit preference. 
+Result: Cannot extract implicit preference + +【Counter-example 2 - Single question-answer situation】 +Conversation: +User: Any good movies recently? +Assistant: "Dune 2" has good reviews, it's a sci-fi epic genre. + +Analysis: This is just a single simple Q&A exchange. The user provided no further feedback or behavior, lacking sufficient context to infer user preferences for sci-fi movies or other hidden tendencies. +Result: Cannot extract implicit preference + - Implicit preferences refer to user inclinations or choices that are not directly expressed, but can be deeply inferred by analyzing: * **Hidden motivations**: What underlying needs or goals might drive the user's behavior? * **Behavioral patterns**: What recurring patterns or tendencies can be observed? @@ -77,7 +99,6 @@ * **Contextual signals**: What do the user's choices, comparisons, exclusions, or scenario selections reveal about their deeper preferences? - Do not treat explicitly stated preferences as implicit preferences; this prompt is only for inferring preferences that are not directly mentioned. - Go beyond surface-level facts to understand the user's hidden possibilities and underlying logic. -- For Assistant's responses or suggestions, they can only be extracted as the user's implicit preferences if there is evidence in subsequent conversation that the user implicitly accepted them (e.g., adoption, agreement, acting on the suggestion, etc.). Assistant suggestions alone do not constitute user preferences. Requirements: 1. Only make inferences when there is sufficient evidence in the conversation; avoid unsupported or far-fetched guesses. @@ -110,6 +131,28 @@ (用户没有明确表述,但可以通过分析其潜在动机、行为模式、决策逻辑和隐藏需求深度推断出的偏好)。 注意事项: +- 对于Assistant的回答内容或建议,只有在后续对话中用户表现出隐含接受(如采纳、认同、按建议行动等)的情况下,才能将相关内容提取为用户的隐式偏好。单纯的Assistant建议本身不构成用户偏好。 +- 对于只有一轮问答(一问一答)的对话,由于缺乏足够的上下文和行为模式,不能提取隐式偏好。隐式偏好需要从多轮对话中观察到的重复模式或后续行为来推断。 + +反例示例: +【反例1 - 未被用户认可的Assistant建议】 +对话: +User: 我想买个手机,有什么推荐吗? 
+Assistant: 建议你考虑iPhone 15 Pro,性能强大,拍照效果好。 +User: iPhone 16 怎么样? +Assistant: iPhone 16 预计将在2026年9月发布,会有新的设计和功能。 + +分析:虽然Assistant推荐了iPhone 15 Pro,但用户没有对该建议表现出接受态度(如"好的"、"我会考虑"、按建议行动等);询问iPhone 16属于新的比较性提问,并非采纳建议,因此不能提取为用户的隐式偏好。 +结果:无法提取隐式偏好 + +【反例2 - 只有一问一答的情况】 +对话: +User: 最近有什么好看的电影吗? +Assistant: 《沙丘2》口碑不错,是科幻史诗类型的。 + +分析:这只是一轮简单问答,用户没有进一步的反馈或行为,缺乏足够的上下文来推断用户对科幻电影的偏好或其他隐藏倾向。 +结果:无法提取隐式偏好 + - 隐式偏好是指用户未直接表达,但可以通过深入分析以下方面推断出的倾向或选择: * **隐藏动机**:什么样的潜在需求或目标可能驱动用户的行为? * **行为模式**:可以观察到什么样的重复模式或倾向? @@ -118,7 +161,6 @@ * **情境信号**:用户的选择、比较、排除或场景选择揭示了什么样的深层偏好? - 不要将明确陈述的偏好视为隐式偏好;此提示仅用于推断未直接提及的偏好。 - 超越表面事实,理解用户的隐藏可能性和背后的逻辑。 -- 对于Assistant的回答内容或建议,只有在后续对话中用户表现出隐含接受(如采纳、认同、按建议行动等)的情况下,才能将相关内容提取为用户的隐式偏好。单纯的Assistant建议本身不构成用户偏好。 要求: 1. 仅在对话中有充分证据时进行推断;避免无根据或牵强的猜测。 From 0343581cfeeca9a8ebc049b975df992483a1834e Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 14 Jan 2026 17:18:09 +0800 Subject: [PATCH 2/3] fix: modify logger.error to logger.info in extractor --- .../memories/textual/prefer_text_memory/extractor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/memos/memories/textual/prefer_text_memory/extractor.py b/src/memos/memories/textual/prefer_text_memory/extractor.py index 0c6e5339d..aa4f3cb44 100644 --- a/src/memos/memories/textual/prefer_text_memory/extractor.py +++ b/src/memos/memories/textual/prefer_text_memory/extractor.py @@ -70,7 +70,7 @@ def extract_explicit_preference(self, qa_pair: MessageList | str) -> dict[str, A try: response = self.llm_provider.generate([{"role": "user", "content": prompt}]) if not response: - logger.error( + logger.info( f"[prefer_extractor]: (Error) LLM response content is {response} when extracting explicit preference" ) return None @@ -80,7 +80,7 @@ def extract_explicit_preference(self, qa_pair: MessageList | str) -> dict[str, A d["preference"] = d.pop("explicit_preference") return result except Exception as e: - logger.error(f"Error extracting explicit preference: {e}, return None") + logger.info(f"Error extracting explicit 
preference: {e}, return None") return None def extract_implicit_preference(self, qa_pair: MessageList | str) -> dict[str, Any] | None: @@ -98,7 +98,7 @@ def extract_implicit_preference(self, qa_pair: MessageList | str) -> dict[str, A try: response = self.llm_provider.generate([{"role": "user", "content": prompt}]) if not response: - logger.error( + logger.info( f"[prefer_extractor]: (Error) LLM response content is {response} when extracting implicit preference" ) return None @@ -108,7 +108,7 @@ def extract_implicit_preference(self, qa_pair: MessageList | str) -> dict[str, A d["preference"] = d.pop("implicit_preference") return result except Exception as e: - logger.error(f"Error extracting implicit preferences: {e}, return None") + logger.info(f"Error extracting implicit preferences: {e}, return None") return None def _process_single_chunk_explicit( From 1e27f0c4aef67069435d869fe0640b248af5ba13 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Fri, 16 Jan 2026 11:26:38 +0800 Subject: [PATCH 3/3] fix: repair llm and plugin simple retrieve None type bug --- src/memos/llms/openai.py | 12 +++++ src/memos/llms/vllm.py | 6 +++ .../tree_text_memory/retrieve/searcher.py | 48 ++----------------- 3 files changed, 21 insertions(+), 45 deletions(-) diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py index f49f1d7d1..93dac42fb 100644 --- a/src/memos/llms/openai.py +++ b/src/memos/llms/openai.py @@ -57,6 +57,10 @@ def generate(self, messages: MessageList, **kwargs) -> str: f"Request body: {request_body}, Response from OpenAI: {response.model_dump_json()}, Cost time: {cost_time}" ) + if not response.choices: + logger.warning("OpenAI response has no choices") + return "" + tool_calls = getattr(response.choices[0].message, "tool_calls", None) if isinstance(tool_calls, list) and len(tool_calls) > 0: return self.tool_call_parser(tool_calls) @@ -99,6 +103,8 @@ def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, Non reasoning_started = False for 
chunk in response: + if not chunk.choices: + continue delta = chunk.choices[0].delta # Support for custom 'reasoning_content' (if present in OpenAI-compatible models like Qwen, DeepSeek) @@ -153,6 +159,10 @@ def generate(self, messages: MessageList, **kwargs) -> str: extra_body=kwargs.get("extra_body", self.config.extra_body), ) logger.info(f"Response from Azure OpenAI: {response.model_dump_json()}") + if not response.choices: + logger.warning("Azure OpenAI response has no choices") + return "" + if response.choices[0].message.tool_calls: return self.tool_call_parser(response.choices[0].message.tool_calls) response_content = response.choices[0].message.content @@ -180,6 +190,8 @@ def generate_stream(self, messages: MessageList, **kwargs) -> Generator[str, Non reasoning_started = False for chunk in response: + if not chunk.choices: + continue delta = chunk.choices[0].delta # Support for custom 'reasoning_content' (if present in OpenAI-compatible models like Qwen, DeepSeek) diff --git a/src/memos/llms/vllm.py b/src/memos/llms/vllm.py index 1cf8d4f39..362112f11 100644 --- a/src/memos/llms/vllm.py +++ b/src/memos/llms/vllm.py @@ -125,6 +125,10 @@ def _generate_with_api_client(self, messages: list[MessageDict], **kwargs) -> st response = self.client.chat.completions.create(**completion_kwargs) + if not response.choices: + logger.warning("VLLM response has no choices") + return "" + if response.choices[0].message.tool_calls: return self.tool_call_parser(response.choices[0].message.tool_calls) @@ -184,6 +188,8 @@ def generate_stream(self, messages: list[MessageDict], **kwargs): reasoning_started = False for chunk in stream: + if not chunk.choices: + continue delta = chunk.choices[0].delta if hasattr(delta, "reasoning") and delta.reasoning: if not reasoning_started and not self.config.remove_think_prefix: diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 
3612d37eb..8c30d74f3 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -290,51 +290,6 @@ def _parse_task( return parsed_goal, query_embedding, context, query - @timed - def _retrieve_simple( - self, - query: str, - top_k: int, - search_filter: dict | None = None, - user_name: str | None = None, - **kwargs, - ): - """Retrieve from by keywords and embedding""" - query_words = [] - if self.tokenizer: - query_words = self.tokenizer.tokenize_mixed(query) - else: - query_words = query.strip().split() - query_words = [query, *query_words] - logger.info(f"[SIMPLESEARCH] Query words: {query_words}") - query_embeddings = self.embedder.embed(query_words) - - items = self.graph_retriever.retrieve_from_mixed( - top_k=top_k * 2, - memory_scope=None, - query_embedding=query_embeddings, - search_filter=search_filter, - user_name=user_name, - use_fast_graph=self.use_fast_graph, - ) - logger.info(f"[SIMPLESEARCH] Items count: {len(items)}") - documents = [getattr(item, "memory", "") for item in items] - if not documents: - return [] - documents_embeddings = self.embedder.embed(documents) - similarity_matrix = cosine_similarity_matrix(documents_embeddings) - selected_indices, _ = find_best_unrelated_subgroup(documents, similarity_matrix) - selected_items = [items[i] for i in selected_indices] - logger.info( - f"[SIMPLESEARCH] after unrelated subgroup selection items count: {len(selected_items)}" - ) - return self.reranker.rerank( - query=query, - query_embedding=query_embeddings[0], - graph_results=selected_items, - top_k=top_k, - ) - @timed def _retrieve_paths( self, @@ -722,6 +677,9 @@ def _retrieve_simple( if not documents: return [] documents_embeddings = self.embedder.embed(documents) + if not documents_embeddings: + logger.info("[SIMPLESEARCH] Documents embeddings is empty") + return [] similarity_matrix = cosine_similarity_matrix(documents_embeddings) selected_indices, _ = 
find_best_unrelated_subgroup(documents, similarity_matrix) selected_items = [items[i] for i in selected_indices]