From 5f4d6c11917d5f626fa7aa9feeef0325c9e6a5c4 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 16:01:49 +0800 Subject: [PATCH 01/12] feat: add keyword search --- .../tree_text_memory/retrieve/searcher.py | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index bc8d76517..c39cf7b8b 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -1,3 +1,4 @@ +import copy import traceback from concurrent.futures import as_completed @@ -379,6 +380,20 @@ def _retrieve_paths( user_name, ) ) + tasks.append( + executor.submit( + self._retrieve_from_keyword, + query, + parsed_goal, + query_embedding, + top_k, + memory_type, + search_filter, + search_priority, + user_name, + id_filter, + ) + ) if search_tool_memory: tasks.append( executor.submit( @@ -456,6 +471,103 @@ def _retrieve_from_working_memory( search_filter=search_filter, ) + @timed + def _retrieve_from_keyword( + self, + query, + parsed_goal, + query_embedding, + top_k, + memory_type, + search_filter: dict | None = None, + search_priority: dict | None = None, + user_name: str | None = None, + id_filter: dict | None = None, + ) -> list[tuple[TextualMemoryItem, float]]: + """Keyword/fulltext path that directly calls graph DB fulltext search.""" + + if memory_type not in ["All", "LongTermMemory", "UserMemory"]: + return [] + if not query_embedding: + return [] + + query_words: list[str] = [] + if self.tokenizer: + query_words = self.tokenizer.tokenize_mixed(query) + else: + query_words = query.strip().split() + # Use unique tokens; avoid passing the raw query into `to_tsquery(...)` because it may contain + # spaces/operators that cause tsquery parsing errors. + query_words = list(dict.fromkeys(query_words)) + if len(query_words) > 64: + query_words = query_words[:64] + if not query_words: + return [] + tsquery_terms = [ + "'" + w.replace("'", "''") + "'" for w in query_words if w and w.strip() + ] + if not tsquery_terms: + return [] + + scopes = ( + [memory_type] + if memory_type != "All" + else ["LongTermMemory", "UserMemory"] + ) + + id_to_score: dict[str, float] = {} + for scope in scopes: + hits = self.graph_store.search_by_fulltext( + query_words=tsquery_terms, + top_k=top_k * 2, + status="activated", + scope=scope, + search_filter=None, + filter=search_filter, + user_name=user_name, + tsquery_config="jiebaqry", + ) + for h in hits or []: + hid = str(h.get("id") or "").strip().strip("'\"") + if not hid: + continue + score = h.get("score", 0.0) + if hid not in id_to_score or score > id_to_score[hid]: + id_to_score[hid] = score + if not id_to_score: + return [] + + sorted_ids = sorted(id_to_score.keys(), key=lambda x: id_to_score[x], reverse=True) + # sorted_ids = sorted_ids[:top_k] + sorted_ids = sorted_ids[:6] + node_dicts = ( + self.graph_store.get_nodes(sorted_ids, include_embedding=True, user_name=user_name) + or [] + ) + id_to_node = {n.get("id"): n for n in node_dicts} + ordered_nodes = [] + + for rid in sorted_ids: + if rid in id_to_node: + node = copy.deepcopy(id_to_node[rid]) + meta = node.setdefault("metadata", {}) + meta_target = meta + if isinstance(meta, dict) and isinstance(meta.get("metadata"), dict): + meta_target = meta["metadata"] + if isinstance(meta_target, dict): + meta_target["keyword_score"] = id_to_score[rid] + ordered_nodes.append(node) + + results = [TextualMemoryItem.from_dict(n) for n in ordered_nodes] + return self.reranker.rerank( + query=query, + query_embedding=query_embedding[0], + graph_results=results, + top_k=top_k, + parsed_goal=parsed_goal, + search_filter=search_filter, + ) + # --- Path B @timed def _retrieve_from_long_term_and_user( From 8636344f95a6bf1020983a9949cb8ddf3d26a17e Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 16:25:22 +0800 Subject: [PATCH 02/12] =?UTF-8?q?test:=20=E6=8C=89=E7=85=A7=20top=5Fk=20?= =?UTF-8?q?=E6=88=AA=E6=96=AD=20keyword=20=E8=B7=AF=E6=A3=80=E7=B4=A2?= =?UTF-8?q?=E8=80=8C=E4=B8=8D=E6=98=AF=20top6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../memories/textual/tree_text_memory/retrieve/searcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index c39cf7b8b..59b3a0ce5 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -538,8 +538,8 @@ def _retrieve_from_keyword( return [] sorted_ids = sorted(id_to_score.keys(), key=lambda x: id_to_score[x], reverse=True) - # sorted_ids = sorted_ids[:top_k] - sorted_ids = sorted_ids[:6] + sorted_ids = sorted_ids[:top_k] + # sorted_ids = sorted_ids[:6] node_dicts = ( self.graph_store.get_nodes(sorted_ids, include_embedding=True, user_name=user_name) or [] From 27967fee9bb9f134aa7cedce746af86f4038bb29 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 16:53:56 +0800 Subject: [PATCH 03/12] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20cot=20embeddi?= =?UTF-8?q?ng=20=E9=A1=BA=E5=BA=8F=E9=97=AE=E9=A2=98,=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E9=98=88=E5=80=BC=E7=AD=9B=E9=80=89=E5=81=8F=E5=A5=BD=E8=AE=B0?= =?UTF-8?q?=E5=BF=86=E5=8F=AF=E8=83=BD=E8=AF=AF=E5=88=A0=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/memos/api/handlers/search_handler.py | 9 ++++++--- .../textual/tree_text_memory/retrieve/searcher.py | 5 ++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 91980bdeb..1b3d32bbc 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -120,11 +120,14 @@ def _apply_relativity_threshold(results: dict[str, Any], relativity: float) -> d if not isinstance(mem, dict): continue meta = mem.get("metadata", {}) - score = meta.get("relativity", 0.0) if isinstance(meta, dict) else 0.0 + if key == "text_mem": + score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0 + else: + score = meta.get("score", 1.0) if isinstance(meta, dict) else 1.0 try: - score_val = float(score) if score is not None else 0.0 + score_val = float(score) if score is not None else 1.0 except (TypeError, ValueError): - score_val = 0.0 + score_val = 1.0 if score_val >= relativity: filtered.append(mem) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 59b3a0ce5..0696feee2 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -307,8 +307,8 @@ def _parse_task( query = parsed_goal.rephrased_query or query # if goal has extra memories, embed them too if parsed_goal.memories: - query_embedding = self.embedder.embed(list({query, *parsed_goal.memories})) - + embed_texts = list(dict.fromkeys([query, *parsed_goal.memories])) + query_embedding = self.embedder.embed(embed_texts) return parsed_goal, query_embedding, context, query @timed @@ -539,7 +539,6 @@ def _retrieve_from_keyword( sorted_ids = sorted(id_to_score.keys(), key=lambda x: id_to_score[x], reverse=True) sorted_ids = sorted_ids[:top_k] - # sorted_ids = sorted_ids[:6] node_dicts = ( self.graph_store.get_nodes(sorted_ids, include_embedding=True, user_name=user_name) or [] From 3d18bba0ad2e3639163d29cf87e88293b8a66782 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 17:03:12 +0800 Subject: [PATCH 04/12] =?UTF-8?q?feat:=20=E5=BC=BA=E4=BF=9D=E8=AF=AD?= =?UTF-8?q?=E4=B9=89=E5=92=8C=E5=85=B3=E9=94=AE=E8=AF=8D=E8=B7=AF=20top1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/memos/api/handlers/search_handler.py | 156 +++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 1b3d32bbc..122eae7b8 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -73,6 +73,7 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse search_req_local.relativity = 0 self.logger.info(f"[SearchHandler] Relativity filter: {search_req_local.relativity}") results = self._apply_relativity_threshold(results, search_req_local.relativity) + forced_text_memories = self._collect_forced_text_memories(results) if search_req_local.dedup == "sim": results = self._dedup_text_memories(results, search_req.top_k) @@ -90,6 +91,9 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse top_k=search_req_local.top_k, file_mem_proportion=0.5, ) + results["text_mem"] = self._inject_forced_text_memories( + results.get("text_mem", []), forced_text_memories + ) self.logger.info( f"[SearchHandler] Final search results: count={len(results)} results={results}" @@ -100,6 +104,158 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse data=results, ) + @staticmethod + def _safe_float(value: Any, default: float = 0.0) -> float: + try: + if value is None: + return default + return float(value) + except (TypeError, ValueError): + return default + + def _select_best_memory(self, memories: list[Any], predicate) -> dict[str, Any] | None: + best = None + best_score = None + for mem in memories: + if not isinstance(mem, dict): + continue + if not predicate(mem): + continue + meta = mem.get("metadata", {}) + score = meta.get("relativity", 0.0) if isinstance(meta, dict) else 0.0 + score_val = self._safe_float(score, default=0.0) + if best is None or best_score is None or score_val > best_score: + best = mem + best_score = score_val + return best + + def _collect_forced_text_memories(self, results: dict[str, Any]) -> dict[str, dict[str, Any]]: + forced: dict[str, dict[str, Any]] = {} + buckets = results.get("text_mem", []) + if not isinstance(buckets, list): + return forced + + longterm_types = {"LongTermMemory", "UserMemory", "RawFileMemory"} + for bucket in buckets: + if not isinstance(bucket, dict): + continue + cube_id = bucket.get("cube_id") + memories = bucket.get("memories", []) + if not isinstance(cube_id, str) or not cube_id: + continue + if not isinstance(memories, list) or not memories: + continue + + keyword_best = self._select_best_memory( + memories, + lambda m: isinstance(m.get("metadata"), dict) + and "keyword_score" in m.get("metadata", {}), + ) + longterm_best = self._select_best_memory( + memories, + lambda m: isinstance(m.get("metadata"), dict) + and m.get("metadata", {}).get("memory_type") in longterm_types + and "keyword_score" not in m.get("metadata", {}), + ) + + payload: dict[str, Any] = {} + if keyword_best is not None: + payload["keyword"] = copy.deepcopy(keyword_best) + if longterm_best is not None: + payload["longterm_user"] = copy.deepcopy(longterm_best) + if payload: + forced[cube_id] = payload + + return forced + + @staticmethod + def _normalize_text_memory_item(mem: dict[str, Any]) -> dict[str, Any]: + meta = mem.get("metadata") + if not isinstance(meta, dict): + meta = {} + mem["metadata"] = meta + + sources = meta.get("sources", []) + memory_type = meta.get("memory_type") + if ( + memory_type != "RawFileMemory" + and isinstance(sources, list) + and sources + and isinstance(sources[0], dict) + and sources[0].get("type") == "file" + ): + content = sources[0].get("content") + if isinstance(content, str) and content: + mem["memory"] = content + + meta["sources"] = [] + if "embedding" in meta: + meta["embedding"] = [] + + return mem + + def _inject_forced_text_memories( + self, + text_mem: list[Any], + forced_by_cube: dict[str, dict[str, Any]], + ) -> list[Any]: + if not forced_by_cube or not isinstance(text_mem, list): + return text_mem + + for group in text_mem: + if not isinstance(group, dict): + continue + cube_id = group.get("cube_id") + if not isinstance(cube_id, str) or cube_id not in forced_by_cube: + continue + memories = group.get("memories") + if not isinstance(memories, list): + continue + + original_len = len(memories) + existing_ids = { + str(mem.get("id")) + for mem in memories + if isinstance(mem, dict) and mem.get("id") is not None + } + + forced_items: list[dict[str, Any]] = [] + for key in ("keyword", "longterm_user"): + candidate = forced_by_cube[cube_id].get(key) + if isinstance(candidate, dict): + forced_items.append(self._normalize_text_memory_item(copy.deepcopy(candidate))) + + new_items: list[dict[str, Any]] = [] + new_ids: set[str] = set() + for item in forced_items: + item_id = item.get("id") + if item_id is None: + continue + item_id_str = str(item_id) + if item_id_str in existing_ids or item_id_str in new_ids: + continue + new_items.append(item) + new_ids.add(item_id_str) + + if not new_items: + continue + + merged = new_items + [ + mem + for mem in memories + if not (isinstance(mem, dict) and str(mem.get("id")) in new_ids) + ] + + target_len = original_len if original_len >= len(new_items) else len(new_items) + if len(merged) > target_len: + merged = merged[:target_len] + + group["memories"] = merged + if "total_nodes" in group: + group["total_nodes"] = len(merged) + + return text_mem + @staticmethod def _apply_relativity_threshold(results: dict[str, Any], relativity: float) -> dict[str, Any]: if relativity <= 0: From ea2d86d3b21c1ea5613b3b384ee8023ead027815 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 17:17:42 +0800 Subject: [PATCH 05/12] =?UTF-8?q?fix:=20=E4=BD=BF=E7=94=A8=20keyword=5Fsco?= =?UTF-8?q?re=20=E8=80=8C=E4=B8=8D=E6=98=AF=20relativity=20=E4=BD=9C?= =?UTF-8?q?=E4=B8=BA=E5=85=B3=E9=94=AE=E8=AF=8D=E8=B7=AF=E5=BC=BA=E4=BF=9D?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BE=9D=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/memos/api/handlers/search_handler.py | 35 ++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 122eae7b8..7bf5aaff7 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -129,6 +129,35 @@ def _select_best_memory(self, memories: list[Any], predicate) -> dict[str, Any] best_score = score_val return best + def _select_best_keyword_memory(self, memories: list[Any]) -> dict[str, Any] | None: + best = None + best_keyword_score = None + best_relativity = None + for mem in memories: + if not isinstance(mem, dict): + continue + meta = mem.get("metadata", {}) + if not isinstance(meta, dict) or "keyword_score" not in meta: + continue + keyword_score_val = self._safe_float(meta.get("keyword_score"), default=0.0) + relativity_val = self._safe_float(meta.get("relativity"), default=0.0) + if best is None or best_keyword_score is None: + best = mem + best_keyword_score = keyword_score_val + best_relativity = relativity_val + continue + if keyword_score_val > best_keyword_score: + best = mem + best_keyword_score = keyword_score_val + best_relativity = relativity_val + continue + if keyword_score_val == best_keyword_score and best_relativity is not None: + if relativity_val > best_relativity: + best = mem + best_keyword_score = keyword_score_val + best_relativity = relativity_val + return best + def _collect_forced_text_memories(self, results: dict[str, Any]) -> dict[str, dict[str, Any]]: forced: dict[str, dict[str, Any]] = {} buckets = results.get("text_mem", []) @@ -146,11 +175,7 @@ def _collect_forced_text_memories(self, results: dict[str, Any]) -> dict[str, di if not isinstance(memories, list) or not memories: continue - keyword_best = self._select_best_memory( - memories, - lambda m: isinstance(m.get("metadata"), dict) - and "keyword_score" in m.get("metadata", {}), - ) + keyword_best = self._select_best_keyword_memory(memories) longterm_best = self._select_best_memory( memories, lambda m: isinstance(m.get("metadata"), dict) From a291a7af6b6f2702881f881abe4aeebf67fa9c50 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 17:40:33 +0800 Subject: [PATCH 06/12] =?UTF-8?q?feat:=20=E4=B8=A4=E8=B7=AF=20top1=20?= =?UTF-8?q?=E5=8F=82=E4=B8=8E=20mmr=20=E8=AE=A1=E7=AE=97=E4=BD=86=E6=98=AF?= =?UTF-8?q?=E4=B8=8D=E4=BC=9A=E8=A2=AB=E6=B7=98=E6=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/memos/api/handlers/search_handler.py | 62 ++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 7bf5aaff7..ea0fbf062 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -80,7 +80,9 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse self._strip_embeddings(results) elif search_req_local.dedup == "mmr": pref_top_k = getattr(search_req_local, "pref_top_k", 6) - results = self._mmr_dedup_text_memories(results, search_req.top_k, pref_top_k) + results = self._mmr_dedup_text_memories( + results, search_req.top_k, pref_top_k, forced_by_cube=forced_text_memories + ) self._strip_embeddings(results) text_mem = results["text_mem"] @@ -366,7 +368,11 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di return results def _mmr_dedup_text_memories( - self, results: dict[str, Any], text_top_k: int, pref_top_k: int = 6 + self, + results: dict[str, Any], + text_top_k: int, + pref_top_k: int = 6, + forced_by_cube: dict[str, dict[str, Any]] | None = None, ) -> dict[str, Any]: """ MMR-based deduplication with progressive penalty for high similarity. @@ -437,6 +443,47 @@ def _mmr_dedup_text_memories( text_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(text_buckets))} pref_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(pref_buckets))} selected_texts: set[str] = set() # Track exact text content to avoid duplicates + text_top_k_by_bucket: dict[int, int] = {i: text_top_k for i in range(len(text_buckets))} + + if forced_by_cube and isinstance(text_buckets, list): + text_key_to_flat_index: dict[tuple[int, str], int] = {} + for flat_index, (mem_type, bucket_idx, mem, _) in enumerate(flat): + if mem_type != "text": + continue + if not isinstance(mem, dict): + continue + mem_id = mem.get("id") + if mem_id is None: + continue + text_key_to_flat_index[(bucket_idx, str(mem_id))] = flat_index + + for bucket_idx, bucket in enumerate(text_buckets): + if not isinstance(bucket, dict): + continue + cube_id = bucket.get("cube_id") + if not isinstance(cube_id, str) or cube_id not in forced_by_cube: + continue + payload = forced_by_cube.get(cube_id) or {} + for key in ("keyword", "longterm_user"): + candidate = payload.get(key) + if not isinstance(candidate, dict): + continue + candidate_id = candidate.get("id") + if candidate_id is None: + continue + forced_index = text_key_to_flat_index.get((bucket_idx, str(candidate_id))) + if forced_index is None or forced_index in selected_global: + continue + mem_text = flat[forced_index][2].get("memory", "").strip() + if mem_text in selected_texts: + continue + selected_global.append(forced_index) + text_selected_by_bucket[bucket_idx].append(forced_index) + selected_texts.add(mem_text) + + for bucket_idx in range(len(text_buckets)): + if len(text_selected_by_bucket[bucket_idx]) > text_top_k_by_bucket[bucket_idx]: + text_top_k_by_bucket[bucket_idx] = len(text_selected_by_bucket[bucket_idx]) # Phase 1: Prefill top N by relevance # Use the smaller of text_top_k and pref_top_k for prefill count @@ -459,7 +506,10 @@ def _mmr_dedup_text_memories( continue # Check bucket capacity with correct top_k for each type - if mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) < text_top_k: + if ( + mem_type == "text" + and len(text_selected_by_bucket[bucket_idx]) < text_top_k_by_bucket[bucket_idx] + ): selected_global.append(idx) text_selected_by_bucket[bucket_idx].append(idx) selected_texts.add(mem_text) @@ -483,7 +533,8 @@ def _mmr_dedup_text_memories( # Check bucket capacity with correct top_k for each type if ( - mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) >= text_top_k + mem_type == "text" + and len(text_selected_by_bucket[bucket_idx]) >= text_top_k_by_bucket[bucket_idx] ) or ( mem_type == "preference" and len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k @@ -541,7 +592,8 @@ def _mmr_dedup_text_memories( # Early termination: all buckets are full text_all_full = all( - len(text_selected_by_bucket[b_idx]) >= min(text_top_k, len(bucket_indices)) + len(text_selected_by_bucket[b_idx]) + >= min(text_top_k_by_bucket[b_idx], len(bucket_indices)) for b_idx, bucket_indices in text_indices_by_bucket.items() ) pref_all_full = all( From 52035335bc83dfe5fd93a552db8c7e9ced0525b4 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 19:06:45 +0800 Subject: [PATCH 07/12] =?UTF-8?q?revert=20and=20=E4=BD=BF=E7=94=A8=20http?= =?UTF-8?q?=20bge=20reranker?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/memos/api/handlers/search_handler.py | 117 ++++++++++++----------- src/memos/multi_mem_cube/single_cube.py | 4 +- 2 files changed, 62 insertions(+), 59 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index ea0fbf062..37dc123f0 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -20,7 +20,11 @@ from memos.multi_mem_cube.composite_cube import CompositeCubeView from memos.multi_mem_cube.single_cube import SingleCubeView from memos.multi_mem_cube.views import MemCubeView +from memos.reranker.http_bge import HTTPBGEReranker +# ====== 全局配置 ====== +BGE_RERANKER_URL = "http://106.75.235.231:8082/v1/rerank" # HTTP BGE rerank 服务地址 +MMR_OUTPUT_MULTIPLIER = 3 # MMR 阶段每个 bucket 目标返回数量倍数(相对原 top_k) logger = get_logger(__name__) @@ -43,6 +47,7 @@ def __init__(self, dependencies: HandlerDependencies): self._validate_dependencies( "naive_mem_cube", "mem_scheduler", "searcher", "deepsearch_agent" ) + self.http_bge_reranker = HTTPBGEReranker(reranker_url=BGE_RERANKER_URL) def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse: """ @@ -80,9 +85,9 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse self._strip_embeddings(results) elif search_req_local.dedup == "mmr": pref_top_k = getattr(search_req_local, "pref_top_k", 6) - results = self._mmr_dedup_text_memories( - results, search_req.top_k, pref_top_k, forced_by_cube=forced_text_memories - ) + mmr_text_top_k = max(int(search_req.top_k) * MMR_OUTPUT_MULTIPLIER, int(search_req.top_k)) + results = self._mmr_dedup_text_memories(results, mmr_text_top_k, pref_top_k) + results = self._http_bge_rerank_text_memories(query=search_req.query, results=results) self._strip_embeddings(results) text_mem = results["text_mem"] @@ -115,6 +120,54 @@ def _safe_float(value: Any, default: float = 0.0) -> float: except (TypeError, ValueError): return default + def _http_bge_rerank_text_memories(self, query: str, results: dict[str, Any]) -> dict[str, Any]: + buckets = results.get("text_mem", []) + if not isinstance(buckets, list) or not buckets: + return results + + memories_list: list[dict[str, Any]] = [] + memid2mem: dict[str, dict[str, Any]] = {} + for bucket in buckets: + if not isinstance(bucket, dict): + continue + for mem in bucket.get("memories", []): + if not isinstance(mem, dict): + continue + mem_id = mem.get("id") + if mem_id is None: + continue + mem_id_str = str(mem_id) + memories_list.append(mem) + memid2mem[mem_id_str] = mem + + if not memories_list: + return results + + rerank_k = len(memories_list) + ranked = self.http_bge_reranker.rerank( + query=query, + graph_results=memories_list, + top_k=rerank_k, + ) + score_by_id: dict[str, float] = {} + for item, score in ranked: + item_id = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) + if item_id is None: + continue + score_by_id[str(item_id)] = score + + for mem_id_str, score in score_by_id.items(): + mem = memid2mem.get(mem_id_str) + if not isinstance(mem, dict): + continue + meta = mem.get("metadata") + if not isinstance(meta, dict): + meta = {} + mem["metadata"] = meta + meta["relativity"] = float(score) + + return results + def _select_best_memory(self, memories: list[Any], predicate) -> dict[str, Any] | None: best = None best_score = None @@ -368,11 +421,7 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di return results def _mmr_dedup_text_memories( - self, - results: dict[str, Any], - text_top_k: int, - pref_top_k: int = 6, - forced_by_cube: dict[str, dict[str, Any]] | None = None, + self, results: dict[str, Any], text_top_k: int, pref_top_k: int = 6 ) -> dict[str, Any]: """ MMR-based deduplication with progressive penalty for high similarity. @@ -443,47 +492,6 @@ def _mmr_dedup_text_memories( text_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(text_buckets))} pref_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(pref_buckets))} selected_texts: set[str] = set() # Track exact text content to avoid duplicates - text_top_k_by_bucket: dict[int, int] = {i: text_top_k for i in range(len(text_buckets))} - - if forced_by_cube and isinstance(text_buckets, list): - text_key_to_flat_index: dict[tuple[int, str], int] = {} - for flat_index, (mem_type, bucket_idx, mem, _) in enumerate(flat): - if mem_type != "text": - continue - if not isinstance(mem, dict): - continue - mem_id = mem.get("id") - if mem_id is None: - continue - text_key_to_flat_index[(bucket_idx, str(mem_id))] = flat_index - - for bucket_idx, bucket in enumerate(text_buckets): - if not isinstance(bucket, dict): - continue - cube_id = bucket.get("cube_id") - if not isinstance(cube_id, str) or cube_id not in forced_by_cube: - continue - payload = forced_by_cube.get(cube_id) or {} - for key in ("keyword", "longterm_user"): - candidate = payload.get(key) - if not isinstance(candidate, dict): - continue - candidate_id = candidate.get("id") - if candidate_id is None: - continue - forced_index = text_key_to_flat_index.get((bucket_idx, str(candidate_id))) - if forced_index is None or forced_index in selected_global: - continue - mem_text = flat[forced_index][2].get("memory", "").strip() - if mem_text in selected_texts: - continue - selected_global.append(forced_index) - text_selected_by_bucket[bucket_idx].append(forced_index) - selected_texts.add(mem_text) - - for bucket_idx in range(len(text_buckets)): - if len(text_selected_by_bucket[bucket_idx]) > text_top_k_by_bucket[bucket_idx]: - text_top_k_by_bucket[bucket_idx] = len(text_selected_by_bucket[bucket_idx]) # Phase 1: Prefill top N by relevance # Use the smaller of text_top_k and pref_top_k for prefill count @@ -506,10 +514,7 @@ def _mmr_dedup_text_memories( continue # Check bucket capacity with correct top_k for each type - if ( - mem_type == "text" - and len(text_selected_by_bucket[bucket_idx]) < text_top_k_by_bucket[bucket_idx] - ): + if mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) < text_top_k: selected_global.append(idx) text_selected_by_bucket[bucket_idx].append(idx) selected_texts.add(mem_text) @@ -533,8 +538,7 @@ def _mmr_dedup_text_memories( # Check bucket capacity with correct top_k for each type if ( - mem_type == "text" - and len(text_selected_by_bucket[bucket_idx]) >= text_top_k_by_bucket[bucket_idx] + mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) >= text_top_k ) or ( mem_type == "preference" and len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k @@ -592,8 +596,7 @@ def _mmr_dedup_text_memories( # Early termination: all buckets are full text_all_full = all( - len(text_selected_by_bucket[b_idx]) - >= min(text_top_k_by_bucket[b_idx], len(bucket_indices)) + len(text_selected_by_bucket[b_idx]) >= min(text_top_k, len(bucket_indices)) for b_idx, bucket_indices in text_indices_by_bucket.items() ) pref_all_full = all( diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index 6da55ce02..307bf05b2 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -468,13 +468,13 @@ def _fast_search( search_req=search_req, user_context=user_context, mode=SearchMode.FAST, - include_embedding=(search_req.dedup == "mmr"), + include_embedding=(search_req.dedup in ("mmr", "sim")), ) return self._postformat_memories( search_results, user_context.mem_cube_id, - include_embedding=search_req.dedup == "sim", + include_embedding=(search_req.dedup in ("mmr", "sim")), neighbor_discovery=search_req.neighbor_discovery, ) From 8f46d0a8c3831372ec9f76f3a3b99da587f79167 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 19:26:53 +0800 Subject: [PATCH 08/12] =?UTF-8?q?Revert=20"fix:=20=E4=BF=AE=E5=A4=8D=20cot?= =?UTF-8?q?=20embedding=20=E9=A1=BA=E5=BA=8F=E9=97=AE=E9=A2=98,=20?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=98=88=E5=80=BC=E7=AD=9B=E9=80=89=E5=81=8F?= =?UTF-8?q?=E5=A5=BD=E8=AE=B0=E5=BF=86=E5=8F=AF=E8=83=BD=E8=AF=AF=E5=88=A0?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 27967fee9bb9f134aa7cedce746af86f4038bb29. --- src/memos/api/handlers/search_handler.py | 9 +++------ .../textual/tree_text_memory/retrieve/searcher.py | 5 +++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 37dc123f0..8bffdada1 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -356,14 +356,11 @@ def _apply_relativity_threshold(results: dict[str, Any], relativity: float) -> d if not isinstance(mem, dict): continue meta = mem.get("metadata", {}) - if key == "text_mem": - score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0 - else: - score = meta.get("score", 1.0) if isinstance(meta, dict) else 1.0 + score = meta.get("relativity", 0.0) if isinstance(meta, dict) else 0.0 try: - score_val = float(score) if score is not None else 1.0 + score_val = float(score) if score is not None else 0.0 except (TypeError, ValueError): - score_val = 1.0 + score_val = 0.0 if score_val >= relativity: filtered.append(mem) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 0696feee2..59b3a0ce5 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -307,8 +307,8 @@ def _parse_task( query = parsed_goal.rephrased_query or query # if goal has extra memories, embed them too if parsed_goal.memories: - embed_texts = list(dict.fromkeys([query, *parsed_goal.memories])) - query_embedding = self.embedder.embed(embed_texts) + query_embedding = self.embedder.embed(list({query, *parsed_goal.memories})) + return parsed_goal, query_embedding, context, query @timed @@ -539,6 +539,7 @@ def _retrieve_from_keyword( sorted_ids = sorted(id_to_score.keys(), key=lambda x: id_to_score[x], reverse=True) sorted_ids = sorted_ids[:top_k] + # sorted_ids = sorted_ids[:6] node_dicts = ( self.graph_store.get_nodes(sorted_ids, include_embedding=True, user_name=user_name) or [] From 7ce8ba1167803a720cb846d115c7b638429da811 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 19:44:40 +0800 Subject: [PATCH 09/12] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20query=20embed?= =?UTF-8?q?ding=20=E9=A1=BA=E5=BA=8F=E9=97=AE=E9=A2=98=E5=92=8C=E5=81=8F?= =?UTF-8?q?=E5=A5=BD=E8=AE=B0=E5=BF=86=E7=9A=84=E7=9B=B8=E5=85=B3=E7=9A=84?= =?UTF-8?q?=E5=88=86=E6=95=B0=E5=AD=97=E6=AE=B5=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/memos/api/handlers/search_handler.py | 9 ++++++--- .../textual/tree_text_memory/retrieve/searcher.py | 5 ++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 8bffdada1..37dc123f0 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -356,11 +356,14 @@ def _apply_relativity_threshold(results: dict[str, Any], relativity: float) -> d if not isinstance(mem, dict): continue meta = mem.get("metadata", {}) - score = meta.get("relativity", 0.0) if isinstance(meta, dict) else 0.0 + if key == "text_mem": + score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0 + else: + score = meta.get("score", 1.0) if isinstance(meta, dict) else 1.0 try: - score_val = float(score) if score is not None else 0.0 + score_val = float(score) if score is not None else 1.0 except (TypeError, ValueError): - score_val = 0.0 + score_val = 1.0 if score_val >= relativity: filtered.append(mem) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 59b3a0ce5..0696feee2 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -307,8 +307,8 @@ def _parse_task( query = parsed_goal.rephrased_query or query # if goal has extra memories, embed them too if parsed_goal.memories: - query_embedding = self.embedder.embed(list({query, *parsed_goal.memories})) - + embed_texts = list(dict.fromkeys([query, *parsed_goal.memories])) + query_embedding = self.embedder.embed(embed_texts) return parsed_goal, query_embedding, context, query @timed @@ -539,7 +539,6 @@ def _retrieve_from_keyword( sorted_ids = sorted(id_to_score.keys(), key=lambda x: id_to_score[x], reverse=True) sorted_ids = sorted_ids[:top_k] - # sorted_ids = sorted_ids[:6] node_dicts = ( self.graph_store.get_nodes(sorted_ids, include_embedding=True, user_name=user_name) or [] From e787503445e761ac7fc0172032d2b879727a2076 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 20:01:06 +0800 Subject: [PATCH 10/12] revert --- src/memos/api/handlers/search_handler.py | 238 +---------------------- 1 file changed, 1 insertion(+), 237 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 37dc123f0..1b3d32bbc 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -20,11 +20,7 @@ from memos.multi_mem_cube.composite_cube import CompositeCubeView from memos.multi_mem_cube.single_cube import SingleCubeView from memos.multi_mem_cube.views import MemCubeView -from memos.reranker.http_bge import HTTPBGEReranker -# ====== 全局配置 ====== -BGE_RERANKER_URL = "http://106.75.235.231:8082/v1/rerank" # HTTP BGE rerank 服务地址 -MMR_OUTPUT_MULTIPLIER = 3 # MMR 阶段每个 bucket 目标返回数量倍数(相对原 top_k) logger = get_logger(__name__) @@ -47,7 +43,6 @@ def __init__(self, dependencies: HandlerDependencies): self._validate_dependencies( "naive_mem_cube", "mem_scheduler", "searcher", "deepsearch_agent" ) - self.http_bge_reranker = HTTPBGEReranker(reranker_url=BGE_RERANKER_URL) def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse: """ @@ -78,16 +73,13 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse search_req_local.relativity = 0 self.logger.info(f"[SearchHandler] Relativity filter: {search_req_local.relativity}") results = self._apply_relativity_threshold(results, search_req_local.relativity) - forced_text_memories = self._collect_forced_text_memories(results) if search_req_local.dedup == "sim": results = self._dedup_text_memories(results, search_req.top_k) self._strip_embeddings(results) elif search_req_local.dedup == "mmr": pref_top_k = getattr(search_req_local, "pref_top_k", 6) - mmr_text_top_k = max(int(search_req.top_k) * MMR_OUTPUT_MULTIPLIER, int(search_req.top_k)) - results = self._mmr_dedup_text_memories(results, mmr_text_top_k, pref_top_k) - results = self._http_bge_rerank_text_memories(query=search_req.query, results=results) + results = self._mmr_dedup_text_memories(results, search_req.top_k, pref_top_k) self._strip_embeddings(results) text_mem = results["text_mem"] @@ -98,9 +90,6 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse top_k=search_req_local.top_k, file_mem_proportion=0.5, ) - results["text_mem"] = self._inject_forced_text_memories( - results.get("text_mem", []), forced_text_memories - ) self.logger.info( f"[SearchHandler] Final search results: count={len(results)} results={results}" @@ -111,231 +100,6 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse data=results, ) - @staticmethod - def _safe_float(value: Any, default: float = 0.0) -> float: - try: - if value is None: - return default - return float(value) - except (TypeError, ValueError): - return default - - def _http_bge_rerank_text_memories(self, query: str, results: dict[str, Any]) -> dict[str, Any]: - buckets = results.get("text_mem", []) - if not isinstance(buckets, list) or not buckets: - return results - - memories_list: list[dict[str, Any]] = [] - memid2mem: dict[str, dict[str, Any]] = {} - for bucket in buckets: - if not isinstance(bucket, dict): - continue - for mem in bucket.get("memories", []): - if not isinstance(mem, dict): - continue - mem_id = mem.get("id") - if mem_id is None: - continue - mem_id_str = str(mem_id) - memories_list.append(mem) - memid2mem[mem_id_str] = mem - - if not memories_list: - return results - - rerank_k = len(memories_list) - ranked = self.http_bge_reranker.rerank( - query=query, - graph_results=memories_list, - top_k=rerank_k, - ) - score_by_id: dict[str, float] = {} - for item, score in ranked: - item_id = item.get("id") if isinstance(item, dict) else getattr(item, "id", None) - if item_id is None: - continue - score_by_id[str(item_id)] = score - - for mem_id_str, score in score_by_id.items(): - mem = memid2mem.get(mem_id_str) - if not isinstance(mem, dict): - continue - meta = mem.get("metadata") - if not isinstance(meta, dict): - meta = {} - mem["metadata"] = meta - meta["relativity"] = float(score) - - return results - - def _select_best_memory(self, memories: list[Any], predicate) -> dict[str, Any] | None: - best = None - best_score = None - for mem in memories: - if not isinstance(mem, dict): - continue - if not predicate(mem): - continue - meta = mem.get("metadata", {}) - score = meta.get("relativity", 0.0) if isinstance(meta, dict) else 0.0 - score_val = self._safe_float(score, default=0.0) - if best is None or best_score is None or score_val > best_score: - best = mem - best_score = score_val - return best - - def _select_best_keyword_memory(self, memories: list[Any]) -> dict[str, Any] | None: - best = None - best_keyword_score = None - best_relativity = None - for mem in memories: - if not isinstance(mem, dict): - continue - meta = mem.get("metadata", {}) - if not isinstance(meta, dict) or "keyword_score" not in meta: - continue - keyword_score_val = self._safe_float(meta.get("keyword_score"), default=0.0) - relativity_val = self._safe_float(meta.get("relativity"), default=0.0) - if best is None or best_keyword_score is None: - best = mem - best_keyword_score = keyword_score_val - best_relativity = relativity_val - continue - if keyword_score_val > best_keyword_score: - best = mem - best_keyword_score = keyword_score_val - best_relativity = relativity_val - continue - if keyword_score_val == best_keyword_score and best_relativity is not None: - if relativity_val > best_relativity: - best = mem - best_keyword_score = keyword_score_val - best_relativity = relativity_val - return best - - def _collect_forced_text_memories(self, results: dict[str, Any]) -> dict[str, dict[str, Any]]: - forced: dict[str, dict[str, Any]] = {} - buckets = results.get("text_mem", []) - if not isinstance(buckets, list): - return forced - - longterm_types = {"LongTermMemory", "UserMemory", "RawFileMemory"} - for bucket in buckets: - if not isinstance(bucket, dict): - continue - cube_id = bucket.get("cube_id") - memories = bucket.get("memories", []) - if not isinstance(cube_id, str) or not cube_id: - continue - if not isinstance(memories, list) or not memories: - continue - - keyword_best = self._select_best_keyword_memory(memories) - longterm_best = self._select_best_memory( - memories, - lambda m: isinstance(m.get("metadata"), dict) - and m.get("metadata", {}).get("memory_type") in longterm_types - and "keyword_score" not in m.get("metadata", {}), - ) - - payload: dict[str, Any] = {} - if keyword_best is not None: - payload["keyword"] = copy.deepcopy(keyword_best) - if longterm_best is not None: - payload["longterm_user"] = copy.deepcopy(longterm_best) - if payload: - forced[cube_id] = payload - - return forced - - @staticmethod - def _normalize_text_memory_item(mem: dict[str, Any]) -> dict[str, Any]: - meta = mem.get("metadata") - if not isinstance(meta, dict): - meta = {} - mem["metadata"] = meta - - sources = meta.get("sources", []) - memory_type = meta.get("memory_type") - if ( - memory_type != "RawFileMemory" - and isinstance(sources, list) - and sources - and isinstance(sources[0], dict) - and sources[0].get("type") == "file" - ): - content = sources[0].get("content") - if isinstance(content, str) and content: - mem["memory"] = content - - meta["sources"] = [] - if "embedding" in meta: - meta["embedding"] = [] - - return mem - - def _inject_forced_text_memories( - self, - text_mem: list[Any], - forced_by_cube: dict[str, dict[str, Any]], - ) -> list[Any]: - if not forced_by_cube or not isinstance(text_mem, list): - return text_mem - - for group in text_mem: - if not isinstance(group, dict): - continue - cube_id = group.get("cube_id") - if not isinstance(cube_id, str) or cube_id not in forced_by_cube: - continue - memories = group.get("memories") - if not isinstance(memories, list): - continue - - original_len = len(memories) - existing_ids = { - str(mem.get("id")) - for mem in memories - if isinstance(mem, dict) and mem.get("id") is not None - } - - forced_items: list[dict[str, Any]] = [] - for key in ("keyword", "longterm_user"): - candidate = forced_by_cube[cube_id].get(key) - if isinstance(candidate, dict): - forced_items.append(self._normalize_text_memory_item(copy.deepcopy(candidate))) - - new_items: list[dict[str, Any]] = [] - new_ids: set[str] = set() - for item in forced_items: - item_id = item.get("id") - if item_id is None: - continue - item_id_str = str(item_id) - if item_id_str in existing_ids or item_id_str in new_ids: - continue - new_items.append(item) - new_ids.add(item_id_str) - - if not new_items: - continue - - merged = new_items + [ - mem - for mem in memories - if not (isinstance(mem, dict) and str(mem.get("id")) in new_ids) - ] - - target_len = original_len if original_len >= len(new_items) else len(new_items) - if len(merged) > target_len: - merged = merged[:target_len] - - group["memories"] = merged - if "total_nodes" in group: - group["total_nodes"] = len(merged) - - return text_mem - @staticmethod def _apply_relativity_threshold(results: dict[str, Any], relativity: float) -> dict[str, Any]: if relativity <= 0: From 995361e555b926e87f8d60ffea3362b80d6513b3 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 10 Feb 2026 20:33:41 +0800 Subject: [PATCH 11/12] reformat --- .../textual/tree_text_memory/retrieve/searcher.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 0696feee2..f00efccb6 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -503,17 +503,11 @@ def _retrieve_from_keyword( query_words = query_words[:64] if not query_words: return [] - tsquery_terms = [ - "'" + w.replace("'", "''") + "'" for w in query_words if w and w.strip() - ] + tsquery_terms = ["'" + w.replace("'", "''") + "'" for w in query_words if w and w.strip()] if not tsquery_terms: return [] - scopes = ( - [memory_type] - if memory_type != "All" - else ["LongTermMemory", "UserMemory"] - ) + scopes = [memory_type] if memory_type != "All" else ["LongTermMemory", "UserMemory"] id_to_score: dict[str, float] = {} for scope in scopes: From 7c83795263e4f8e66f50604a2ffca929618ff686 Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 11 Feb 2026 11:52:22 +0800 Subject: [PATCH 12/12] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=81=8F?= =?UTF-8?q?=E5=A5=BD=E8=AE=B0=E5=BF=86=20score=20=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/memos/api/handlers/search_handler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 1b3d32bbc..267d1bb28 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -223,7 +223,11 @@ def _mmr_dedup_text_memories( # Flatten preference memories for bucket_idx, bucket in enumerate(pref_buckets): for mem in bucket.get("memories", []): - score = mem.get("metadata", {}).get("relativity", 0.0) + meta = mem.get("metadata", {}) + if isinstance(meta, dict): + score = meta.get("score", meta.get("relativity", 0.0)) + else: + score = 0.0 flat.append( ("preference", bucket_idx, mem, float(score) if score is not None else 0.0) )