diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index 91980bdeb..1b3d32bbc 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -120,11 +120,14 @@ def _apply_relativity_threshold(results: dict[str, Any], relativity: float) -> d
             if not isinstance(mem, dict):
                 continue
             meta = mem.get("metadata", {})
-            score = meta.get("relativity", 0.0) if isinstance(meta, dict) else 0.0
+            if key == "text_mem":
+                score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0
+            else:
+                score = meta.get("score", 1.0) if isinstance(meta, dict) else 1.0
             try:
-                score_val = float(score) if score is not None else 0.0
+                score_val = float(score) if score is not None else 1.0
             except (TypeError, ValueError):
-                score_val = 0.0
+                score_val = 1.0
             if score_val >= relativity:
                 filtered.append(mem)
 
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
index bc8d76517..f00efccb6 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -1,3 +1,4 @@
+import copy
 import traceback
 
 from concurrent.futures import as_completed
@@ -306,8 +307,8 @@ def _parse_task(
         query = parsed_goal.rephrased_query or query
         # if goal has extra memories, embed them too
         if parsed_goal.memories:
-            query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
-
+            embed_texts = list(dict.fromkeys([query, *parsed_goal.memories]))  # dedupe but keep the query first
+            query_embedding = self.embedder.embed(embed_texts)
         return parsed_goal, query_embedding, context, query
 
     @timed
@@ -379,6 +380,20 @@ def _retrieve_paths(
                     user_name,
                 )
             )
+            tasks.append(
+                executor.submit(
+                    self._retrieve_from_keyword,
+                    query,
+                    parsed_goal,
+                    query_embedding,
+                    top_k,
+                    memory_type,
+                    search_filter,
+                    search_priority,
+                    user_name,
+                    id_filter,
+                )
+            )
         if search_tool_memory:
             tasks.append(
                 executor.submit(
@@ -456,6 +471,96 @@ def _retrieve_from_working_memory(
             search_filter=search_filter,
         )
 
+    @timed
+    def _retrieve_from_keyword(
+        self,
+        query,
+        parsed_goal,
+        query_embedding,
+        top_k,
+        memory_type,
+        search_filter: dict | None = None,
+        search_priority: dict | None = None,
+        user_name: str | None = None,
+        id_filter: dict | None = None,
+    ) -> list[tuple[TextualMemoryItem, float]]:
+        """Keyword/fulltext path that directly calls the graph DB fulltext search."""
+
+        if memory_type not in ["All", "LongTermMemory", "UserMemory"]:
+            return []
+        if not query_embedding:
+            return []
+
+        query_words: list[str] = []
+        if self.tokenizer:
+            query_words = self.tokenizer.tokenize_mixed(query)
+        else:
+            query_words = query.strip().split()
+        # Use unique tokens; avoid passing the raw query into `to_tsquery(...)` because it may contain
+        # spaces/operators that cause tsquery parsing errors.
+        query_words = list(dict.fromkeys(query_words))
+        if len(query_words) > 64:
+            query_words = query_words[:64]  # cap the lexeme count to keep the tsquery bounded
+        if not query_words:
+            return []
+        tsquery_terms = ["'" + w.replace("'", "''") + "'" for w in query_words if w and w.strip()]
+        if not tsquery_terms:
+            return []
+
+        scopes = [memory_type] if memory_type != "All" else ["LongTermMemory", "UserMemory"]
+
+        id_to_score: dict[str, float] = {}
+        for scope in scopes:
+            hits = self.graph_store.search_by_fulltext(
+                query_words=tsquery_terms,
+                top_k=top_k * 2,
+                status="activated",
+                scope=scope,
+                search_filter=None,
+                filter=search_filter,
+                user_name=user_name,
+                tsquery_config="jiebaqry",
+            )
+            for h in hits or []:
+                hid = str(h.get("id") or "").strip().strip("'\"")
+                if not hid:
+                    continue
+                score = float(h.get("score") or 0.0)  # treat a missing/None score as 0.0
+                if hid not in id_to_score or score > id_to_score[hid]:
+                    id_to_score[hid] = score
+        if not id_to_score:
+            return []
+
+        sorted_ids = sorted(id_to_score.keys(), key=lambda x: id_to_score[x], reverse=True)
+        sorted_ids = sorted_ids[:top_k]
+        node_dicts = (
+            self.graph_store.get_nodes(sorted_ids, include_embedding=True, user_name=user_name)
+            or []
+        )
+        id_to_node = {n.get("id"): n for n in node_dicts}
+        ordered_nodes = []
+
+        for rid in sorted_ids:
+            if rid in id_to_node:
+                node = copy.deepcopy(id_to_node[rid])  # don't mutate any shared/cached node dict
+                meta = node.setdefault("metadata", {})
+                meta_target = meta
+                if isinstance(meta, dict) and isinstance(meta.get("metadata"), dict):
+                    meta_target = meta["metadata"]  # some stores nest metadata one level deeper
+                if isinstance(meta_target, dict):
+                    meta_target["keyword_score"] = id_to_score[rid]
+                ordered_nodes.append(node)
+
+        results = [TextualMemoryItem.from_dict(n) for n in ordered_nodes]
+        return self.reranker.rerank(
+            query=query,
+            query_embedding=query_embedding[0],
+            graph_results=results,
+            top_k=top_k,
+            parsed_goal=parsed_goal,
+            search_filter=search_filter,
+        )
+
     # --- Path B
     @timed
     def _retrieve_from_long_term_and_user(
diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py
index 6da55ce02..307bf05b2 100644
--- a/src/memos/multi_mem_cube/single_cube.py
+++ b/src/memos/multi_mem_cube/single_cube.py
@@ -468,13 +468,13 @@ def _fast_search(
             search_req=search_req,
             user_context=user_context,
             mode=SearchMode.FAST,
-            include_embedding=(search_req.dedup == "mmr"),
+            include_embedding=(search_req.dedup in ("mmr", "sim")),
         )
 
         return self._postformat_memories(
             search_results,
             user_context.mem_cube_id,
-            include_embedding=search_req.dedup == "sim",
+            include_embedding=(search_req.dedup in ("mmr", "sim")),
             neighbor_discovery=search_req.neighbor_discovery,
         )
 
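Reviewer note on the search_handler.py hunk: it changes two things at once. Entries outside `text_mem` are now thresholded on `metadata.score` rather than `metadata.relativity`, and the fallback for a missing or unparsable score flips from 0.0 to 1.0, so unscored memories are now kept instead of silently dropped. Below is a minimal, standalone sketch of the patched behavior; the function name and the surrounding loop over `results` are illustrative assumptions (the hunk only shows the inner loop body), not the module's actual code.

    from typing import Any

    def apply_relativity_threshold(results: dict[str, Any], relativity: float) -> dict[str, Any]:
        """Sketch of the patched filter: per-type score keys, keep-by-default fallback."""
        out: dict[str, Any] = {}
        for key, mems in results.items():
            filtered = []
            for mem in mems:
                if not isinstance(mem, dict):
                    continue
                meta = mem.get("metadata", {})
                # text_mem entries carry "relativity"; other memory types carry "score"
                score_key = "relativity" if key == "text_mem" else "score"
                score = meta.get(score_key, 1.0) if isinstance(meta, dict) else 1.0
                try:
                    # a fallback of 1.0 passes any threshold <= 1.0, i.e. "keep by default"
                    score_val = float(score) if score is not None else 1.0
                except (TypeError, ValueError):
                    score_val = 1.0
                if score_val >= relativity:
                    filtered.append(mem)
            out[key] = filtered
        return out

    results = {
        "text_mem": [
            {"metadata": {"relativity": 0.9}},  # kept: 0.9 >= 0.5
            {"metadata": {"relativity": 0.2}},  # dropped: 0.2 < 0.5
            {"metadata": {}},                   # kept: missing score now defaults to 1.0
        ],
        "act_mem": [
            {"metadata": {"score": 0.3}},       # dropped: thresholded on "score"
        ],
    }
    filtered = apply_relativity_threshold(results, relativity=0.5)
    assert len(filtered["text_mem"]) == 2
    assert len(filtered["act_mem"]) == 0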