From 3bd0959d4f7684afe7c3069c7717f1a443b966c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Mon, 2 Feb 2026 11:06:31 +0800
Subject: [PATCH 1/5] fix: add file URL to memory value

---
 .../read_multi_modal/file_content_parser.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index fbc704d0b..00da08b1c 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -412,7 +412,6 @@ def parse_fast(
         # Extract file parameters (all are optional)
         file_data = file_info.get("file_data", "")
         file_id = file_info.get("file_id", "")
-        filename = file_info.get("filename", "")
         file_url_flag = False
         # Build content string based on available information
         content_parts = []
@@ -433,25 +432,12 @@
                 # Check if it looks like a URL
                 elif file_data.startswith(("http://", "https://", "file://")):
                     file_url_flag = True
-                    content_parts.append(f"[File URL: {file_data}]")
                 else:
                     # TODO: split into multiple memory items
                     content_parts.append(file_data)
             else:
                 content_parts.append(f"[File Data: {type(file_data).__name__}]")
 
-        # Priority 2: If file_id is provided, reference it
-        if file_id:
-            content_parts.append(f"[File ID: {file_id}]")
-
-        # Priority 3: If filename is provided, include it
-        if filename:
-            content_parts.append(f"[Filename: {filename}]")
-
-        # If no content can be extracted, create a placeholder
-        if not content_parts:
-            content_parts.append("[File: unknown]")
-
         # Combine content parts
         content = " ".join(content_parts)

From 55bb67d3164d603c9f12f5604f2f7a5fb469eba9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Mon, 2 Feb 2026 17:24:29 +0800
Subject: [PATCH 2/5] Extract phrases from the key and add them to the tags.

---
 src/memos/mem_reader/read_multi_modal/base.py          | 2 ++
 .../mem_reader/read_multi_modal/file_content_parser.py | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/memos/mem_reader/read_multi_modal/base.py b/src/memos/mem_reader/read_multi_modal/base.py
index 95d427864..737a3fe1e 100644
--- a/src/memos/mem_reader/read_multi_modal/base.py
+++ b/src/memos/mem_reader/read_multi_modal/base.py
@@ -15,6 +15,7 @@
     TextualMemoryItem,
     TreeNodeTextualMemoryMetadata,
 )
+from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import FastTokenizer
 from memos.utils import timed
 
 from .utils import detect_lang, get_text_splitter
@@ -90,6 +91,7 @@ def __init__(self, embedder, llm=None):
         """
         self.embedder = embedder
         self.llm = llm
+        self.tokenizer = FastTokenizer(use_jieba=True, use_stopwords=True)
 
     @abstractmethod
     def create_source(
diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index 00da08b1c..9f4ab94c2 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -710,7 +710,7 @@ def _make_fallback(
             chunk_idx: int, chunk_text: str, reason: str = "raw"
         ) -> TextualMemoryItem:
             """Create fallback memory item with raw chunk text."""
-            return _make_memory_item(
+            raw_chunk_mem = _make_memory_item(
                 value=chunk_text,
                 tags=[
                     "mode:fine",
@@ -721,6 +721,11 @@
                 chunk_idx=chunk_idx,
                 chunk_content=chunk_text,
             )
+            tags_list = self.tokenizer.tokenize_mixed(raw_chunk_mem.metadata.key)
+            tags_list = [tag for tag in tags_list if len(tag) > 1]
+            tags_list = sorted(tags_list, key=len, reverse=True)
+            raw_chunk_mem.metadata.tags.extend(tags_list[:5])
+            return raw_chunk_mem
 
         # Handle empty chunks case
         if not valid_chunks:

From aecae5eb193c31b7fc53f2f27577df9bd0abce64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 4 Feb 2026 10:53:37 +0800
Subject: [PATCH 3/5] Fix inaccurate retrieval of "total nodes" through the "get memory" interface

---
 src/memos/api/handlers/memory_handler.py | 65 ++++++++++++++++++++----
 src/memos/memories/textual/tree.py       |  8 ++-
 2 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/src/memos/api/handlers/memory_handler.py b/src/memos/api/handlers/memory_handler.py
index e8bc5b640..17cb66023 100644
--- a/src/memos/api/handlers/memory_handler.py
+++ b/src/memos/api/handlers/memory_handler.py
@@ -9,7 +9,6 @@
 from memos.api.handlers.formatters_handler import (
     format_memory_item,
     post_process_pref_mem,
-    post_process_textual_mem,
 )
 from memos.api.product_models import (
     DeleteMemoryRequest,
@@ -249,22 +248,68 @@ def handle_get_memories(
     get_mem_req: GetMemoryRequest, naive_mem_cube: NaiveMemCube
 ) -> GetMemoryResponse:
     results: dict[str, Any] = {"text_mem": [], "pref_mem": [], "tool_mem": [], "skill_mem": []}
-    memories = naive_mem_cube.text_mem.get_all(
+    text_memory_type = ["WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"]
+    text_memories_info = naive_mem_cube.text_mem.get_all(
         user_name=get_mem_req.mem_cube_id,
         user_id=get_mem_req.user_id,
         page=get_mem_req.page,
         page_size=get_mem_req.page_size,
         filter=get_mem_req.filter,
-    )["nodes"]
+        memory_type=text_memory_type,
+    )
+    text_memories, total_text_nodes = text_memories_info["nodes"], text_memories_info["total_nodes"]
+    results["text_mem"] = [
+        {
+            "cube_id": get_mem_req.mem_cube_id,
+            "memories": text_memories,
"total_nodes": total_text_nodes, + } + ] - results = post_process_textual_mem(results, memories, get_mem_req.mem_cube_id) + if get_mem_req.include_tool_memory: + tool_memories_info = naive_mem_cube.text_mem.get_all( + user_name=get_mem_req.mem_cube_id, + user_id=get_mem_req.user_id, + page=get_mem_req.page, + page_size=get_mem_req.page_size, + filter=get_mem_req.filter, + memory_type=["ToolSchemaMemory", "ToolTrajectoryMemory"], + ) + tool_memories, total_tool_nodes = ( + tool_memories_info["nodes"], + tool_memories_info["total_nodes"], + ) - if not get_mem_req.include_tool_memory: - results["tool_mem"] = [] - if not get_mem_req.include_skill_memory: - results["skill_mem"] = [] + results["tool_mem"] = [ + { + "cube_id": get_mem_req.mem_cube_id, + "memories": tool_memories, + "total_nodes": total_tool_nodes, + } + ] + if get_mem_req.include_skill_memory: + skill_memories_info = naive_mem_cube.text_mem.get_all( + user_name=get_mem_req.mem_cube_id, + user_id=get_mem_req.user_id, + page=get_mem_req.page, + page_size=get_mem_req.page_size, + filter=get_mem_req.filter, + memory_type=["SkillMemory"], + ) + skill_memories, total_skill_nodes = ( + skill_memories_info["nodes"], + skill_memories_info["total_nodes"], + ) + results["skill_mem"] = [ + { + "cube_id": get_mem_req.mem_cube_id, + "memories": skill_memories, + "total_nodes": total_skill_nodes, + } + ] preferences: list[TextualMemoryItem] = [] + total_preference_nodes = 0 format_preferences = [] if get_mem_req.include_preference and naive_mem_cube.pref_mem is not None: @@ -293,7 +338,7 @@ def handle_get_memories( filter_params.update(filter_copy) - preferences, _ = naive_mem_cube.pref_mem.get_memory_by_filter( + preferences, total_preference_nodes = naive_mem_cube.pref_mem.get_memory_by_filter( filter_params, page=get_mem_req.page, page_size=get_mem_req.page_size ) format_preferences = [format_memory_item(item, save_sources=False) for item in preferences] @@ -301,6 +346,8 @@ def handle_get_memories( results = post_process_pref_mem( results, format_preferences, get_mem_req.mem_cube_id, get_mem_req.include_preference ) + if total_preference_nodes > 0 and results.get("pref_mem", []): + results["pref_mem"][0]["total_nodes"] = total_preference_nodes # Filter to only keep text_mem, pref_mem, tool_mem filtered_results = { diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py index b556db5d7..5489dfd38 100644 --- a/src/memos/memories/textual/tree.py +++ b/src/memos/memories/textual/tree.py @@ -333,13 +333,19 @@ def get_all( page: int | None = None, page_size: int | None = None, filter: dict | None = None, + memory_type: list[str] | None = None, ) -> dict: """Get all memories. Returns: list[TextualMemoryItem]: List of all memories. 
""" graph_output = self.graph_store.export_graph( - user_name=user_name, user_id=user_id, page=page, page_size=page_size, filter=filter + user_name=user_name, + user_id=user_id, + page=page, + page_size=page_size, + filter=filter, + memory_type=memory_type, ) return graph_output From 5a51c0469255ebea2332a3f95b09031672b05d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com> Date: Fri, 6 Feb 2026 16:23:01 +0800 Subject: [PATCH 4/5] fix --- src/memos/api/handlers/memory_handler.py | 64 +++++++++++++++++++++--- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/src/memos/api/handlers/memory_handler.py b/src/memos/api/handlers/memory_handler.py index aa378123c..54f795696 100644 --- a/src/memos/api/handlers/memory_handler.py +++ b/src/memos/api/handlers/memory_handler.py @@ -412,22 +412,68 @@ def handle_get_memories_dashboard( get_mem_req: GetMemoryDashboardRequest, naive_mem_cube: NaiveMemCube ) -> GetMemoryResponse: results: dict[str, Any] = {"text_mem": [], "pref_mem": [], "tool_mem": [], "skill_mem": []} - memories = naive_mem_cube.text_mem.get_all( + text_memory_type = ["WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"] + text_memories_info = naive_mem_cube.text_mem.get_all( user_name=get_mem_req.mem_cube_id, user_id=get_mem_req.user_id, page=get_mem_req.page, page_size=get_mem_req.page_size, filter=get_mem_req.filter, - )["nodes"] + memory_type=text_memory_type, + ) + text_memories, total_text_nodes = text_memories_info["nodes"], text_memories_info["total_nodes"] + results["text_mem"] = [ + { + "cube_id": get_mem_req.mem_cube_id, + "memories": text_memories, + "total_nodes": total_text_nodes, + } + ] - results = post_process_textual_mem(results, memories, get_mem_req.mem_cube_id) + if get_mem_req.include_tool_memory: + tool_memories_info = naive_mem_cube.text_mem.get_all( + user_name=get_mem_req.mem_cube_id, + user_id=get_mem_req.user_id, + page=get_mem_req.page, + page_size=get_mem_req.page_size, + filter=get_mem_req.filter, + memory_type=["ToolSchemaMemory", "ToolTrajectoryMemory"], + ) + tool_memories, total_tool_nodes = ( + tool_memories_info["nodes"], + tool_memories_info["total_nodes"], + ) - if not get_mem_req.include_tool_memory: - results["tool_mem"] = [] - if not get_mem_req.include_skill_memory: - results["skill_mem"] = [] + results["tool_mem"] = [ + { + "cube_id": get_mem_req.mem_cube_id, + "memories": tool_memories, + "total_nodes": total_tool_nodes, + } + ] + if get_mem_req.include_skill_memory: + skill_memories_info = naive_mem_cube.text_mem.get_all( + user_name=get_mem_req.mem_cube_id, + user_id=get_mem_req.user_id, + page=get_mem_req.page, + page_size=get_mem_req.page_size, + filter=get_mem_req.filter, + memory_type=["SkillMemory"], + ) + skill_memories, total_skill_nodes = ( + skill_memories_info["nodes"], + skill_memories_info["total_nodes"], + ) + results["skill_mem"] = [ + { + "cube_id": get_mem_req.mem_cube_id, + "memories": skill_memories, + "total_nodes": total_skill_nodes, + } + ] preferences: list[TextualMemoryItem] = [] + total_preference_nodes = 0 format_preferences = [] if get_mem_req.include_preference and naive_mem_cube.pref_mem is not None: @@ -456,7 +502,7 @@ def handle_get_memories_dashboard( filter_params.update(filter_copy) - preferences, _ = naive_mem_cube.pref_mem.get_memory_by_filter( + preferences, total_preference_nodes = naive_mem_cube.pref_mem.get_memory_by_filter( filter_params, page=get_mem_req.page, page_size=get_mem_req.page_size ) format_preferences = 
@@ -464,6 +510,8 @@
     results = post_process_pref_mem(
         results, format_preferences, get_mem_req.mem_cube_id, get_mem_req.include_preference
     )
+    if total_preference_nodes > 0 and results.get("pref_mem", []):
+        results["pref_mem"][0]["total_nodes"] = total_preference_nodes
 
     # Filter to only keep text_mem, pref_mem, tool_mem
     filtered_results = {

From be925c16d587460f47e0c5604e0f65c34b91cb39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Sat, 7 Feb 2026 20:31:34 +0800
Subject: [PATCH 5/5] hotfix: restore code; replace the original knowledge-base content with its sources

---
 src/memos/api/handlers/search_handler.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index e1a71737a..91980bdeb 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -11,6 +11,7 @@
 from typing import Any
 
 from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies
+from memos.api.handlers.formatters_handler import rerank_knowledge_mem
 from memos.api.product_models import APISearchRequest, SearchResponse
 from memos.log import get_logger
 from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import (
@@ -81,6 +82,15 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse
         results = self._mmr_dedup_text_memories(results, search_req.top_k, pref_top_k)
         self._strip_embeddings(results)
 
+        text_mem = results["text_mem"]
+        results["text_mem"] = rerank_knowledge_mem(
+            self.reranker,
+            query=search_req.query,
+            text_mem=text_mem,
+            top_k=search_req_local.top_k,
+            file_mem_proportion=0.5,
+        )
+
         self.logger.info(
             f"[SearchHandler] Final search results: count={len(results)} results={results}"
        )
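
A minimal sketch of the tag-derivation logic that PATCH 2/5 adds to the fallback path, shown in isolation (not part of any patch above). It assumes an installed MemOS environment; key_to_tags is a hypothetical helper name introduced only for illustration, while FastTokenizer, its constructor arguments, and tokenize_mixed are taken directly from the patch: the key is tokenized, single-character tokens are dropped, and the five longest phrases are kept as extra tags.

    from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import FastTokenizer

    # Same tokenizer configuration that PATCH 2/5 adds to the multi-modal parser's __init__.
    tokenizer = FastTokenizer(use_jieba=True, use_stopwords=True)

    def key_to_tags(key: str, max_tags: int = 5) -> list[str]:
        # Tokenize the (possibly mixed Chinese/English) key, drop single-character
        # tokens, prefer longer phrases, and keep at most max_tags of them.
        tokens = tokenizer.tokenize_mixed(key)
        tokens = [t for t in tokens if len(t) > 1]
        return sorted(tokens, key=len, reverse=True)[:max_tags]

    # Usage: tags returned here mirror what the patch appends to raw_chunk_mem.metadata.tags.
    # key_to_tags("inaccurate total nodes retrieval via get memory") -> up to five of the longest tokens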