MemTensor · CaralHsi · Jan 16, 2026 · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/src/memos/graph_dbs/base.py b/src/memos/graph_dbs/base.py
@@ -160,13 +160,17 @@ def search_by_embedding(self, vector: list[float], top_k: int = 5, **kwargs) ->
         """
 
     @abstractmethod
-    def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]:
+    def get_by_metadata(
+        self, filters: list[dict[str, Any]], status: str | None = None
+    ) -> list[str]:
         """
         Retrieve node IDs that match given metadata filters.
 
         Args:
             filters (dict[str, Any]): A dictionary of attribute-value filters.
                 Example: {"topic": "psychology", "importance": 2}
+            status (str, optional): Filter by status (e.g., 'activated', 'archived').
+                If None, no status filter is applied.
 
         Returns:
             list[str]: Node IDs whose metadata match the filter conditions.
@@ -239,13 +243,17 @@ def import_graph(self, data: dict[str, Any]) -> None:
         """
 
     @abstractmethod
-    def get_all_memory_items(self, scope: str, include_embedding: bool = False) -> list[dict]:
+    def get_all_memory_items(
+        self, scope: str, include_embedding: bool = False, status: str | None = None
+    ) -> list[dict]:
         """
         Retrieve all memory items of a specific memory_type.
 
         Args:
             scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'.
             include_embedding: with/without embedding
+            status (str, optional): Filter by status (e.g., 'activated', 'archived').
+                If None, no status filter is applied.
 
         Returns:
             list[dict]: Full list of memory items under this scope.

diff --git a/src/memos/graph_dbs/neo4j.py b/src/memos/graph_dbs/neo4j.py
@@ -916,6 +916,7 @@ def get_by_metadata(
         filter: dict | None = None,
         knowledgebase_ids: list[str] | None = None,
         user_name_flag: bool = True,
+        status: str | None = None,
     ) -> list[str]:
         """
         TODO:
@@ -933,6 +934,8 @@ def get_by_metadata(
                 {"field": "tags", "op": "contains", "value": "AI"},
                 ...
             ]
+        status (str, optional): Filter by status (e.g., 'activated', 'archived').
+            If None, no status filter is applied.
 
         Returns:
             list[str]: Node IDs whose metadata match the filter conditions. (AND logic).
@@ -942,15 +945,20 @@ def get_by_metadata(
             - Can be used for faceted recall or prefiltering before embedding rerank.
         """
         logger.info(
-            f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids}"
+            f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}"
         )
         print(
-            f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids}"
+            f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}"
         )
         user_name = user_name if user_name else self.config.user_name
         where_clauses = []
         params = {}
 
+        # Add status filter if provided
+        if status:
+            where_clauses.append("n.status = $status")
+            params["status"] = status
+
         for i, f in enumerate(filters):
             field = f["field"]
             op = f.get("op", "=")
@@ -1272,27 +1280,32 @@ def import_graph(self, data: dict[str, Any], user_name: str | None = None) -> No
     def get_all_memory_items(
         self,
         scope: str,
+        include_embedding: bool = False,
         filter: dict | None = None,
         knowledgebase_ids: list[str] | None = None,
+        status: str | None = None,
         **kwargs,
     ) -> list[dict]:
         """
         Retrieve all memory items of a specific memory_type.
 
         Args:
             scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'.
+            include_embedding (bool): Whether to include embedding in results.
             filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
                 Example: {"and": [{"id": "xxx"}, {"A": "yyy"}]} or {"or": [{"id": "xxx"}, {"A": "yyy"}]}
-        Returns:
+            knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by.
+            status (str, optional): Filter by status (e.g., 'activated', 'archived').
+                If None, no status filter is applied.
 
         Returns:
             list[dict]: Full list of memory items under this scope.
         """
         logger.info(
-            f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids}"
+            f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}"
         )
         print(
-            f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids}"
+            f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}"
         )
 
         user_name = kwargs.get("user_name") if kwargs.get("user_name") else self.config.user_name
@@ -1302,6 +1315,11 @@ def get_all_memory_items(
         where_clauses = ["n.memory_type = $scope"]
         params = {"scope": scope}
 
+        # Add status filter if provided
+        if status:
+            where_clauses.append("n.status = $status")
+            params["status"] = status
+
         # Build user_name filter with knowledgebase_ids support (OR relationship) using common method
         user_name_conditions, user_name_params = self._build_user_name_and_kb_ids_conditions_cypher(
             user_name=user_name,

diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py
@@ -2823,6 +2823,7 @@ def get_all_memory_items(
         user_name: str | None = None,
         filter: dict | None = None,
         knowledgebase_ids: list | None = None,
+        status: str | None = None,
     ) -> list[dict]:
         """
         Retrieve all memory items of a specific memory_type.
@@ -2831,12 +2832,16 @@ def get_all_memory_items(
             scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'.
             include_embedding: with/without embedding
             user_name (str, optional): User name for filtering in non-multi-db mode
+            filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results.
+            knowledgebase_ids (list, optional): List of knowledgebase IDs to filter by.
+            status (str, optional): Filter by status (e.g., 'activated', 'archived').
+                If None, no status filter is applied.
 
         Returns:
             list[dict]: Full list of memory items under this scope.
         """
         logger.info(
-            f"[get_all_memory_items] filter: {filter}, knowledgebase_ids: {knowledgebase_ids}"
+            f"[get_all_memory_items] filter: {filter}, knowledgebase_ids: {knowledgebase_ids}, status: {status}"
         )
 
         user_name = user_name if user_name else self._get_config_value("user_name")
@@ -2867,6 +2872,8 @@ def get_all_memory_items(
         if include_embedding:
             # Build WHERE clause with user_name/knowledgebase_ids and filter
             where_parts = [f"n.memory_type = '{scope}'"]
+            if status:
+                where_parts.append(f"n.status = '{status}'")
             if user_name_where:
                 # user_name_where already contains parentheses if it's an OR condition
                 where_parts.append(user_name_where)
@@ -2927,6 +2934,8 @@ def get_all_memory_items(
         else:
             # Build WHERE clause with user_name/knowledgebase_ids and filter
             where_parts = [f"n.memory_type = '{scope}'"]
+            if status:
+                where_parts.append(f"n.status = '{status}'")
             if user_name_where:
                 # user_name_where already contains parentheses if it's an OR condition
                 where_parts.append(user_name_where)

diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py
@@ -867,52 +867,120 @@
 Important: Output **only** the JSON. No extra text.
 """
 
-MEMORY_MERGE_PROMPT_EN = """You are a memory consolidation expert. Given a new memory and similar existing memories, decide if they should be merged.
+MEMORY_MERGE_PROMPT_EN = """You are a memory consolidation expert. Given a new memory and a set of similar existing memories, determine whether they should be merged.
+
+Before generating the value, you must complete the following reasoning steps (done in internal reasoning, no need to output them):
+1.	Identify the “fact units” contained in the new memory, for example:
+•	Identity-type facts: name, occupation, place of residence, etc.
+•	Stable preference-type facts: things the user likes/dislikes long-term, frequently visited places, etc.
+•	Relationship-type facts: relationships with someone (friend, colleague, fixed activity partner, etc.)
+•	One-off event/plan-type facts: events on a specific day, temporary plans for this weekend, etc.
+2.	For each fact unit, determine:
+•	Which existing memories are expressing “the same kind of fact”
+•	Whether the corresponding fact in the new memory is just a “repeated confirmation” of that fact, rather than “new factual content”
+
+Merge rules (must be followed when generating value):
+•	The merged value:
+•	Must not repeat the same meaning (each fact should be described only once)
+•	Must not repeat the same fact just because it was mentioned multiple times or at different times
+•	Unless time itself changes the meaning (for example, “used to dislike → now likes”), do not keep specific time information
+•	If the new memory contains multiple different types of facts (for example: “name + hobby + plan for this weekend”):
+•	You may output multiple merge results; each merge result should focus on only one type of fact (for example: one about “name”, one about “hobby”)
+•	Do not force unrelated facts into the same value
+•	One-off events/plans (such as “going skiing this weekend”, “attending a party on Sunday”):
+•	If there is no directly related and complementary event memory in the existing memories, treat it as an independent memory and do not merge it with identity/stable preference-type memories
+•	Do not merge a “temporary plan” and a “long-term preference” into the same value just because they are related (e.g. a plan to ski vs. a long-term preference for skiing)
+
+Output format requirements:
+•	You must return a single JSON object.
+•	If a merge occurred:
+•	“value”: The merged memory content (only describe the final conclusion, preserving all “semantically unique” information, without repetition)
+•	“merged_from”: A list of IDs of the similar memories that were merged
+•	“should_merge”: true
+•	If the new memory cannot be merged with any existing memories, return:
+•	“should_merge”: false
 
 Example:
 New memory:
-The user’s name is Tom, the user likes skiing, and plans to go skiing this weekend
+The user’s name is Tom, the user likes skiing, and plans to go skiing this weekend.
 
 Similar existing memories:
 xxxx-xxxx-xxxx-xxxx-01: The user’s name is Tom
 xxxx-xxxx-xxxx-xxxx-10: The user likes skiing
 xxxx-xxxx-xxxx-xxxx-11: The user lives by the sea
 
-Expected output:
+Expected return value:
 {{
-“value”: “The user’s name is Tom, the user likes skiing”,
-“merged_from”: [“xxxx-xxxx-xxxx-xxxx-01”, “xxxx-xxxx-xxxx-xxxx-10”],
-“should_merge”: true
+"value": "The user's name is Tom and the user likes skiing",
+"merged_from": ["xxxx-xxxx-xxxx-xxxx-01", "xxxx-xxxx-xxxx-xxxx-10"],
+"should_merge": true
 }}
 
 New memory:
-The user is going to attend a party on Sunday
+The user is going to attend a party on Sunday.
 
 Similar existing memories:
-xxxx-xxxx-xxxx-xxxx-01: The user read a book yesterday
+xxxx-xxxx-xxxx-xxxx-01: The user read a book yesterday.
 
-Expected output:
+Expected return value:
 {{
-“should_merge”: false
+"should_merge": false
 }}
 
-If the new memory substantially overlaps with or complements the existing memories, merge them into a single consolidated memory and return a JSON object with:
-- "value": the merged memory content (preserving all unique information)
-- "merged_from": list of IDs from similar_memories that were merged
-- "should_merge": true
+If the new memory largely overlaps with or complements the existing memories, merge them into an integrated memory and return a JSON object:
+•	“value”: The merged memory content
+•	“merged_from”: A list of IDs of the similar memories that were merged
+•	“should_merge”: true
+
+If the new memory is unique and should remain independent, return:
+{{
+"should_merge": false
+}}
 
-If the new memory is distinct and should remain separate, return:
-- "should_merge": false
+You must only return a valid JSON object in the final output, and no additional content (no natural language explanations, no extra fields).
 
-New Memory:
+New memory:
 {new_memory}
 
-Similar Existing Memories:
+Similar existing memories:
 {similar_memories}
 
-Return ONLY a valid JSON object, nothing else."""
+Only return a valid JSON object, and do not include any other content.
+"""
 
-MEMORY_MERGE_PROMPT_ZH = """你是一个记忆整合专家。给定一个新记忆和相似的现有记忆，判断它们是否应该合并。
+MEMORY_MERGE_PROMPT_ZH = """
+你是一个记忆整合专家。给定一个新记忆和相似的现有记忆，判断它们是否应该合并。
+
+在生成 value 之前，必须先完成以下判断步骤（在内在推理中完成，不需要输出）：
+1. 识别新记忆中包含的「事实单元」，例如：
+   - 身份信息类：名字、职业、居住地等
+   - 稳定偏好类：长期喜欢/不喜欢的事物、常去地点等
+   - 关系类：与某人的关系（朋友、同事、固定搭子等）
+   - 一次性事件/计划类：某天要参加的活动、本周末的临时安排等
+2. 对每个事实单元，判断：
+   - 哪些 existing memories 在表达“同一类事实”，
+   - 新记忆中对应的事实是否只是对该事实的「重复确认」，而不是“新的事实内容”
+
+合并规则（生成 value 时必须遵守）：
+- 合并后的 value：
+  - 不要重复表达同一语义（同一事实只描述一次）
+  - 不要因为多次提及或不同时间而重复同一事实
+  - 除非时间本身改变了语义（例如“从不喜欢 → 现在开始喜欢”），否则不要保留具体时间信息
+- 如果新记忆中包含多个不同类型的事实（例如“名字 + 爱好 + 本周计划”）：
+  - 不要合并就好
+  - 不要把彼此无关的事实硬塞进同一个 value 中
+- 一次性事件/计划（如“本周末去滑雪”“周天参加聚会”）：
+  - 如果 existing memories 中没有与之直接相关、可互补的事件记忆，则视为独立记忆，不要与身份/长期偏好类记忆合并
+  - 不要因为它和某个长期偏好有关（例如喜欢滑雪），就把“临时计划”和“长期偏好”合在一个 value 里
+
+输出格式要求：
+- 你需要返回一个 JSON 对象。
+- 若发生了合并：
+  - "value": 合并后的记忆内容（只描述最终结论，保留所有「语义上独特」的信息，不重复）
+  - "merged_from": 被合并的相似记忆 ID 列表
+  - "should_merge": true
+- 若新记忆无法与现有记忆合并，返回：
+  - "should_merge": false
 
 示例：
 新记忆：
@@ -941,14 +1009,17 @@
     "should_merge": false
 }}
 
-
-如果新记忆与现有记忆大量重叠或互补，将它们合并为一个整合的记忆，并返回一个JSON列表：
-- "value": 合并后的记忆内容（保留所有独特信息）
+如果新记忆与现有记忆大量重叠或互补，将它们合并为一个整合的记忆，并返回一个JSON对象：
+- "value": 合并后的记忆内容
 - "merged_from": 被合并的相似记忆ID列表
 - "should_merge": true
 
 如果新记忆是独特的，应该保持独立，返回：
-- "should_merge": false
+{{
+    "should_merge": false
+}}
+
+最终只返回有效的 JSON 对象，不要任何额外内容（不要自然语言解释、不要多余字段）。
 
 新记忆：
 {new_memory}