diff --git a/src/memos/graph_dbs/base.py b/src/memos/graph_dbs/base.py index a8f8ff414..87a50d443 100644 --- a/src/memos/graph_dbs/base.py +++ b/src/memos/graph_dbs/base.py @@ -160,13 +160,17 @@ def search_by_embedding(self, vector: list[float], top_k: int = 5, **kwargs) -> """ @abstractmethod - def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]: + def get_by_metadata( + self, filters: list[dict[str, Any]], status: str | None = None + ) -> list[str]: """ Retrieve node IDs that match given metadata filters. Args: filters (dict[str, Any]): A dictionary of attribute-value filters. Example: {"topic": "psychology", "importance": 2} + status (str, optional): Filter by status (e.g., 'activated', 'archived'). + If None, no status filter is applied. Returns: list[str]: Node IDs whose metadata match the filter conditions. @@ -239,13 +243,17 @@ def import_graph(self, data: dict[str, Any]) -> None: """ @abstractmethod - def get_all_memory_items(self, scope: str, include_embedding: bool = False) -> list[dict]: + def get_all_memory_items( + self, scope: str, include_embedding: bool = False, status: str | None = None + ) -> list[dict]: """ Retrieve all memory items of a specific memory_type. Args: scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'. include_embedding: with/without embedding + status (str, optional): Filter by status (e.g., 'activated', 'archived'). + If None, no status filter is applied. Returns: list[dict]: Full list of memory items under this scope. diff --git a/src/memos/graph_dbs/neo4j.py b/src/memos/graph_dbs/neo4j.py index 64aedc8f4..8698b6f73 100644 --- a/src/memos/graph_dbs/neo4j.py +++ b/src/memos/graph_dbs/neo4j.py @@ -916,6 +916,7 @@ def get_by_metadata( filter: dict | None = None, knowledgebase_ids: list[str] | None = None, user_name_flag: bool = True, + status: str | None = None, ) -> list[str]: """ TODO: @@ -933,6 +934,8 @@ def get_by_metadata( {"field": "tags", "op": "contains", "value": "AI"}, ... ] + status (str, optional): Filter by status (e.g., 'activated', 'archived'). + If None, no status filter is applied. Returns: list[str]: Node IDs whose metadata match the filter conditions. (AND logic). @@ -942,15 +945,20 @@ def get_by_metadata( - Can be used for faceted recall or prefiltering before embedding rerank. """ logger.info( - f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids}" + f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}" ) print( - f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids}" + f"[get_by_metadata] filters: {filters},user_name: {user_name},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}" ) user_name = user_name if user_name else self.config.user_name where_clauses = [] params = {} + # Add status filter if provided + if status: + where_clauses.append("n.status = $status") + params["status"] = status + for i, f in enumerate(filters): field = f["field"] op = f.get("op", "=") @@ -1272,8 +1280,10 @@ def import_graph(self, data: dict[str, Any], user_name: str | None = None) -> No def get_all_memory_items( self, scope: str, + include_embedding: bool = False, filter: dict | None = None, knowledgebase_ids: list[str] | None = None, + status: str | None = None, **kwargs, ) -> list[dict]: """ @@ -1281,18 +1291,21 @@ def get_all_memory_items( Args: scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'. + include_embedding (bool): Whether to include embedding in results. filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results. Example: {"and": [{"id": "xxx"}, {"A": "yyy"}]} or {"or": [{"id": "xxx"}, {"A": "yyy"}]} - Returns: + knowledgebase_ids (list[str], optional): List of knowledgebase IDs to filter by. + status (str, optional): Filter by status (e.g., 'activated', 'archived'). + If None, no status filter is applied. Returns: list[dict]: Full list of memory items under this scope. """ logger.info( - f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids}" + f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}" ) print( - f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids}" + f"[get_all_memory_items] scope: {scope},filter: {filter},knowledgebase_ids: {knowledgebase_ids},status: {status}" ) user_name = kwargs.get("user_name") if kwargs.get("user_name") else self.config.user_name @@ -1302,6 +1315,11 @@ def get_all_memory_items( where_clauses = ["n.memory_type = $scope"] params = {"scope": scope} + # Add status filter if provided + if status: + where_clauses.append("n.status = $status") + params["status"] = status + # Build user_name filter with knowledgebase_ids support (OR relationship) using common method user_name_conditions, user_name_params = self._build_user_name_and_kb_ids_conditions_cypher( user_name=user_name, diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py index e67f866ac..4b739bb0f 100644 --- a/src/memos/graph_dbs/polardb.py +++ b/src/memos/graph_dbs/polardb.py @@ -2823,6 +2823,7 @@ def get_all_memory_items( user_name: str | None = None, filter: dict | None = None, knowledgebase_ids: list | None = None, + status: str | None = None, ) -> list[dict]: """ Retrieve all memory items of a specific memory_type. @@ -2831,12 +2832,16 @@ def get_all_memory_items( scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'. include_embedding: with/without embedding user_name (str, optional): User name for filtering in non-multi-db mode + filter (dict, optional): Filter conditions with 'and' or 'or' logic for search results. + knowledgebase_ids (list, optional): List of knowledgebase IDs to filter by. + status (str, optional): Filter by status (e.g., 'activated', 'archived'). + If None, no status filter is applied. Returns: list[dict]: Full list of memory items under this scope. """ logger.info( - f"[get_all_memory_items] filter: {filter}, knowledgebase_ids: {knowledgebase_ids}" + f"[get_all_memory_items] filter: {filter}, knowledgebase_ids: {knowledgebase_ids}, status: {status}" ) user_name = user_name if user_name else self._get_config_value("user_name") @@ -2867,6 +2872,8 @@ def get_all_memory_items( if include_embedding: # Build WHERE clause with user_name/knowledgebase_ids and filter where_parts = [f"n.memory_type = '{scope}'"] + if status: + where_parts.append(f"n.status = '{status}'") if user_name_where: # user_name_where already contains parentheses if it's an OR condition where_parts.append(user_name_where) @@ -2927,6 +2934,8 @@ def get_all_memory_items( else: # Build WHERE clause with user_name/knowledgebase_ids and filter where_parts = [f"n.memory_type = '{scope}'"] + if status: + where_parts.append(f"n.status = '{status}'") if user_name_where: # user_name_where already contains parentheses if it's an OR condition where_parts.append(user_name_where) diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index 4a451c842..2a2df0e0b 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -867,52 +867,120 @@ Important: Output **only** the JSON. No extra text. """ -MEMORY_MERGE_PROMPT_EN = """You are a memory consolidation expert. Given a new memory and similar existing memories, decide if they should be merged. +MEMORY_MERGE_PROMPT_EN = """You are a memory consolidation expert. Given a new memory and a set of similar existing memories, determine whether they should be merged. + +Before generating the value, you must complete the following reasoning steps (done in internal reasoning, no need to output them): +1. Identify the “fact units” contained in the new memory, for example: +• Identity-type facts: name, occupation, place of residence, etc. +• Stable preference-type facts: things the user likes/dislikes long-term, frequently visited places, etc. +• Relationship-type facts: relationships with someone (friend, colleague, fixed activity partner, etc.) +• One-off event/plan-type facts: events on a specific day, temporary plans for this weekend, etc. +2. For each fact unit, determine: +• Which existing memories are expressing “the same kind of fact” +• Whether the corresponding fact in the new memory is just a “repeated confirmation” of that fact, rather than “new factual content” + +Merge rules (must be followed when generating value): +• The merged value: +• Must not repeat the same meaning (each fact should be described only once) +• Must not repeat the same fact just because it was mentioned multiple times or at different times +• Unless time itself changes the meaning (for example, “used to dislike → now likes”), do not keep specific time information +• If the new memory contains multiple different types of facts (for example: “name + hobby + plan for this weekend”): +• You may output multiple merge results; each merge result should focus on only one type of fact (for example: one about “name”, one about “hobby”) +• Do not force unrelated facts into the same value +• One-off events/plans (such as “going skiing this weekend”, “attending a party on Sunday”): +• If there is no directly related and complementary event memory in the existing memories, treat it as an independent memory and do not merge it with identity/stable preference-type memories +• Do not merge a “temporary plan” and a “long-term preference” into the same value just because they are related (e.g. a plan to ski vs. a long-term preference for skiing) + +Output format requirements: +• You must return a single JSON object. +• If a merge occurred: +• “value”: The merged memory content (only describe the final conclusion, preserving all “semantically unique” information, without repetition) +• “merged_from”: A list of IDs of the similar memories that were merged +• “should_merge”: true +• If the new memory cannot be merged with any existing memories, return: +• “should_merge”: false Example: New memory: -The user’s name is Tom, the user likes skiing, and plans to go skiing this weekend +The user’s name is Tom, the user likes skiing, and plans to go skiing this weekend. Similar existing memories: xxxx-xxxx-xxxx-xxxx-01: The user’s name is Tom xxxx-xxxx-xxxx-xxxx-10: The user likes skiing xxxx-xxxx-xxxx-xxxx-11: The user lives by the sea -Expected output: +Expected return value: {{ -“value”: “The user’s name is Tom, the user likes skiing”, -“merged_from”: [“xxxx-xxxx-xxxx-xxxx-01”, “xxxx-xxxx-xxxx-xxxx-10”], -“should_merge”: true +"value": "The user's name is Tom and the user likes skiing", +"merged_from": ["xxxx-xxxx-xxxx-xxxx-01", "xxxx-xxxx-xxxx-xxxx-10"], +"should_merge": true }} New memory: -The user is going to attend a party on Sunday +The user is going to attend a party on Sunday. Similar existing memories: -xxxx-xxxx-xxxx-xxxx-01: The user read a book yesterday +xxxx-xxxx-xxxx-xxxx-01: The user read a book yesterday. -Expected output: +Expected return value: {{ -“should_merge”: false +"should_merge": false }} -If the new memory substantially overlaps with or complements the existing memories, merge them into a single consolidated memory and return a JSON object with: -- "value": the merged memory content (preserving all unique information) -- "merged_from": list of IDs from similar_memories that were merged -- "should_merge": true +If the new memory largely overlaps with or complements the existing memories, merge them into an integrated memory and return a JSON object: +• “value”: The merged memory content +• “merged_from”: A list of IDs of the similar memories that were merged +• “should_merge”: true + +If the new memory is unique and should remain independent, return: +{{ +"should_merge": false +}} -If the new memory is distinct and should remain separate, return: -- "should_merge": false +You must only return a valid JSON object in the final output, and no additional content (no natural language explanations, no extra fields). -New Memory: +New memory: {new_memory} -Similar Existing Memories: +Similar existing memories: {similar_memories} -Return ONLY a valid JSON object, nothing else.""" +Only return a valid JSON object, and do not include any other content. +""" -MEMORY_MERGE_PROMPT_ZH = """你是一个记忆整合专家。给定一个新记忆和相似的现有记忆,判断它们是否应该合并。 +MEMORY_MERGE_PROMPT_ZH = """ +你是一个记忆整合专家。给定一个新记忆和相似的现有记忆,判断它们是否应该合并。 + +在生成 value 之前,必须先完成以下判断步骤(在内在推理中完成,不需要输出): +1. 识别新记忆中包含的「事实单元」,例如: + - 身份信息类:名字、职业、居住地等 + - 稳定偏好类:长期喜欢/不喜欢的事物、常去地点等 + - 关系类:与某人的关系(朋友、同事、固定搭子等) + - 一次性事件/计划类:某天要参加的活动、本周末的临时安排等 +2. 对每个事实单元,判断: + - 哪些 existing memories 在表达“同一类事实”, + - 新记忆中对应的事实是否只是对该事实的「重复确认」,而不是“新的事实内容” + +合并规则(生成 value 时必须遵守): +- 合并后的 value: + - 不要重复表达同一语义(同一事实只描述一次) + - 不要因为多次提及或不同时间而重复同一事实 + - 除非时间本身改变了语义(例如“从不喜欢 → 现在开始喜欢”),否则不要保留具体时间信息 +- 如果新记忆中包含多个不同类型的事实(例如“名字 + 爱好 + 本周计划”): + - 不要合并就好 + - 不要把彼此无关的事实硬塞进同一个 value 中 +- 一次性事件/计划(如“本周末去滑雪”“周天参加聚会”): + - 如果 existing memories 中没有与之直接相关、可互补的事件记忆,则视为独立记忆,不要与身份/长期偏好类记忆合并 + - 不要因为它和某个长期偏好有关(例如喜欢滑雪),就把“临时计划”和“长期偏好”合在一个 value 里 + +输出格式要求: +- 你需要返回一个 JSON 对象。 +- 若发生了合并: + - "value": 合并后的记忆内容(只描述最终结论,保留所有「语义上独特」的信息,不重复) + - "merged_from": 被合并的相似记忆 ID 列表 + - "should_merge": true +- 若新记忆无法与现有记忆合并,返回: + - "should_merge": false 示例: 新记忆: @@ -941,14 +1009,17 @@ "should_merge": false }} - -如果新记忆与现有记忆大量重叠或互补,将它们合并为一个整合的记忆,并返回一个JSON列表: -- "value": 合并后的记忆内容(保留所有独特信息) +如果新记忆与现有记忆大量重叠或互补,将它们合并为一个整合的记忆,并返回一个JSON对象: +- "value": 合并后的记忆内容 - "merged_from": 被合并的相似记忆ID列表 - "should_merge": true 如果新记忆是独特的,应该保持独立,返回: -- "should_merge": false +{{ + "should_merge": false +}} + +最终只返回有效的 JSON 对象,不要任何额外内容(不要自然语言解释、不要多余字段)。 新记忆: {new_memory}