9 changes: 6 additions & 3 deletions src/memos/api/handlers/search_handler.py
@@ -120,11 +120,14 @@ def _apply_relativity_threshold(results: dict[str, Any], relativity: float) -> d
             if not isinstance(mem, dict):
                 continue
             meta = mem.get("metadata", {})
-            score = meta.get("relativity", 0.0) if isinstance(meta, dict) else 0.0
+            if key == "text_mem":
+                score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0
+            else:
+                score = meta.get("score", 1.0) if isinstance(meta, dict) else 1.0
             try:
-                score_val = float(score) if score is not None else 0.0
+                score_val = float(score) if score is not None else 1.0
             except (TypeError, ValueError):
-                score_val = 0.0
+                score_val = 1.0
             if score_val >= relativity:
                 filtered.append(mem)
 
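Net effect of this hunk: entries whose score is missing or unparsable now default to 1.0 and pass the relativity threshold instead of being dropped, and only `text_mem` entries are scored by `relativity` while other memory types use `score`. A minimal standalone sketch of the resulting filter semantics (illustrative, not the handler's exact code):

from typing import Any

def apply_relativity_threshold(results: dict[str, Any], relativity: float) -> dict[str, Any]:
    """Keep memories whose score clears the threshold; unscored items default to 1.0 (kept)."""
    out: dict[str, Any] = {}
    for key, mems in results.items():
        filtered = []
        for mem in mems:
            if not isinstance(mem, dict):
                continue
            meta = mem.get("metadata", {})
            field = "relativity" if key == "text_mem" else "score"
            score = meta.get(field, 1.0) if isinstance(meta, dict) else 1.0
            try:
                score_val = float(score) if score is not None else 1.0
            except (TypeError, ValueError):
                score_val = 1.0  # malformed score: keep rather than silently drop
            if score_val >= relativity:
                filtered.append(mem)
        out[key] = filtered
    return out

# A text_mem entry with no "relativity" key now survives a 0.5 threshold:
assert apply_relativity_threshold({"text_mem": [{"metadata": {}}]}, 0.5) == {"text_mem": [{"metadata": {}}]}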
109 changes: 107 additions & 2 deletions src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -1,3 +1,4 @@
+import copy
 import traceback
 
 from concurrent.futures import as_completed
@@ -306,8 +307,8 @@ def _parse_task(
         query = parsed_goal.rephrased_query or query
         # if goal has extra memories, embed them too
         if parsed_goal.memories:
-            query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
-
+            embed_texts = list(dict.fromkeys([query, *parsed_goal.memories]))
+            query_embedding = self.embedder.embed(embed_texts)
         return parsed_goal, query_embedding, context, query
 
     @timed
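The move from a set literal to `dict.fromkeys` is not just deduplication style: a set does not preserve insertion order, so the rephrased query could land anywhere in the embedded batch, while the new keyword path below relies on `query_embedding[0]` being the query vector. A quick standalone illustration of the ordering guarantee:

query = "alpha"
memories = ["beta", "alpha", "gamma"]

# Set-based dedup is unordered; the query may not end up at index 0.
unordered = list({query, *memories})

# dict.fromkeys dedups while preserving first-seen order, so the query stays first
# and embeddings[0] can safely be treated as the query vector.
ordered = list(dict.fromkeys([query, *memories]))
assert ordered == ["alpha", "beta", "gamma"]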
@@ -379,6 +380,20 @@ def _retrieve_paths(
                 user_name,
             )
         )
+        tasks.append(
+            executor.submit(
+                self._retrieve_from_keyword,
+                query,
+                parsed_goal,
+                query_embedding,
+                top_k,
+                memory_type,
+                search_filter,
+                search_priority,
+                user_name,
+                id_filter,
+            )
+        )
         if search_tool_memory:
             tasks.append(
                 executor.submit(
@@ -456,6 +471,96 @@ def _retrieve_from_working_memory(
             search_filter=search_filter,
         )
 
+    @timed
+    def _retrieve_from_keyword(
+        self,
+        query,
+        parsed_goal,
+        query_embedding,
+        top_k,
+        memory_type,
+        search_filter: dict | None = None,
+        search_priority: dict | None = None,
+        user_name: str | None = None,
+        id_filter: dict | None = None,
+    ) -> list[tuple[TextualMemoryItem, float]]:
+        """Keyword/fulltext path that directly calls graph DB fulltext search."""
+
+        if memory_type not in ["All", "LongTermMemory", "UserMemory"]:
+            return []
+        if not query_embedding:
+            return []
+
+        query_words: list[str] = []
+        if self.tokenizer:
+            query_words = self.tokenizer.tokenize_mixed(query)
+        else:
+            query_words = query.strip().split()
+        # Use unique tokens; avoid passing the raw query into `to_tsquery(...)` because it may
+        # contain spaces/operators that cause tsquery parsing errors.
+        query_words = list(dict.fromkeys(query_words))
+        if len(query_words) > 64:
+            query_words = query_words[:64]
+        if not query_words:
+            return []
+        tsquery_terms = ["'" + w.replace("'", "''") + "'" for w in query_words if w and w.strip()]
+        if not tsquery_terms:
+            return []
+
+        scopes = [memory_type] if memory_type != "All" else ["LongTermMemory", "UserMemory"]
+
+        id_to_score: dict[str, float] = {}
+        for scope in scopes:
+            hits = self.graph_store.search_by_fulltext(
+                query_words=tsquery_terms,
+                top_k=top_k * 2,
+                status="activated",
+                scope=scope,
+                search_filter=None,
+                filter=search_filter,
+                user_name=user_name,
+                tsquery_config="jiebaqry",
+            )
+            for h in hits or []:
+                hid = str(h.get("id") or "").strip().strip("'\"")
+                if not hid:
+                    continue
+                score = h.get("score", 0.0)
+                if hid not in id_to_score or score > id_to_score[hid]:
+                    id_to_score[hid] = score
+        if not id_to_score:
+            return []
+
+        sorted_ids = sorted(id_to_score.keys(), key=lambda x: id_to_score[x], reverse=True)
+        sorted_ids = sorted_ids[:top_k]
+        node_dicts = (
+            self.graph_store.get_nodes(sorted_ids, include_embedding=True, user_name=user_name)
+            or []
+        )
+        id_to_node = {n.get("id"): n for n in node_dicts}
+        ordered_nodes = []
+
+        for rid in sorted_ids:
+            if rid in id_to_node:
+                node = copy.deepcopy(id_to_node[rid])
+                meta = node.setdefault("metadata", {})
+                meta_target = meta
+                if isinstance(meta, dict) and isinstance(meta.get("metadata"), dict):
+                    meta_target = meta["metadata"]
+                if isinstance(meta_target, dict):
+                    meta_target["keyword_score"] = id_to_score[rid]
+                ordered_nodes.append(node)
+
+        results = [TextualMemoryItem.from_dict(n) for n in ordered_nodes]
+        return self.reranker.rerank(
+            query=query,
+            query_embedding=query_embedding[0],
+            graph_results=results,
+            top_k=top_k,
+            parsed_goal=parsed_goal,
+            search_filter=search_filter,
+        )
+
     # --- Path B
     @timed
     def _retrieve_from_long_term_and_user(
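The subtle part of the new keyword path is the term preprocessing: tokens are deduplicated, capped at 64, and each one is single-quoted with embedded quotes doubled, so a free-form query can never inject whitespace or operators into PostgreSQL's `to_tsquery(...)`. A hedged sketch of just that step, assuming the whitespace-split fallback (the real method prefers `tokenizer.tokenize_mixed` when a tokenizer is configured):

def build_tsquery_terms(query: str, max_terms: int = 64) -> list[str]:
    """Turn a free-form query into quoted, deduplicated, length-capped tsquery terms."""
    tokens = query.strip().split()  # fallback tokenizer only; see _retrieve_from_keyword
    tokens = list(dict.fromkeys(tokens))[:max_terms]  # dedup, keep order, cap length
    # Quote each token and double embedded single quotes so to_tsquery(...) parses it safely.
    return ["'" + t.replace("'", "''") + "'" for t in tokens if t and t.strip()]

print(build_tsquery_terms("john's quarterly report report"))
# ["'john''s'", "'quarterly'", "'report'"]

Per-scope fulltext hits are then merged by keeping the maximum score per node id, and the surviving nodes are fetched, tagged with `keyword_score`, and handed to the same reranker as the other retrieval paths.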
4 changes: 2 additions & 2 deletions src/memos/multi_mem_cube/single_cube.py
@@ -468,13 +468,13 @@ def _fast_search(
             search_req=search_req,
             user_context=user_context,
             mode=SearchMode.FAST,
-            include_embedding=(search_req.dedup == "mmr"),
+            include_embedding=(search_req.dedup in ("mmr", "sim")),
         )
 
         return self._postformat_memories(
             search_results,
             user_context.mem_cube_id,
-            include_embedding=search_req.dedup == "sim",
+            include_embedding=(search_req.dedup in ("mmr", "sim")),
             neighbor_discovery=search_req.neighbor_discovery,
         )
 
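Both call sites now use the same predicate because both dedup strategies operate on vectors: "mmr" needs embeddings for diversity selection and "sim" needs them for near-duplicate removal, so the vectors must survive both search and post-formatting in either mode. A generic cosine-similarity dedup sketch showing why the embeddings have to be attached (illustrative only, not this project's implementation):

import numpy as np

def sim_dedup(items: list[dict], threshold: float = 0.95) -> list[dict]:
    """Keep an item only if its embedding is not a near-duplicate of one already kept."""
    kept: list[dict] = []
    kept_vecs: list[np.ndarray] = []
    for item in items:
        vec = np.asarray(item["embedding"], dtype=float)  # fails if embeddings were stripped upstream
        vec = vec / (np.linalg.norm(vec) or 1.0)  # normalize so the dot product is cosine similarity
        if all(float(vec @ kv) < threshold for kv in kept_vecs):
            kept.append(item)
            kept_vecs.append(vec)
    return kept

items = [
    {"id": "a", "embedding": [1.0, 0.0]},
    {"id": "b", "embedding": [0.999, 0.01]},  # near-duplicate of "a"
    {"id": "c", "embedding": [0.0, 1.0]},
]
print([it["id"] for it in sim_dedup(items)])  # ['a', 'c']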