From 8d9cc27fd5754250367bb39051643a8bf3a8c152 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 21 Jan 2026 09:58:34 +0800
Subject: [PATCH 1/6] fix rerank too long

---
 src/memos/api/config.py                 | 2 ++
 src/memos/reranker/factory.py           | 4 ++++
 src/memos/reranker/http_bge.py          | 8 ++++++++
 src/memos/reranker/http_bge_strategy.py | 9 +++++++++
 4 files changed, 23 insertions(+)

diff --git a/src/memos/api/config.py b/src/memos/api/config.py
index 204b0961c..a3bf25be0 100644
--- a/src/memos/api/config.py
+++ b/src/memos/api/config.py
@@ -410,6 +410,8 @@ def get_feedback_reranker_config() -> dict[str, Any]:
             "url": os.getenv("MOS_RERANKER_URL", "localhost:8000/v1/rerank"),
             "model": os.getenv("MOS_FEEDBACK_RERANKER_MODEL", "bge-reranker-v2-m3"),
             "timeout": 10,
+            "max_query_tokens": int(os.getenv("MOS_RERANKER_MAX_TOKENS", 8000)),
+            "concate_len": int(os.getenv("MOS_RERANKER_CONCAT_LEN", 1000)),
             "headers_extra": json.loads(os.getenv("MOS_RERANKER_HEADERS_EXTRA", "{}")),
             "rerank_source": os.getenv("MOS_RERANK_SOURCE"),
             "reranker_strategy": os.getenv("MOS_RERANKER_STRATEGY", "single_turn"),

diff --git a/src/memos/reranker/factory.py b/src/memos/reranker/factory.py
index 1440704a6..21282f613 100644
--- a/src/memos/reranker/factory.py
+++ b/src/memos/reranker/factory.py
@@ -42,6 +42,8 @@ def from_config(cfg: RerankerConfigFactory | None) -> BaseReranker | None:
             reranker_url=c.get("url") or c.get("endpoint") or c.get("reranker_url"),
             model=c.get("model", "bge-reranker-v2-m3"),
             timeout=int(c.get("timeout", 10)),
+            max_query_tokens=min(max(c.get("max_query_tokens", 8000), 100), 8000),
+            concate_len=min(max(c.get("concate_len", 1000), 4), 8000),
             headers_extra=headers_extra,
             rerank_source=c.get("rerank_source"),
         )
@@ -60,6 +62,8 @@ def from_config(cfg: RerankerConfigFactory | None) -> BaseReranker | None:
             reranker_url=c.get("url") or c.get("endpoint") or c.get("reranker_url"),
             model=c.get("model", "bge-reranker-v2-m3"),
             timeout=int(c.get("timeout", 10)),
+            max_query_tokens=min(max(c.get("max_query_tokens", 8000), 100), 8000),
+            concate_len=min(max(c.get("concate_len", 1000), 4), 8000),
             headers_extra=headers_extra,
             rerank_source=c.get("rerank_source"),
             reranker_strategy=c.get("reranker_strategy"),

diff --git a/src/memos/reranker/http_bge.py b/src/memos/reranker/http_bge.py
index 32034cf6d..b100badd5 100644
--- a/src/memos/reranker/http_bge.py
+++ b/src/memos/reranker/http_bge.py
@@ -80,6 +80,8 @@ def __init__(
         token: str = "",
         model: str = "bge-reranker-v2-m3",
         timeout: int = 10,
+        max_query_tokens: int | None = None,
+        concate_len: int | None = None,
         headers_extra: dict | None = None,
         rerank_source: str | None = None,
         boost_weights: dict[str, float] | None = None,
@@ -107,6 +109,8 @@ def __init__(
         self.token = token or ""
         self.model = model
         self.timeout = timeout
+        self.max_query_tokens = max_query_tokens
+        self.concate_len = concate_len
         self.headers_extra = headers_extra or {}
         self.rerank_source = rerank_source
 
@@ -155,6 +159,10 @@ def rerank(
         list[tuple[TextualMemoryItem, float]]
             Re-ranked items with scores, sorted descending by score.
         """
+        if self.max_query_tokens and len(query) > self.max_query_tokens:
+            single_concate_len = self.concate_len // 2
+            query = query[:single_concate_len] + "\n" + query[-single_concate_len:]
+
         if not graph_results:
             return []
 

diff --git a/src/memos/reranker/http_bge_strategy.py b/src/memos/reranker/http_bge_strategy.py
index b0567698c..9a1bb21f8 100644
--- a/src/memos/reranker/http_bge_strategy.py
+++ b/src/memos/reranker/http_bge_strategy.py
@@ -80,6 +80,8 @@ def __init__(
         token: str = "",
         model: str = "bge-reranker-v2-m3",
         timeout: int = 10,
+        max_query_tokens: int | None = None,
+        concate_len: int | None = None,
         headers_extra: dict | None = None,
         rerank_source: str | None = None,
         boost_weights: dict[str, float] | None = None,
@@ -108,6 +110,8 @@ def __init__(
         self.token = token or ""
         self.model = model
         self.timeout = timeout
+        self.max_query_tokens = max_query_tokens
+        self.concate_len = concate_len
         self.headers_extra = headers_extra or {}
 
         self.boost_weights = (
@@ -149,6 +153,11 @@ def rerank(
         list[tuple[TextualMemoryItem, float]]
             Re-ranked items with scores, sorted descending by score.
         """
+
+        if self.self.max_query_tokens and len(query) > self.max_query_tokens:
+            single_concate_len = self.concate_len // 2
+            query = query[:single_concate_len] + "\n" + query[-single_concate_len:]
+
         if not graph_results:
             return []

From d0b0d71a0152bc05d63be5763c232ed029c21e64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 21 Jan 2026 10:25:04 +0800
Subject: [PATCH 2/6] fix rerank config for too long

---
 src/memos/reranker/http_bge_strategy.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/memos/reranker/http_bge_strategy.py b/src/memos/reranker/http_bge_strategy.py
index 9a1bb21f8..871ac5ff0 100644
--- a/src/memos/reranker/http_bge_strategy.py
+++ b/src/memos/reranker/http_bge_strategy.py
@@ -153,8 +153,7 @@ def rerank(
         list[tuple[TextualMemoryItem, float]]
             Re-ranked items with scores, sorted descending by score.
         """
-
-        if self.self.max_query_tokens and len(query) > self.max_query_tokens:
+        if self.max_query_tokens and len(query) > self.max_query_tokens:
             single_concate_len = self.concate_len // 2
             query = query[:single_concate_len] + "\n" + query[-single_concate_len:]
 
         if not graph_results:
             return []

From d19ba68d771ce1c4026e4920f191742b1f261bcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 21 Jan 2026 15:30:10 +0800
Subject: [PATCH 3/6] json load warning

---
 src/memos/mem_reader/read_multi_modal/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/memos/mem_reader/read_multi_modal/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py
index d3d97b4e6..a6d910e54 100644
--- a/src/memos/mem_reader/read_multi_modal/utils.py
+++ b/src/memos/mem_reader/read_multi_modal/utils.py
@@ -97,7 +97,7 @@ def _cheap_close(t: str) -> str:
             return json.loads(s)
         except json.JSONDecodeError:
             pass
-    logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw: {response_text}")
+    logger.warning(f"[JSONParse] Failed to decode JSON: {e}\nRaw: {response_text}")
     return {}
 

From 75d7bddd919a0dbdd6c34b876af1f84cee7f8b1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 21 Jan 2026 15:44:35 +0800
Subject: [PATCH 4/6] json load warning

---
 .../read_multi_modal/file_content_parser.py  | 23 +++++++++++++++++------
 .../mem_reader/read_multi_modal/utils.py     |  2 +-
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index fbc704d0b..66f5fd8d8 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -74,12 +74,23 @@ def _get_doc_llm_response(self, chunk_text: str, custom_tags: list[str] | None =
         prompt = prompt.replace("{custom_tags_prompt}", custom_tags_prompt)
         messages = [{"role": "user", "content": prompt}]
 
-        try:
-            response_text = self.llm.generate(messages)
-            response_json = parse_json_result(response_text)
-        except Exception as e:
-            logger.error(f"[FileContentParser] LLM generation error: {e}")
-            response_json = {}
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                response_text = self.llm.generate(messages)
+                response_json = parse_json_result(response_text)
+                return response_json
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    logger.warning(
+                        f"[FileContentParser] LLM generation error (attempt {attempt + 1}/{max_retries}): {e}."
+                    )
+                else:
+                    logger.error(
+                        f"[FileContentParser] LLM generation error after {max_retries} attempts: {e}"
+                    )
+                    response_json = {}
+
         return response_json
 
     def _handle_url(self, url_str: str, filename: str) -> tuple[str, str | None, bool]:

diff --git a/src/memos/mem_reader/read_multi_modal/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py
index a6d910e54..d3d97b4e6 100644
--- a/src/memos/mem_reader/read_multi_modal/utils.py
+++ b/src/memos/mem_reader/read_multi_modal/utils.py
@@ -97,7 +97,7 @@ def _cheap_close(t: str) -> str:
             return json.loads(s)
         except json.JSONDecodeError:
             pass
-    logger.warning(f"[JSONParse] Failed to decode JSON: {e}\nRaw: {response_text}")
+    logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw: {response_text}")
     return {}
 

From 5207df898a7f80246c9767096843b27814dc495d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 21 Jan 2026 19:30:07 +0800
Subject: [PATCH 5/6] json load change to warning

---
 .../read_multi_modal/file_content_parser.py  | 22 ++++++----------------
 .../mem_reader/read_multi_modal/utils.py     |  2 +-
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index 66f5fd8d8..ead7e9fb6 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -74,22 +74,12 @@ def _get_doc_llm_response(self, chunk_text: str, custom_tags: list[str] | None =
         prompt = prompt.replace("{custom_tags_prompt}", custom_tags_prompt)
         messages = [{"role": "user", "content": prompt}]
 
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                response_text = self.llm.generate(messages)
-                response_json = parse_json_result(response_text)
-                return response_json
-            except Exception as e:
-                if attempt < max_retries - 1:
-                    logger.warning(
-                        f"[FileContentParser] LLM generation error (attempt {attempt + 1}/{max_retries}): {e}."
-                    )
-                else:
-                    logger.error(
-                        f"[FileContentParser] LLM generation error after {max_retries} attempts: {e}"
-                    )
-                    response_json = {}
+        try:
+            response_text = self.llm.generate(messages)
+            response_json = parse_json_result(response_text)
+        except Exception as e:
+            logger.error(f"[FileContentParser] LLM generation error: {e}")
+            response_json = {}
 
         return response_json
 

diff --git a/src/memos/mem_reader/read_multi_modal/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py
index d3d97b4e6..a6d910e54 100644
--- a/src/memos/mem_reader/read_multi_modal/utils.py
+++ b/src/memos/mem_reader/read_multi_modal/utils.py
@@ -97,7 +97,7 @@ def _cheap_close(t: str) -> str:
             return json.loads(s)
         except json.JSONDecodeError:
             pass
-    logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw: {response_text}")
+    logger.warning(f"[JSONParse] Failed to decode JSON: {e}\nRaw: {response_text}")
     return {}
 

From 4558b3b045243e4b67c543f270f723d7b1938f7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=91=E5=B8=83=E6=9E=97?= <11641432+heiheiyouyou@user.noreply.gitee.com>
Date: Wed, 21 Jan 2026 19:33:04 +0800
Subject: [PATCH 6/6] json load change to warning

---
 src/memos/mem_reader/read_multi_modal/file_content_parser.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
index ead7e9fb6..fbc704d0b 100644
--- a/src/memos/mem_reader/read_multi_modal/file_content_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -80,7 +80,6 @@ def _get_doc_llm_response(self, chunk_text: str, custom_tags: list[str] | None =
         except Exception as e:
             logger.error(f"[FileContentParser] LLM generation error: {e}")
             response_json = {}
-
         return response_json
 
     def _handle_url(self, url_str: str, filename: str) -> tuple[str, str | None, bool]:
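Note on the truncation added in PATCH 1/6: when a query exceeds max_query_tokens, rerank() keeps the first and last concate_len // 2 characters and drops the middle, joining the two halves with a newline. A minimal standalone sketch of that guard (the function name and the assert are ours for illustration; the patch inlines this logic at the top of rerank()):

    def truncate_query(query: str, max_query_tokens: int | None, concate_len: int) -> str:
        # Mirrors the guard in HTTPBGEReranker.rerank(): keep the head and
        # tail of an overlong query, drop the middle. Note that len(query)
        # counts characters, not model tokens, exactly as in the patch.
        if max_query_tokens and len(query) > max_query_tokens:
            single_concate_len = concate_len // 2
            return query[:single_concate_len] + "\n" + query[-single_concate_len:]
        return query

    # With the defaults wired up in get_feedback_reranker_config()
    # (MOS_RERANKER_MAX_TOKENS=8000, MOS_RERANKER_CONCAT_LEN=1000),
    # a 20,000-character query collapses to 500 + 1 + 500 = 1001 characters.
    assert len(truncate_query("x" * 20_000, 8_000, 1_000)) == 1_001

One consequence worth noting: the truncated query ends up around concate_len + 1 characters, far below max_query_tokens, and both limits are measured in characters even though the config key is named max_query_tokens.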
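Before these values reach the reranker, from_config() in factory.py bounds them with a nested min(max(...)): max_query_tokens is clamped to [100, 8000] and concate_len to [4, 8000]. The same pattern in isolation (clamp is a hypothetical helper name; the patch writes the expression inline):

    def clamp(value: int, low: int, high: int) -> int:
        # min(max(value, low), high): raise value to at least `low`,
        # then cap it at `high`.
        return min(max(value, low), high)

    assert clamp(50_000, 100, 8_000) == 8_000  # oversized max_query_tokens is capped
    assert clamp(0, 4, 8_000) == 4             # undersized concate_len is floored
    assert clamp(1_000, 4, 8_000) == 1_000     # in-range values pass through unchanged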
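PATCH 4/6 wraps self.llm.generate() in a bounded retry, and PATCH 5/6 plus 6/6 revert it line for line: file_content_parser.py ends the series at its original blob (fbc704d0b), so the only net change from patches 3-6 is that JSON decode failures in utils.py are logged at warning level instead of error. For anyone who later wants to reinstate the retry, a self-contained sketch of the reverted logic (generate_with_retry, llm, and parse are stand-in names; the patch used the parser's own attributes and parse_json_result):

    import logging
    from collections.abc import Callable

    logger = logging.getLogger(__name__)

    def generate_with_retry(
        llm,
        messages: list[dict],
        parse: Callable[[str], dict],
        max_retries: int = 3,
    ) -> dict:
        # Bounded retry in the shape PATCH 4/6 introduced: retry on any
        # exception from generation or parsing, warn on intermediate
        # failures, log an error and fall back to {} after the last attempt.
        for attempt in range(max_retries):
            try:
                return parse(llm.generate(messages))
            except Exception as e:
                if attempt < max_retries - 1:
                    logger.warning(
                        "LLM generation error (attempt %d/%d): %s", attempt + 1, max_retries, e
                    )
                else:
                    logger.error("LLM generation error after %d attempts: %s", max_retries, e)
        return {}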