Merged

90 commits
70373f9
fix playground bug, internet search judge
Dec 4, 2025
d181339
Merge branch 'dev' into feat/fix_palyground_bug
Dec 4, 2025
11cf00a
fix playground internet bug
Dec 4, 2025
6b10ce1
merge dev
Dec 4, 2025
c861f61
modify delete mem
Dec 4, 2025
e638039
modify tool resp bug in multi cube
Dec 4, 2025
dcd3d50
Merge branch 'dev' into feat/fix_palyground_bug
Dec 4, 2025
0c0eff8
Merge branch 'dev' into feat/fix_palyground_bug
Dec 5, 2025
8765dc4
fix bug in playground chat handle and search inter
Dec 5, 2025
1a335db
modify prompt
Dec 5, 2025
18320ff
fix bug in playground
Dec 6, 2025
666b897
fix playground bug
Dec 6, 2025
275b9b6
Merge branch 'dev' into feat/fix_palyground_bug
Dec 7, 2025
0d22512
fix bug
Dec 7, 2025
d38f55f
Merge branch 'dev' into feat/fix_palyground_bug
Dec 7, 2025
a9eb1f6
fix code
Dec 7, 2025
94ad709
Merge branch 'dev' into feat/fix_palyground_bug
Dec 7, 2025
723a14f
fix model bug in playground
Dec 7, 2025
6f06a23
Merge branch 'dev' into feat/fix_palyground_bug
Dec 7, 2025
a300670
Merge branch 'dev' into feat/fix_palyground_bug
Dec 8, 2025
7ee13b1
Merge branch 'dev' into feat/fix_palyground_bug
Dec 8, 2025
5ab6e92
modify plan b
Dec 8, 2025
1bb0bcd
llm param modify
Dec 8, 2025
1b607e7
Merge branch 'dev' into feat/fix_palyground_bug
Dec 8, 2025
f5bc426
add logger in playground
Dec 8, 2025
a9fa309
modify code
Dec 9, 2025
d2efa24
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
9ebfbe1
Merge branch 'dev' into feat/fix_palyground_bug
fridayL Dec 9, 2025
4c055d0
fix bug
Dec 9, 2025
27b4fc4
modify code
Dec 9, 2025
cefeefb
modify code
Dec 9, 2025
7e05fa7
fix bug
Dec 9, 2025
a4f66b1
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
9b47647
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
05da172
fix search bug in playground
Dec 9, 2025
e410ec2
fix bug
Dec 9, 2025
0324588
move scheduler to back
Dec 9, 2025
a834028
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
4084954
modify pref location
Dec 9, 2025
de5e372
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
87861ab
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
8b547b8
modify fast net search
Dec 9, 2025
c915867
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
2f238fd
Merge branch 'dev' into feat/fix_palyground_bug
Dec 9, 2025
4543332
add tags and new package
Dec 10, 2025
c51ef0d
merge dev
Dec 10, 2025
033e8bd
modify prompt fix bug
Dec 10, 2025
e300112
Merge branch 'dev' into feat/fix_palyground_bug
Dec 10, 2025
da498fc
Merge branch 'dev' into feat/fix_palyground_bug
Dec 10, 2025
4057f5d
remove nltk due to image problem
Dec 10, 2025
479d74e
Merge branch 'dev' into feat/fix_palyground_bug
Dec 10, 2025
ecff6e5
prompt modify
Dec 11, 2025
1b4ef23
Merge branch 'dev' into feat/fix_palyground_bug
Dec 11, 2025
7e18cae
modify bug remove redundant field
Dec 11, 2025
a70ffa3
modify bug
Dec 11, 2025
e06eff2
merge dev
Dec 11, 2025
7a149e3
fix playground bug
Dec 11, 2025
0c2d132
merge dev
Dec 11, 2025
d69fd88
fix bug
Dec 11, 2025
a9a7613
merge dev
Dec 11, 2025
dad4ca6
boost internet topk
Dec 11, 2025
f49fad6
Merge branch 'dev' into feat/fix_palyground_bug
Dec 11, 2025
393a7f5
boost to 50
Dec 11, 2025
b691b05
Merge branch 'dev' into feat/fix_palyground_bug
Dec 11, 2025
2bba2c2
fix bug cite
Dec 11, 2025
571770b
modify search
Dec 12, 2025
f5e032c
merge dev
Dec 12, 2025
d7f5c0d
Merge branch 'dev' into feat/fix_palyground_bug
Dec 15, 2025
a570450
remote query add in playground
Dec 15, 2025
14a21c4
modify bug
Dec 15, 2025
2d84ae5
Merge branch 'dev' into feat/fix_palyground_bug
Dec 15, 2025
42591c8
modify pref bug
Dec 16, 2025
c4c3a87
Merge branch 'dev' into feat/fix_palyground_bug
CaralHsi Dec 16, 2025
289debd
move add position
Dec 16, 2025
9c855a8
Merge branch 'dev' into feat/fix_palyground_bug
Dec 16, 2025
705ed47
Merge branch 'dev' into feat/fix_palyground_bug
Dec 16, 2025
e654465
modify chat prompt
Dec 16, 2025
7b01f84
modify overthinking
Dec 16, 2025
a751823
Merge branch 'dev' into feat/fix_palyground_bug
Dec 17, 2025
002f990
add logger in playground chat
Dec 17, 2025
11594a7
modify mem
Dec 17, 2025
64bb466
merge dev
Dec 17, 2025
f50ab5c
remove must in prompt
Dec 17, 2025
f2ad9c5
Merge branch 'dev' into feat/fix_palyground_bug
Dec 17, 2025
3026f87
add logger
Dec 17, 2025
78572b0
Merge branch 'dev' into feat/fix_palyground_bug
Dec 24, 2025
e6de2c1
add logger
Dec 24, 2025
f2217d3
Merge branch 'dev' into feat/fix_palyground_bug
Dec 24, 2025
6dea91e
merge dev
Dec 25, 2025
7ae3496
Merge branch 'dev' into feat/fix_palyground_bug
CaralHsi Dec 25, 2025
98 changes: 65 additions & 33 deletions src/memos/api/handlers/chat_handler.py
@@ -99,15 +99,13 @@ def __init__(

def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
"""
Chat with MemOS for complete response (non-streaming).

This implementation directly uses search/add handlers instead of mos_server.
Chat with MemOS for chat complete response (non-streaming).

Args:
chat_req: Chat complete request

Returns:
Dictionary with response and references
Dictionary with chat complete response and reasoning

Raises:
HTTPException: If chat fails
@@ -161,7 +159,7 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
{"role": "user", "content": chat_req.query},
]

self.logger.info("Starting to generate complete response...")
self.logger.info("[Cloud Service] Starting to generate chat complete response...")

# Step 3: Generate complete response from LLM
if chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms:
@@ -172,11 +170,23 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:

model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))

self.logger.info(f"[Cloud Service Chat Complete Model]: {model}")
self.logger.info(f"[Cloud Service] Chat Complete Model: {model}")
strat = time.time()
response = self.chat_llms[model].generate(current_messages, model_name_or_path=model)
end = time.time()
self.logger.info(f"[Cloud Service Chat Complete Time]: {end - strat} seconds")
self.logger.info(f"[Cloud Service] Chat Complete Time: {end - strat} seconds")

if not response:
self.logger.error(
f"[Cloud Service] Chat Complete Failed, LLM response is {response}"
)
raise HTTPException(
status_code=500, detail="Chat complete failed, LLM response is None"
)

self.logger.info(
f"[Cloud Service] Chat Complete LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Complete LLM Response: {response}"
)

# Step 4: start add after chat asynchronously
if chat_req.add_message_on_answer:
Expand All @@ -192,7 +202,7 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An
async_mode="async",
)
end = time.time()
self.logger.info(f"[Cloud Service Chat Add Time]: {end - start} seconds")
self.logger.info(f"[Cloud Service] Chat Add Time: {end - start} seconds")

match = re.search(r"<think>([\s\S]*?)</think>", response)
reasoning_text = match.group(1) if match else None
@@ -208,14 +218,12 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, Any]:
except ValueError as err:
raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
except Exception as err:
self.logger.error(f"Failed to complete chat: {traceback.format_exc()}")
self.logger.error(f"[Cloud Service] Failed to chat complete: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err

def handle_chat_stream(self, chat_req: ChatRequest) -> StreamingResponse:
"""
Chat with MemOS via Server-Sent Events (SSE) stream using search/add handlers.

This implementation directly uses search_handler and add_handler.
Chat with MemOS via Server-Sent Events (SSE) stream for chat stream response.

Args:
chat_req: Chat stream request
Expand All @@ -229,7 +237,7 @@ def handle_chat_stream(self, chat_req: ChatRequest) -> StreamingResponse:
try:

def generate_chat_response() -> Generator[str, None, None]:
"""Generate chat response as SSE stream."""
"""Generate chat stream response as SSE stream."""
try:
# Resolve readable cube IDs (for search)
readable_cube_ids = chat_req.readable_cube_ids or (
@@ -289,7 +297,7 @@ def generate_chat_response() -> Generator[str, None, None]:
]

self.logger.info(
f"user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
f"[Cloud Service] chat stream user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
f"current_system_prompt: {system_prompt}"
)

@@ -304,14 +312,12 @@ def generate_chat_response() -> Generator[str, None, None]:
)

model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
self.logger.info(f"[Cloud Service Chat Stream Model]: {model}")
self.logger.info(f"[Cloud Service] Chat Stream Model: {model}")

start = time.time()
response_stream = self.chat_llms[model].generate_stream(
current_messages, model_name_or_path=model
)
end = time.time()
self.logger.info(f"[Cloud Service Chat Stream Time]: {end - start} seconds")

# Stream the response
buffer = ""
@@ -337,6 +343,13 @@ def generate_chat_response() -> Generator[str, None, None]:
chunk_data = f"data: {json.dumps({'type': 'text', 'data': chunk}, ensure_ascii=False)}\n\n"
yield chunk_data

end = time.time()
self.logger.info(f"[Cloud Service] Chat Stream Time: {end - start} seconds")

self.logger.info(
f"[Cloud Service] Chat Stream LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Stream LLM Response: {full_response}"
)

current_messages.append({"role": "assistant", "content": full_response})
if chat_req.add_message_on_answer:
# Resolve writable cube IDs (for add)
@@ -354,10 +367,10 @@ def generate_chat_response() -> Generator[str, None, None]:
)
end = time.time()
self.logger.info(
f"[Cloud Service Chat Stream Add Time]: {end - start} seconds"
f"[Cloud Service] Chat Stream Add Time: {end - start} seconds"
)
except Exception as e:
self.logger.error(f"Error in chat stream: {e}", exc_info=True)
self.logger.error(f"[Cloud Service] Error in chat stream: {e}", exc_info=True)
error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"
yield error_data

@@ -377,14 +390,14 @@ def generate_chat_response() -> Generator[str, None, None]:
except ValueError as err:
raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
except Exception as err:
self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}")
self.logger.error(
f"[Cloud Service] Failed to start chat stream: {traceback.format_exc()}"
)
raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err

def handle_chat_stream_playground(self, chat_req: ChatPlaygroundRequest) -> StreamingResponse:
"""
Chat with MemOS via Server-Sent Events (SSE) stream using search/add handlers.

This implementation directly uses search_handler and add_handler.
Chat with MemOS via Server-Sent Events (SSE) stream for playground chat stream response.

Args:
chat_req: Chat stream request
@@ -398,7 +411,7 @@ def handle_chat_stream_playground(self, chat_req: ChatPlaygroundRequest) -> StreamingResponse:
try:

def generate_chat_response() -> Generator[str, None, None]:
"""Generate chat response as SSE stream."""
"""Generate playground chat stream response as SSE stream."""
try:
import time

@@ -434,7 +447,9 @@ def generate_chat_response() -> Generator[str, None, None]:
start_time = time.time()
search_response = self.search_handler.handle_search_memories(search_req)
end_time = time.time()
self.logger.info(f"first search time: {end_time - start_time}")
self.logger.info(
f"[PLAYGROUND CHAT] first search time: {end_time - start_time}"
)

yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"

Expand Down Expand Up @@ -481,7 +496,7 @@ def generate_chat_response() -> Generator[str, None, None]:
conversation=chat_req.history,
mode="fine",
)
self.logger.info(f"[PLAYGROUND chat parsed_goal]: {parsed_goal}")
self.logger.info(f"[PLAYGROUND CHAT] parsed_goal: {parsed_goal}")

if chat_req.beginner_guide_step == "first":
chat_req.internet_search = False
@@ -512,12 +527,14 @@ def generate_chat_response() -> Generator[str, None, None]:
search_tool_memory=False,
)

self.logger.info(f"[PLAYGROUND second search query]: {search_req.query}")
self.logger.info(f"[PLAYGROUND CHAT] second search query: {search_req.query}")

start_time = time.time()
search_response = self.search_handler.handle_search_memories(search_req)
end_time = time.time()
self.logger.info(f"second search time: {end_time - start_time}")
self.logger.info(
f"[PLAYGROUND CHAT] second search time: {end_time - start_time}"
)

# for playground, add the query to memory without response
self._start_add_to_memory(
@@ -578,13 +595,15 @@ def generate_chat_response() -> Generator[str, None, None]:
]

self.logger.info(
f"user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
f"[PLAYGROUND CHAT] user_id: {chat_req.user_id}, readable_cube_ids: {readable_cube_ids}, "
f"current_system_prompt: {system_prompt}"
)

# Step 3: Generate streaming response from LLM
try:
model = next(iter(self.chat_llms.keys()))
self.logger.info(f"[PLAYGROUND CHAT] Chat Playground Stream Model: {model}")
start = time.time()
response_stream = self.chat_llms[model].generate_stream(
current_messages, model_name_or_path=model
)
@@ -629,10 +648,19 @@ def generate_chat_response() -> Generator[str, None, None]:
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
yield chunk_data

end = time.time()
self.logger.info(
f"[PLAYGROUND CHAT] Chat Playground Stream Time: {end - start} seconds"
)
self.logger.info(
f"[PLAYGROUND CHAT] Chat Playground Stream LLM Input: {json.dumps(current_messages, ensure_ascii=False)} Chat Playground Stream LLM Response: {full_response}"
)

except Exception as llm_error:
# Log the error
self.logger.error(
f"Error during LLM generation: {llm_error}", exc_info=True
f"[PLAYGROUND CHAT] Error during LLM generation: {llm_error}",
exc_info=True,
)
# Send error message to client
error_msg = f"模型生成错误: {llm_error!s}"
@@ -654,7 +682,7 @@ def generate_chat_response() -> Generator[str, None, None]:
# Get further suggestion
current_messages.append({"role": "assistant", "content": full_response})
further_suggestion = self._get_further_suggestion(current_messages)
self.logger.info(f"further_suggestion: {further_suggestion}")
self.logger.info(f"[PLAYGROUND CHAT] further_suggestion: {further_suggestion}")
yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"

yield f"data: {json.dumps({'type': 'end'})}\n\n"
Expand Down Expand Up @@ -685,7 +713,9 @@ def generate_chat_response() -> Generator[str, None, None]:
)

except Exception as e:
self.logger.error(f"Error in chat stream: {e}", exc_info=True)
self.logger.error(
f"[PLAYGROUND CHAT] Error in playground chat stream: {e}", exc_info=True
)
error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"
yield error_data

Expand All @@ -705,7 +735,9 @@ def generate_chat_response() -> Generator[str, None, None]:
except ValueError as err:
raise HTTPException(status_code=404, detail=str(traceback.format_exc())) from err
except Exception as err:
self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}")
self.logger.error(
f"[PLAYGROUND CHAT] Failed to start playground chat stream: {traceback.format_exc()}"
)
raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err

def _dedup_and_supplement_memories(
6 changes: 5 additions & 1 deletion src/memos/api/handlers/component_init.py
@@ -177,7 +177,11 @@ def init_server() -> dict[str, Any]:
else None
)
llm = LLMFactory.from_config(llm_config)
chat_llms = _init_chat_llms(chat_llm_config)
chat_llms = (
_init_chat_llms(chat_llm_config)
if os.getenv("ENABLE_CHAT_API", "false") == "true"
else None
)
embedder = EmbedderFactory.from_config(embedder_config)
mem_reader = MemReaderFactory.from_config(mem_reader_config)
reranker = RerankerFactory.from_config(reranker_config)
30 changes: 23 additions & 7 deletions src/memos/api/routers/server_router.py
@@ -15,7 +15,7 @@
import random as _random
import socket

from fastapi import APIRouter, Query
from fastapi import APIRouter, HTTPException, Query

from memos.api import handlers
from memos.api.handlers.add_handler import AddHandler
@@ -64,12 +64,16 @@
# Initialize all handlers with dependency injection
search_handler = SearchHandler(dependencies)
add_handler = AddHandler(dependencies)
chat_handler = ChatHandler(
dependencies,
components["chat_llms"],
search_handler,
add_handler,
online_bot=components.get("online_bot"),
chat_handler = (
ChatHandler(
dependencies,
components["chat_llms"],
search_handler,
add_handler,
online_bot=components.get("online_bot"),
)
if os.getenv("ENABLE_CHAT_API", "false") == "true"
else None
)
feedback_handler = FeedbackHandler(dependencies)
# Extract commonly used components for function-based handlers
@@ -201,6 +205,10 @@ def chat_complete(chat_req: APIChatCompleteRequest):

This endpoint uses the class-based ChatHandler.
"""
if chat_handler is None:
raise HTTPException(
status_code=503, detail="Chat service is not available. Chat handler not initialized."
)
return chat_handler.handle_chat_complete(chat_req)


@@ -212,6 +220,10 @@ def chat_stream(chat_req: ChatRequest):
This endpoint uses the class-based ChatHandler which internally
composes SearchHandler and AddHandler for a clean architecture.
"""
if chat_handler is None:
raise HTTPException(
status_code=503, detail="Chat service is not available. Chat handler not initialized."
)
return chat_handler.handle_chat_stream(chat_req)


@@ -223,6 +235,10 @@ def chat_stream_playground(chat_req: ChatPlaygroundRequest):
This endpoint uses the class-based ChatHandler which internally
composes SearchHandler and AddHandler for a clean architecture.
"""
if chat_handler is None:
raise HTTPException(
status_code=503, detail="Chat service is not available. Chat handler not initialized."
)
return chat_handler.handle_chat_stream_playground(chat_req)


6 changes: 6 additions & 0 deletions src/memos/memories/textual/prefer_text_memory/extractor.py
@@ -70,6 +70,9 @@ def extract_explicit_preference(self, qa_pair: MessageList | str) -> dict[str, Any]:
try:
response = self.llm_provider.generate([{"role": "user", "content": prompt}])
if not response:
logger.error(
f"[prefer_extractor]: (Error) LLM response content is {response} when extracting explicit preference"
)
return None
response = response.strip().replace("```json", "").replace("```", "").strip()
result = json.loads(response)
@@ -95,6 +98,9 @@ def extract_implicit_preference(self, qa_pair: MessageList | str) -> dict[str, Any]:
try:
response = self.llm_provider.generate([{"role": "user", "content": prompt}])
if not response:
logger.error(
f"[prefer_extractor]: (Error) LLM response content is {response} when extracting implicit preference"
)
return None
response = response.strip().replace("```json", "").replace("```", "").strip()
result = json.loads(response)