Merged
Changes from all commits
32 commits
83d1576
fix bugs: try to fix bugs in _submit_web_logs
tangg555 Dec 18, 2025
e50c56c
fix bugs: try to address bugs
tangg555 Dec 18, 2025
74f1da0
Merge remote-tracking branch 'upstream/dev' into dev
tangg555 Dec 18, 2025
58eb6b8
fix bugs
tangg555 Dec 18, 2025
392b6df
Merge remote-tracking branch 'upstream/dev' into dev
tangg555 Dec 18, 2025
0d72ce7
refactor: modify examples
tangg555 Dec 18, 2025
2fe965b
revise add operation and fix an unbelievable bug
tangg555 Dec 18, 2025
26267f4
Merge branch 'dev' into scheduler
tangg555 Dec 18, 2025
eecfa51
address the bug issues
tangg555 Dec 22, 2025
7c6b7da
Merge remote-tracking branch 'upstream/dev' into dev
tangg555 Dec 22, 2025
f2da3a7
the doc file has a format problem which has been fixed in this commit
tangg555 Dec 24, 2025
a6881b4
add a range of new feats for the add operation
tangg555 Dec 24, 2025
7f39e7e
address the incompatible issue of local scheduler
tangg555 Dec 24, 2025
6778cc4
address the conflicts
tangg555 Dec 24, 2025
3fe9cb0
feat(scheduler): optimize redis queue consumer group management
tangg555 Dec 24, 2025
b35096f
fix(tests): resolve AttributeError in SimpleStructMemReader tests
tangg555 Dec 24, 2025
a7f5b77
Merge branch 'dev' into dev
tangg555 Dec 24, 2025
ded7ac6
Merge branch 'dev' into dev
tangg555 Dec 24, 2025
8943ba8
fix(mem_reader): pass info dict to add_before_search for correct user…
tangg555 Dec 24, 2025
78a4327
refactor add_before_search from mem_reader to SingleCubeView
tangg555 Dec 24, 2025
a5fc4c0
address bugs
tangg555 Dec 24, 2025
45224dd
fix: fix the qsize bug of task queue, and accept change from hotfix/s…
tangg555 Dec 25, 2025
f3c4f6c
fix: address some issues to run old scheduler example and kv cache ex…
tangg555 Dec 26, 2025
d634851
Merge remote-tracking branch 'upstream/dev' into dev
tangg555 Dec 26, 2025
e9b60db
fix: address the issue of Top-level import of unavailable module 'torch'
tangg555 Dec 26, 2025
c6bdb22
fix: resolve linting errors and make optional dependencies lazy loaded
tangg555 Dec 26, 2025
077f529
Merge branch 'dev' into scheduler
tangg555 Dec 29, 2025
5abbe23
Merge branch 'dev' into scheduler
tangg555 Dec 29, 2025
ad3620a
refactor: revise the rewrite prompt to make it better
tangg555 Dec 29, 2025
2475286
refactor: update examples
tangg555 Dec 30, 2025
24c9b18
Merge remote-tracking branch 'upstream/dev' into dev
tangg555 Dec 30, 2025
0ecee35
Merge branch 'dev' into scheduler
tangg555 Dec 30, 2025
21 changes: 21 additions & 0 deletions examples/data/config/mem_scheduler/mem_cube_config.yaml
@@ -0,0 +1,21 @@
user_id: "user_test"
cube_id: "user_test/mem_cube_naive"
text_mem:
  backend: "naive_text"
  config:
    extractor_llm:
      backend: "huggingface_singleton"
      config:
        model_name_or_path: "Qwen/Qwen3-0.6B"
        temperature: 0.1
        max_tokens: 1024
act_mem:
  backend: "kv_cache"
  config:
    memory_filename: "activation_memory.pickle"
    extractor_llm:
      backend: "huggingface_singleton"
      config:
        model_name_or_path: "Qwen/Qwen3-0.6B"
        temperature: 0.8
        max_tokens: 1024
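For orientation, this cube config is consumed later in this PR (see quick_start_examples.py below) roughly as follows; a minimal sketch, with an illustrative output path:

from memos.configs.mem_cube import GeneralMemCubeConfig
from memos.mem_cube.general import GeneralMemCube

# Load the YAML above and materialize the cube on disk.
config = GeneralMemCubeConfig.from_yaml_file(
    "examples/data/config/mem_scheduler/mem_cube_config.yaml"
)
GeneralMemCube(config).dump("outputs/mem_scheduler/user_test/mem_cube_naive")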
12 changes: 4 additions & 8 deletions examples/data/config/mem_scheduler/memos_config_w_scheduler.yaml
@@ -10,16 +10,12 @@ mem_reader:
   backend: "simple_struct"
   config:
     llm:
-      backend: "openai"
+      backend: "huggingface_singleton"
       config:
-        model_name_or_path: "gpt-4o-mini"
-        temperature: 0.8
-        max_tokens: 4096
-        top_p: 0.9
-        top_k: 50
+        model_name_or_path: "Qwen/Qwen3-1.7B"
+        temperature: 0.1
         remove_think_prefix: true
-        api_key: "sk-xxxxxx"
-        api_base: "https://api.openai.com/v1"
+        max_tokens: 4096
   embedder:
     backend: "ollama"
     config:
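For context, the scheduler example in this PR loads this config roughly as follows; with the huggingface_singleton backend no OpenAI API key is required (a minimal sketch):

from memos.configs.mem_os import MOSConfig
from memos.mem_os.main import MOS
from memos.mem_scheduler.utils.misc_utils import parse_yaml

# Build a scheduler-enabled MOS from the YAML above.
config = parse_yaml("examples/data/config/mem_scheduler/memos_config_w_scheduler.yaml")
mos = MOS(MOSConfig(**config))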
253 changes: 253 additions & 0 deletions examples/mem_scheduler/quick_start_examples.py
@@ -0,0 +1,253 @@
import json
import shutil
import sys
import uuid

from pathlib import Path

from transformers import DynamicCache

from memos.configs.mem_cube import GeneralMemCubeConfig
from memos.configs.mem_os import MOSConfig
from memos.configs.memory import MemoryConfigFactory
from memos.mem_cube.general import GeneralMemCube
from memos.mem_os.main import MOS
from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem
from memos.mem_scheduler.schemas.task_schemas import (
    ANSWER_TASK_LABEL,
    MEM_UPDATE_TASK_LABEL,
    QUERY_TASK_LABEL,
)
from memos.mem_scheduler.utils.db_utils import get_utc_now
from memos.mem_scheduler.utils.misc_utils import parse_yaml
from memos.memories.activation.item import KVCacheItem
from memos.memories.factory import MemoryFactory


FILE_PATH = Path(__file__).absolute()
BASE_DIR = FILE_PATH.parent.parent.parent
sys.path.insert(0, str(BASE_DIR)) # Enable execution from any working directory


def get_cache_info(cache):
    if not cache:
        return None

    num_layers = 0
    total_size_bytes = 0

    if hasattr(cache, "layers"):
        # Newer transformers versions: DynamicCache exposes per-layer objects.
        num_layers = len(cache.layers)
        for layer in cache.layers:
            if hasattr(layer, "key_cache") and layer.key_cache is not None:
                total_size_bytes += layer.key_cache.nelement() * layer.key_cache.element_size()
            if hasattr(layer, "value_cache") and layer.value_cache is not None:
                total_size_bytes += layer.value_cache.nelement() * layer.value_cache.element_size()

            if hasattr(layer, "keys") and layer.keys is not None:
                total_size_bytes += layer.keys.nelement() * layer.keys.element_size()
            if hasattr(layer, "values") and layer.values is not None:
                total_size_bytes += layer.values.nelement() * layer.values.element_size()

    elif hasattr(cache, "key_cache") and hasattr(cache, "value_cache"):
        # Older transformers versions: parallel lists of key/value tensors.
        num_layers = len(cache.key_cache)
        for k, v in zip(cache.key_cache, cache.value_cache, strict=False):
            if k is not None:
                total_size_bytes += k.nelement() * k.element_size()
            if v is not None:
                total_size_bytes += v.nelement() * v.element_size()

    return {
        "num_layers": num_layers,
        "size_bytes": total_size_bytes,
        "size_mb": f"{total_size_bytes / (1024 * 1024):.2f} MB",
    }


def serialize_item(obj):
    if isinstance(obj, list):
        return [serialize_item(x) for x in obj]

    if isinstance(obj, KVCacheItem):
        return {
            "id": obj.id,
            "metadata": obj.metadata,
            "records": obj.records.model_dump()
            if hasattr(obj.records, "model_dump")
            else obj.records,
            "memory": get_cache_info(obj.memory),
        }

    if isinstance(obj, DynamicCache):
        return get_cache_info(obj)

    return str(obj)


def kv_cache_only():
    # Create a config for KVCacheMemory (HuggingFace backend)
    config = MemoryConfigFactory(
        backend="kv_cache",
        config={
            "extractor_llm": {
                "backend": "huggingface",
                "config": {
                    "model_name_or_path": "Qwen/Qwen3-0.6B",
                    "max_tokens": 32,
                    "add_generation_prompt": True,
                    "remove_think_prefix": True,
                },
            },
        },
    )

    # Instantiate KVCacheMemory
    kv_mem = MemoryFactory.from_config(config)

    # Extract a KVCacheItem (DynamicCache)
    prompt = [
        {"role": "user", "content": "What is MemOS?"},
        {"role": "assistant", "content": "MemOS is a memory operating system for LLMs."},
    ]
    print("===== Extract KVCacheItem =====")
    cache_item = kv_mem.extract(prompt)
    print(json.dumps(serialize_item(cache_item), indent=2, default=str))

    # Add the cache to memory
    kv_mem.add([cache_item])
    print("All caches:")
    print(json.dumps(serialize_item(kv_mem.get_all()), indent=2, default=str))

    # Get by ID
    retrieved = kv_mem.get(cache_item.id)
    print("Retrieved:")
    print(json.dumps(serialize_item(retrieved), indent=2, default=str))

    # Merge caches
    item2 = kv_mem.extract([{"role": "user", "content": "Tell me a joke."}])
    kv_mem.add([item2])
    merged = kv_mem.get_cache([cache_item.id, item2.id])
    print("Merged cache:")
    print(json.dumps(serialize_item(merged), indent=2, default=str))

    # Delete one of them
    kv_mem.delete([cache_item.id])
    print("After delete:")
    print(json.dumps(serialize_item(kv_mem.get_all()), indent=2, default=str))

    # Dump and reload caches
    kv_mem.dump("tmp/kv_mem")
    print("Dumped to tmp/kv_mem")
    kv_mem.delete_all()
    kv_mem.load("tmp/kv_mem")
    print("Loaded caches:")
    print(json.dumps(serialize_item(kv_mem.get_all()), indent=2, default=str))


def run_scheduler_example():
    # Load the main MOS config with MemScheduler enabled
    config = parse_yaml(
        f"{BASE_DIR}/examples/data/config/mem_scheduler/memos_config_w_scheduler.yaml"
    )
    mos_config = MOSConfig(**config)
    mos = MOS(mos_config)

    # Create a dynamic user ID
    user_id = str(uuid.uuid4())
    mos.create_user(user_id=user_id)

    # Create the MemCube config and dump the cube
    config = GeneralMemCubeConfig.from_yaml_file(
        f"{BASE_DIR}/examples/data/config/mem_scheduler/mem_cube_config.yaml"
    )
    mem_cube_id = "mem_cube_5"
    mem_cube_name_or_path = f"{BASE_DIR}/outputs/mem_scheduler/{user_id}/{mem_cube_id}"

    # Remove any stale directory from a previous run
    if Path(mem_cube_name_or_path).exists():
        shutil.rmtree(mem_cube_name_or_path)
        print(f"{mem_cube_name_or_path} already existed and has been removed.")

    # Dump a fresh MemCube
    mem_cube = GeneralMemCube(config)
    mem_cube.dump(mem_cube_name_or_path)

    # Register the MemCube for this user
    mos.register_mem_cube(
        mem_cube_name_or_path=mem_cube_name_or_path, mem_cube_id=mem_cube_id, user_id=user_id
    )

    # Define custom scheduler handlers
    def custom_query_handler(messages: list[ScheduleMessageItem]):
        for msg in messages:
            print(f"\n[scheduler] user query received: {msg.content}")
            # Trigger mem_update manually
            new_msg = msg.model_copy(update={"label": MEM_UPDATE_TASK_LABEL})
            mos.mem_scheduler.submit_messages([new_msg])

    def custom_answer_handler(messages: list[ScheduleMessageItem]):
        for msg in messages:
            mem_cube = mos.mem_cubes.get(msg.mem_cube_id)
            if mem_cube is None:
                continue
            kv_mem = mem_cube.act_mem
            for cache_item in kv_mem.get_all():
                print(
                    f"[scheduler] act memory: {get_cache_info(cache_item.memory)} ({cache_item.records})"
                )
            print(f"\n[scheduler] LLM answer received: {msg.content}")

    def custom_mem_update_handler(messages: list[ScheduleMessageItem]):
        for msg in messages:
            mem_cube = mos.mem_cubes.get(msg.mem_cube_id)
            if mem_cube and mem_cube.text_mem:
                # Only touch act_mem once the cube is known to exist
                kv_mem = mem_cube.act_mem
                results = mem_cube.text_mem.search(msg.content, top_k=3)
                for mem in results:
                    print(f"\n[scheduler] searched memories: {mem.memory}")

                    cache_item = kv_mem.extract(mem.memory)
                    cache_item.records.text_memories = [mem.memory]
                    cache_item.records.timestamp = get_utc_now()
                    kv_mem.add([cache_item])

    # Register custom handlers
    mos.mem_scheduler.dispatcher.register_handlers(
        {
            QUERY_TASK_LABEL: custom_query_handler,
            ANSWER_TASK_LABEL: custom_answer_handler,
            MEM_UPDATE_TASK_LABEL: custom_mem_update_handler,
        }
    )

    # Add messages
    messages = [
        {"role": "user", "content": "I like playing football."},
        {"role": "assistant", "content": "I like playing football too."},
    ]
    mos.add(messages, user_id=user_id, mem_cube_id=mem_cube_id)

    # Chat loop: show TreeTextMemory nodes + KVCache
    while True:
        user_input = input("👤 [You] ").strip()
        print()
        response = mos.chat(user_input, user_id=user_id)
        retrieved_memories = mos.get_all(mem_cube_id=mem_cube_id, user_id=user_id)

        print(f"🤖 [Assistant] {response}")

        # Show the typed nodes stored in text memory
        text_memories = retrieved_memories["text_mem"][0]["memories"]
        # Handle different memory structures (NaiveTextMemory returns a list,
        # TreeTextMemory returns a dict with nodes)
        if isinstance(text_memories, dict) and "nodes" in text_memories:
            for node in text_memories["nodes"]:
                mem_type = node["metadata"].get("memory_type", "Unknown")
                print(f"[{mem_type}] {node['memory']}")
        elif isinstance(text_memories, list):
            for mem in text_memories:
                # Naive memory items may not carry memory_type metadata
                print(f"[TextMemory] {mem.memory if hasattr(mem, 'memory') else mem}")


if __name__ == "__main__":
    kv_cache_only()

    run_scheduler_example()
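Assuming the optional dependencies are installed (transformers, torch, and a local Ollama embedder for the scheduler config), the example should run from the repository root with: python examples/mem_scheduler/quick_start_examples.py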
31 changes: 21 additions & 10 deletions src/memos/llms/hf.py
@@ -2,13 +2,7 @@
 from typing import Any

 from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
     DynamicCache,
-    LogitsProcessorList,
-    TemperatureLogitsWarper,
-    TopKLogitsWarper,
-    TopPLogitsWarper,
 )

 from memos.configs.llm import HFLLMConfig
@@ -30,16 +24,32 @@ def __init__(self, config: HFLLMConfig):
"""
Initialize the HFLLM model and tokenizer, and set up logits processors for sampling.
"""
import torch

from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
LogitsProcessorList,
TemperatureLogitsWarper,
TopKLogitsWarper,
TopPLogitsWarper,
)

self.config = config

# Default model if not specified
if not self.config.model_name_or_path:
self.config.model_name_or_path = "Qwen/Qwen3-1.7B"

# Initialize hf model
self.model = AutoModelForCausalLM.from_pretrained(
self.config.model_name_or_path, torch_dtype="auto", device_map="auto"
)
if torch.backends.mps.is_available():
self.model = AutoModelForCausalLM.from_pretrained(
self.config.model_name_or_path, torch_dtype="auto"
).to("mps")
else:
self.model = AutoModelForCausalLM.from_pretrained(
self.config.model_name_or_path, torch_dtype="auto", device_map="auto"
)
self.tokenizer = AutoTokenizer.from_pretrained(
self.config.model_name_or_path, use_fast=True
)
@@ -355,6 +365,7 @@ def build_kv_cache(self, messages) -> DynamicCache:
             DynamicCache: The constructed KV cache object.
         """
         import torch
+        import transformers

         # Accept multiple input types and convert to standard chat messages
         if isinstance(messages, str):
@@ -391,7 +402,7 @@ def build_kv_cache(self, messages) -> DynamicCache:

         # Convert from legacy tuple format to DynamicCache if needed
         if isinstance(kv, tuple):
-            kv = DynamicCache.from_legacy_cache(kv)
+            kv = transformers.DynamicCache.from_legacy_cache(kv)

         # Handle compatibility between old and new transformers versions
         # In newer versions, DynamicCache uses 'layers' attribute
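The recurring change in this file is the lazy-import pattern for optional heavy dependencies: torch and the transformers symbols move from module scope into the methods that need them, so importing memos.llms.hf no longer fails when torch is absent. A minimal sketch of the same idea; the function below is illustrative, not part of the codebase:

def load_model(name: str):
    # Import optional heavy deps only when a model is actually requested,
    # so importing the enclosing module stays cheap and dependency-free.
    try:
        import torch
        from transformers import AutoModelForCausalLM
    except ImportError as err:
        raise ImportError("load_model() requires 'torch' and 'transformers'") from err

    model = AutoModelForCausalLM.from_pretrained(name, torch_dtype="auto")
    # Prefer Apple MPS when present, mirroring the __init__ change above.
    return model.to("mps") if torch.backends.mps.is_available() else model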
6 changes: 4 additions & 2 deletions src/memos/mem_os/core.py
@@ -311,7 +311,7 @@ def chat(self, query: str, user_id: str | None = None, base_prompt: str | None =
         past_key_values = None

         if self.config.enable_activation_memory:
-            if self.config.chat_model.backend != "huggingface":
+            if self.config.chat_model.backend not in ["huggingface", "huggingface_singleton"]:
                 logger.error(
                     "Activation memory only used for huggingface backend. Skipping activation memory."
                 )
@@ -498,7 +498,9 @@ def register_mem_cube(
         existing_cube = self.user_manager.get_cube(mem_cube_id)

         # check the embedder is it consistent with MOSConfig
-        if self.config.mem_reader.config.embedder != (
+        if hasattr(
+            self.mem_cubes[mem_cube_id].text_mem.config, "embedder"
+        ) and self.config.mem_reader.config.embedder != (
             cube_embedder := self.mem_cubes[mem_cube_id].text_mem.config.embedder
         ):
             logger.warning(
2 changes: 1 addition & 1 deletion src/memos/mem_os/main.py
@@ -310,7 +310,7 @@ def _generate_enhanced_response_with_context(
         # Handle activation memory if enabled (same as core method)
         past_key_values = None
         if self.config.enable_activation_memory:
-            if self.config.chat_model.backend != "huggingface":
+            if self.config.chat_model.backend not in ["huggingface", "huggingface_singleton"]:
                 logger.error(
                     "Activation memory only used for huggingface backend. Skipping activation memory."
                 )