Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lib/crewai/src/crewai/translations/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
"expected_output": "\nThis is the expected criteria for your final answer: {expected_output}\nyou MUST return the actual complete content as the final answer, not a summary.",
"human_feedback": "You got human feedback on your work, re-evaluate it and give a new Final Answer when ready.\n {human_feedback}",
"getting_input": "This is the agent's final answer: {final_answer}\n\n",
"summarizer_system_message": "You are a helpful assistant that summarizes text.",
"summarize_instruction": "Summarize the following text, make sure to include all the important information: {group}",
"summary": "This is a summary of our conversation so far:\n{merged_summary}",
"summarizer_system_message": "You are a precise assistant that creates structured summaries of agent conversations. You preserve critical context needed for seamless task continuation.",
"summarize_instruction": "Analyze the following conversation and create a structured summary that preserves all information needed to continue the task seamlessly.\n\n<conversation>\n{conversation}\n</conversation>\n\nCreate a summary with these sections:\n1. **Task Overview**: What is the agent trying to accomplish?\n2. **Current State**: What has been completed so far? What step is the agent on?\n3. **Important Discoveries**: Key facts, data, tool results, or findings that must not be lost.\n4. **Next Steps**: What should the agent do next based on the conversation?\n5. **Context to Preserve**: Any specific values, names, URLs, code snippets, or details referenced in the conversation.\n\nWrap your entire summary in <summary> tags.\n\n<summary>\n[Your structured summary here]\n</summary>",
"summary": "<summary>\n{merged_summary}\n</summary>\n\nContinue the task from where the conversation left off. The above is a structured summary of prior context.",
"manager_request": "Your best answer to your coworker asking you this, accounting for the context shared.",
"formatted_task_instructions": "Format your final answer according to the following OpenAPI schema: {output_format}\n\nIMPORTANT: Preserve the original content exactly as-is. Do NOT rewrite, paraphrase, or modify the meaning of the content. Only structure it to match the schema format.\n\nDo not include the OpenAPI schema in the final output. Ensure the final output does not include any code block markers like ```json or ```python.",
"conversation_history_instruction": "You are a member of a crew collaborating to achieve a common goal. Your task is a specific action that contributes to this larger objective. For additional context, please review the conversation history between you and the user that led to the initiation of this crew. Use any relevant information or feedback from the conversation to inform your task execution and ensure your response aligns with both the immediate task and the crew's overall goals.",
Expand Down
178 changes: 164 additions & 14 deletions lib/crewai/src/crewai/utilities/agent_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,137 @@ def handle_context_length(
)


def _estimate_token_count(text: str) -> int:
"""Estimate token count using a conservative cross-provider heuristic.

Args:
text: The text to estimate tokens for.

Returns:
Estimated token count (roughly 1 token per 4 characters).
"""
return len(text) // 4


def _format_messages_for_summary(messages: list[LLMMessage]) -> str:
    """Render a conversation as role-labeled plain text for summarization.

    System messages are omitted entirely. Assistant messages that carry
    tool calls but no content are rendered as a bracketed tool-call note;
    multimodal content lists are reduced to their text blocks.

    Args:
        messages: Conversation messages to render.

    Returns:
        The conversation as blank-line-separated, role-labeled entries.
    """
    rendered: list[str] = []

    for message in messages:
        role = message.get("role", "user")
        if role == "system":
            # System prompts never appear in the summarized transcript.
            continue

        body = message.get("content")

        if body is None:
            calls = message.get("tool_calls")
            if calls:
                # Assistant sent no text — describe the tool invocations instead.
                names = []
                for call in calls:
                    fn = call.get("function", {})
                    names.append(
                        fn.get("name", "unknown") if isinstance(fn, dict) else "unknown"
                    )
                body = f"[Called tools: {', '.join(names)}]"
            else:
                body = ""
        elif isinstance(body, list):
            # Multimodal payload: keep only the textual blocks.
            texts = [
                part.get("text", "")
                for part in body
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            body = " ".join(texts) if texts else "[multimodal content]"

        if role == "assistant":
            prefix = "[ASSISTANT]:"
        elif role == "tool":
            prefix = f"[TOOL_RESULT ({message.get('name', 'unknown')})]:"
        else:
            prefix = "[USER]:"

        rendered.append(f"{prefix} {body}")

    return "\n\n".join(rendered)


def _split_messages_into_chunks(
    messages: list[LLMMessage], max_tokens: int
) -> list[list[LLMMessage]]:
    """Split messages into chunks at message boundaries.

    System messages are excluded entirely. Chunks are built greedily: a
    new chunk starts whenever adding the next message would push the
    estimated token total past ``max_tokens``. A single message larger
    than ``max_tokens`` still forms its own (oversized) chunk, because
    messages are never split internally.

    Args:
        messages: List of messages to split.
        max_tokens: Maximum estimated tokens per chunk.

    Returns:
        List of message chunks; empty if there are no non-system messages.
    """
    non_system = [m for m in messages if m.get("role") != "system"]
    if not non_system:
        return []

    chunks: list[list[LLMMessage]] = []
    current_chunk: list[LLMMessage] = []
    current_tokens = 0

    for msg in non_system:
        content = msg.get("content")
        # None contributes no text; anything else (str or multimodal
        # list) is stringified for a rough size estimate. The original
        # had identical list/else branches — collapsed here.
        msg_text = "" if content is None else str(content)
        msg_tokens = _estimate_token_count(msg_text)

        # Close the current chunk if this message would overflow it and
        # the chunk already holds at least one message.
        if current_chunk and (current_tokens + msg_tokens) > max_tokens:
            chunks.append(current_chunk)
            current_chunk = []
            current_tokens = 0

        current_chunk.append(msg)
        current_tokens += msg_tokens

    if current_chunk:
        chunks.append(current_chunk)

    return chunks


def _extract_summary_tags(text: str) -> str:
"""Extract content between <summary></summary> tags.

Falls back to the full text if no tags are found.

Args:
text: Text potentially containing summary tags.

Returns:
Extracted summary content, or full text if no tags found.
"""
match = re.search(r"<summary>(.*?)</summary>", text, re.DOTALL)
if match:
return match.group(1).strip()
return text.strip()


def summarize_messages(
messages: list[LLMMessage],
llm: LLM | BaseLLM,
Expand All @@ -649,6 +780,10 @@ def summarize_messages(
) -> None:
"""Summarize messages to fit within context window.

Uses structured context compaction: preserves system messages,
splits at message boundaries, formats with role labels, and
produces structured summaries for seamless task continuation.

Preserves any files attached to user messages and re-attaches them to
the summarized message. Files from all user messages are merged.

Expand All @@ -657,49 +792,64 @@ def summarize_messages(
llm: LLM instance for summarization
callbacks: List of callbacks for LLM
i18n: I18N instance for messages
verbose: Whether to print progress.
"""
# 1. Extract & preserve file attachments from user messages
preserved_files: dict[str, Any] = {}
for msg in messages:
if msg.get("role") == "user" and msg.get("files"):
preserved_files.update(msg["files"])

messages_string = " ".join(
[str(message.get("content", "")) for message in messages]
)
cut_size = llm.get_context_window_size()
# 2. Extract system messages — never summarize them
system_messages = [m for m in messages if m.get("role") == "system"]
non_system_messages = [m for m in messages if m.get("role") != "system"]

messages_groups = [
{"content": messages_string[i : i + cut_size]}
for i in range(0, len(messages_string), cut_size)
]
# If there are only system messages (or no non-system messages), nothing to summarize
if not non_system_messages:
return

# 3. Split non-system messages into chunks at message boundaries
max_tokens = llm.get_context_window_size()
chunks = _split_messages_into_chunks(non_system_messages, max_tokens)

# 4. Summarize each chunk with role-labeled formatting
summarized_contents: list[SummaryContent] = []
total_chunks = len(chunks)

total_groups = len(messages_groups)
for idx, group in enumerate(messages_groups, 1):
for idx, chunk in enumerate(chunks, 1):
if verbose:
Printer().print(
content=f"Summarizing {idx}/{total_groups}...",
content=f"Summarizing {idx}/{total_chunks}...",
color="yellow",
)

conversation_text = _format_messages_for_summary(chunk)

summarization_messages = [
format_message_for_llm(
i18n.slice("summarizer_system_message"), role="system"
),
format_message_for_llm(
i18n.slice("summarize_instruction").format(group=group["content"]),
i18n.slice("summarize_instruction").format(
conversation=conversation_text
),
),
]
summary = llm.call(
summarization_messages,
callbacks=callbacks,
)
summarized_contents.append({"content": str(summary)})
# Extract content from <summary> tags with graceful fallback
extracted = _extract_summary_tags(str(summary))
summarized_contents.append({"content": extracted})

merged_summary = " ".join(content["content"] for content in summarized_contents)
merged_summary = "\n\n".join(content["content"] for content in summarized_contents)

# 6. Reconstruct messages: [system messages...] + [summary user message]
messages.clear()
for sys_msg in system_messages:
messages.append(sys_msg)

summary_message = format_message_for_llm(
i18n.slice("summary").format(merged_summary=merged_summary)
)
Expand Down
Loading
Loading