#!/usr/bin/env python3
"""Zulip bot that summarizes a channel topic with an LLM (via litellm).

Send the bot a topic link of the form ``#**channel>topic**``.  The bot fetches
the most recent messages of that topic, asks the configured model for a short
summary, and replies with it.

Importing this module has side effects: it reads the configuration file,
creates a ``zulip.Client`` from it, and exports every key of the file's
``[litellm]`` section as an environment variable.

Provenance: reconstructed from the patch "bots: Add topic summarization bot."
(commit bd6e8cb4ca8af3018ddfa624ca695a1ce20679f7, Tim Abbott,
Thu, 15 Jan 2026 08:45:40 -0800), which creates
zulip_bots/zulip_bots/bots/summarize/summarize.py together with
zulip_bots/zulip_bots/bots/summarize/requirements.txt containing:

    zulip
    litellm
"""

import argparse
import json
import os
import re
import sys
import urllib.parse
from configparser import ConfigParser
from dataclasses import dataclass
from typing import Any, Dict

import requests
import zulip
from litellm import completion  # type: ignore[import-not-found]

from zulip_bots.lib import AbstractBotHandler

USAGE = (
    "Send me a topic link such as `#**channel>topic**` and I will reply with "
    "a summary of the most recent messages in that topic."
)


class SummarizeError(Exception):
    """Raised when a conversation cannot be fetched or has nothing to summarize."""


def format_conversation(result: Dict[str, Any]) -> str:
    """Serialize fetched messages into the JSON string handed to the LLM.

    Args:
        result: A message-fetch response. Only ``result["messages"]`` is read,
            and from each message only ``sender_full_name`` and ``content``.

    Returns:
        A JSON array of ``{"sender": ..., "content": ...}`` objects, where the
        sender is written in the same ``@_**Name**`` syntax that the prompt
        asks the model to use for names.

    Raises:
        SummarizeError: If ``result["messages"]`` is empty.
    """
    # Note: Including timestamps seems to have no impact; including reactions
    # makes the results worse.
    zulip_messages = result["messages"]
    if not zulip_messages:
        # Raise instead of calling sys.exit(): exiting here would shut down
        # the whole bot just because one requested topic was empty.
        raise SummarizeError("No messages in conversation to summarize")

    zulip_messages_list = [
        {"sender": f'@_**{message["sender_full_name"]}**', "content": message["content"]}
        for message in zulip_messages
    ]
    return json.dumps(zulip_messages_list)


def make_message(content: str, role: str = "user") -> Dict[str, str]:
    """Build one chat message dict (``content`` + ``role``) for the LLM request."""
    return {"content": content, "role": role}


def get_max_summary_length(conversation_length: int) -> int:
    """Return the maximum number of summary sentences for a conversation.

    The limit grows by one sentence per ten messages and is capped at 6:
    10 messages -> 4, 20 -> 5, 30 or more -> 6.  ``int()`` truncates toward
    zero, so 1-9 messages also yield 4 and 0 messages yields 3.
    """
    return min(6, 4 + int((conversation_length - 10) / 10))


# Path of a zuliprc-style file that is passed to zulip.Client and must also
# contain a [litellm] section.  The default is the path hard-coded by the
# original author; set ZULIP_SUMMARIZE_CONFIG to use a different file.
config_file = os.environ.get("ZULIP_SUMMARIZE_CONFIG", "/home/tabbott/zuliprc-llm")
if not config_file or not os.path.isfile(config_file):
    print("Could not find the Zulip configuration file. Please read the provided README.")
    # Non-zero status: this is a failure, not a clean shutdown.
    sys.exit(1)

client = zulip.Client(config_file=config_file)

config = ConfigParser()
# Make config parser case sensitive otherwise API keys will be lowercased
# which is not supported by litellm.
# https://docs.python.org/3/library/configparser.html#configparser.ConfigParser.optionxform
config.optionxform = str  # type: ignore[assignment, method-assign]

with open(config_file) as f:
    config.read_file(f, config_file)

# Set all the keys in `litellm` as environment variables.
# Only the key names are printed, never the values.
for key in config["litellm"]:
    print("Setting key:", key)
    os.environ[key] = config["litellm"][key]


@dataclass
class LLMArgs:
    """Tunable settings for a summarization request."""

    # Passed as `max_tokens` to the completion call.
    max_tokens: int = 600
    # Passed as `num_before` when fetching messages, i.e. how many of the most
    # recent messages of the topic are requested.
    max_messages: int = 100
    # Model name passed to the completion call.  Other names noted by the
    # original author (presumably alternatives that were tried):
    # gpt-4.1 gpt-4o-mini gpt-4.1-mini o4-mini
    model: str = "gpt-4.1-mini"


args = LLMArgs()


def summarize_conversation(channel: str, topic: str) -> str:
    """Fetch the latest messages of a topic and return an LLM-written summary.

    Args:
        channel: Operand for the ``channel`` narrow operator.
        topic: Operand for the ``topic`` narrow operator.

    Returns:
        The content of the first choice in the completion response.

    Raises:
        SummarizeError: If fetching the messages fails or the topic contains
            no messages.
    """
    model = args.model

    narrow = [
        {"operator": "channel", "operand": channel},
        {"operator": "topic", "operand": topic},
    ]

    request = {
        "anchor": "newest",
        "num_before": args.max_messages,
        "num_after": 0,
        "narrow": narrow,
        # Fetch raw Markdown, not HTML
        "apply_markdown": False,
    }
    result = client.get_messages(request)
    if result["result"] == "error":
        print("Failed fetching message history", result)
        # Raise instead of calling sys.exit(): one failed request must not
        # stop the bot from serving later requests.
        raise SummarizeError(
            f"Failed fetching message history: {result.get('msg', 'unknown error')}"
        )

    # Raises SummarizeError early when there is nothing to summarize.
    formatted_conversation = format_conversation(result)

    conversation_length = len(result["messages"])
    max_summary_length = get_max_summary_length(conversation_length)

    print(f"Max summary length: {max_summary_length}")

    intro = f"The following is a chat conversation in the Zulip team chat app. channel: {channel}, topic: {topic}"
    prompt = f"Succinctly summarize this conversation based only on the information provided, in up to {max_summary_length} sentences, for someone who is familiar with the context. Mention key conclusions and actions, if any. Refer to specific people as appropriate, formatting names with this special syntax: Tim Abbott should be formatted as @_**Tim Abbott**. Don't use an intro phrase. You can use Zulip's CommonMark based formatting. Please use paragraph breaks after every 2-3 sentences."
    messages = [
        make_message(intro, "system"),
        make_message(formatted_conversation),
        make_message(prompt),
    ]

    # Send formatted messages to the LLM model for summarization
    response = completion(
        max_tokens=args.max_tokens,
        model=model,
        messages=messages,
    )

    print(
        f"Used {response['usage']['completion_tokens']} completion tokens to summarize {conversation_length} Zulip messages ({response['usage']['prompt_tokens']} prompt tokens)."
    )
    print()
    return response["choices"][0]["message"]["content"]


class LiteLLMHandler:
    """A Zulip bot handler for LLMs"""

    def usage(self) -> str:
        """Return a short description of how to talk to the bot."""
        return USAGE

    def initialize(self, bot_handler: AbstractBotHandler) -> None:
        """No per-handler setup; configuration is loaded at module import."""
        pass

    def handle_message(self, message: Dict[str, str], bot_handler: AbstractBotHandler) -> None:
        """Summarize the topic named in ``message["content"]`` and reply.

        The content is expected to look like ``#**channel>topic**``.  If it
        cannot be split into a channel and a topic, the usage text is sent
        back; if summarization fails, the error text is sent back.
        """
        # Drop surrounding whitespace first so the trailing "**" of the topic
        # link is still at the end of the string when the markers are stripped.
        content = message["content"].strip().strip("#*")
        # Split on the first ">" only; everything after it is the topic.
        channel, separator, topic = content.partition(">")
        if not separator or not channel or not topic:
            bot_handler.send_reply(message, self.usage())
            return
        print(channel, topic)

        try:
            response = summarize_conversation(channel, topic)
        except SummarizeError as error:
            bot_handler.send_reply(message, str(error))
            return

        bot_handler.send_reply(message, response)


# Module-level name exposing the handler class (presumably what the
# zulip_bots loader looks up; confirm against zulip_bots.lib).
handler_class = LiteLLMHandler