50 changes: 27 additions & 23 deletions .github/copilot-instructions.md
@@ -7,8 +7,6 @@
### High-Level Repository Information

- **Type**: TypeScript npm library/package
-- **Source Files**: ~24
-- **Installed Size**: ~106MB (including all dependencies in node_modules)
- **Languages**: TypeScript (primary), JavaScript (compiled output)
- **Target Runtime**: Node.js (CommonJS modules)
- **Framework**: Model-agnostic core; optional adapters (LangChain)
@@ -21,7 +19,7 @@

### Prerequisites and Environment Setup

-- **Node.js**: Version 20+ (as specified in the `engines` field of package.json)
+- **Node.js**: Version 20+ (recommended)
- **Package Manager**: pnpm 10.14.0 (preferred) or npm (fallback)

### Critical Build Steps (Always Follow This Order)
@@ -41,42 +39,39 @@
2. **Build the Project**

```bash
-npm run build
+pnpm run build
# Compiles TypeScript to dist/ directory
# Duration: ~5-10 seconds
```

3. **Run Tests**

```bash
-npm run test
+pnpm run test
# Runs all vitest tests
# Duration: ~5-10 seconds
-# Should show: "Test Files 3 passed (3), Tests 16 passed (16)"
+# All tests should pass
```

4. **Format Code**

```bash
-npm run format # Auto-format code
-npm run format:check # Check formatting without changes
+pnpm run format # Auto-format code
+pnpm run format:check # Check formatting without changes
```

5. **Lint Code (Known Issue)**
```bash
-npm run lint
+pnpm run lint
```
**KNOWN ISSUE**: ESLint currently fails with a "parserOptions.tsconfigRootDir must be an absolute path" error. This is a configuration bug and does not affect the build or tests; the code is properly linted in the CI environment.

### Complete Development Workflow

```bash
# Clean start (recommended for agents):
rm -rf node_modules dist
-npm install # Always use npm for reliability
-npm run test # Verify tests pass
-npm run format:check # Verify formatting
-npm run build # Final build
+pnpm install # Always use pnpm for reliability
+pnpm run test # Verify tests pass
+pnpm run format:check # Verify formatting
+pnpm run build # Final build
```

### CI/CD Pipeline Validation
@@ -103,8 +98,9 @@ The repository uses GitHub Actions CI that runs:
│ ├── adapters/ # Integration adapters (optional)
│ │ └── langchain.ts # LangChain adapter + helpers (compressLangChainHistory, toSlimModel)
│ └── strategies/ # Compression strategy implementations
-│ ├── trim.ts # TrimCompressor: keeps first + last N messages
-│ └── summarize.ts # SummarizeCompressor: AI-powered summarization
+│ ├── common.ts # Shared token-budget utilities & defaults (thresholds, estimator)
+│ ├── trim.ts # TrimCompressor: token-threshold trimming (preserve system + recent)
+│ └── summarize.ts # SummarizeCompressor: token-threshold summarization (inject summary)
├── tests/ # vitest test files
│ ├── trim.test.ts # Tests for TrimCompressor
│ ├── summarize.test.ts # Tests for SummarizeCompressor
@@ -132,11 +128,19 @@
- `SlimContextMessage`: Standard message format with role ('system'|'user'|'assistant'|'tool'|'human') and content
- `SlimContextChatModel`: BYOM interface requiring only `invoke(messages) -> response`
- `SlimContextCompressor`: Strategy interface for compression implementations
- `TokenEstimator`: `(message) => number` callback used for model-agnostic token budgeting
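For reference, these contracts can be sketched as follows, together with a tiny conforming model and compressor (shapes follow this document; the exact field types in `src/types.ts` may differ):

```typescript
// Sketch of the core slimcontext contracts described above.
type SlimContextRole = 'system' | 'user' | 'assistant' | 'tool' | 'human';

interface SlimContextMessage {
  role: SlimContextRole;
  content: string;
}

interface SlimContextModelResponse {
  content: string;
}

// BYOM: anything that can turn messages into a response.
interface SlimContextChatModel {
  invoke(messages: SlimContextMessage[]): Promise<SlimContextModelResponse>;
}

// Strategy contract implemented by TrimCompressor and SummarizeCompressor.
interface SlimContextCompressor {
  compress(messages: SlimContextMessage[]): Promise<SlimContextMessage[]>;
}

// Model-agnostic token budgeting callback.
type TokenEstimator = (message: SlimContextMessage) => number;

// Tiny conforming implementations, for illustration only.
const echoModel: SlimContextChatModel = {
  async invoke(messages) {
    return { content: messages.map((m) => m.content).join(' ') };
  },
};

const noopCompressor: SlimContextCompressor = {
  async compress(messages) {
    return messages;
  },
};
```

Any object satisfying these shapes can be passed to the compressors, which is what makes the core model-agnostic.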

**Compression Strategies**:

-- **TrimCompressor**: Simple strategy keeping first (system) message + last N-1 messages
-- **SummarizeCompressor**: AI-powered strategy that summarizes middle conversations when exceeding maxMessages
+- Token-threshold based design using the model’s max token window and a configurable threshold (default 70%).
+- Shared config shape (TokenBudgetConfig): `{ maxModelTokens?, thresholdPercent?, estimateTokens?, minRecentMessages? }`.
+- **TrimCompressor**: Drops the oldest non-system messages until estimated tokens fall below the threshold, while always preserving any system message(s) and at least the most recent `minRecentMessages`.
+- **SummarizeCompressor**: When over threshold, summarizes all messages before the recent tail (excluding the leading system message if present) and inserts a synthetic system summary just before the preserved recent messages.
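The trim rule above can be sketched as follows (an illustrative sketch with option names matching the documented `TokenBudgetConfig`; `trimSketch` is a hypothetical name, not the library's actual implementation):

```typescript
interface Msg {
  role: 'system' | 'user' | 'assistant' | 'tool' | 'human';
  content: string;
}

// Sketch of the trim loop: drop oldest non-system messages until the
// estimated total falls below thresholdPercent * maxModelTokens.
function trimSketch(
  messages: Msg[],
  opts: {
    maxModelTokens?: number;
    thresholdPercent?: number;
    minRecentMessages?: number;
    estimateTokens?: (m: Msg) => number;
  } = {},
): Msg[] {
  const {
    maxModelTokens = 8192,
    thresholdPercent = 0.7,
    minRecentMessages = 2,
    estimateTokens = (m) => Math.ceil(m.content.length / 4) + 2,
  } = opts;
  const threshold = maxModelTokens * thresholdPercent;
  const total = (ms: Msg[]) => ms.reduce((n, m) => n + estimateTokens(m), 0);

  const result = [...messages];
  while (total(result) > threshold) {
    // Oldest droppable message: non-system and not in the protected recent tail.
    const idx = result.findIndex(
      (m, i) => m.role !== 'system' && i < result.length - minRecentMessages,
    );
    if (idx === -1) break; // nothing left to drop safely
    result.splice(idx, 1);
  }
  return result;
}
```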

**Shared Utilities** (src/strategies/common.ts):

- Defaults: `DEFAULT_MAX_MODEL_TOKENS = 8192`, `DEFAULT_THRESHOLD_PERCENT = 0.7`, `DEFAULT_MIN_RECENT_MESSAGES = 2`.
- Estimator: `DEFAULT_ESTIMATOR` (~`len/4 + 2`) plus `computeThresholdTokens`, `normalizeBudgetConfig` helpers.
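A sketch of what these shared utilities might look like (the constant values and helper names come from this document; the exact signatures in `src/strategies/common.ts` are assumptions):

```typescript
// Defaults as documented above.
const DEFAULT_MAX_MODEL_TOKENS = 8192;
const DEFAULT_THRESHOLD_PERCENT = 0.7;
const DEFAULT_MIN_RECENT_MESSAGES = 2;

interface TokenBudgetConfig {
  maxModelTokens?: number;
  thresholdPercent?: number;
  minRecentMessages?: number;
  estimateTokens?: (m: { content: string }) => number;
}

// Rough character-based heuristic: ~4 chars per token, plus a small
// per-message overhead for role/framing tokens.
const DEFAULT_ESTIMATOR = (m: { content: string }): number =>
  Math.ceil(m.content.length / 4) + 2;

// Resolve a partial config into concrete values.
function normalizeBudgetConfig(cfg: TokenBudgetConfig = {}) {
  return {
    maxModelTokens: cfg.maxModelTokens ?? DEFAULT_MAX_MODEL_TOKENS,
    thresholdPercent: cfg.thresholdPercent ?? DEFAULT_THRESHOLD_PERCENT,
    minRecentMessages: cfg.minRecentMessages ?? DEFAULT_MIN_RECENT_MESSAGES,
    estimateTokens: cfg.estimateTokens ?? DEFAULT_ESTIMATOR,
  };
}

// Token count at which compression kicks in.
function computeThresholdTokens(cfg: TokenBudgetConfig = {}): number {
  const { maxModelTokens, thresholdPercent } = normalizeBudgetConfig(cfg);
  return Math.floor(maxModelTokens * thresholdPercent);
}
```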

**Framework Independence**: Core library has no framework dependencies. An optional LangChain adapter is provided for convenience; core remains BYOM.

@@ -153,8 +157,8 @@
### Running Tests

```bash
-npm run test
-# Expects: ~16 tests across 3 files, all passing
+pnpm run test
+# Expects: All tests to pass
# Tests cover TrimCompressor, SummarizeCompressor, and the LangChain adapter/helper
```

@@ -196,7 +200,7 @@ npm run test
- CommonJS: `const { langchain } = require('slimcontext')`
- ESM/TypeScript: `import * as slim from 'slimcontext'; const { langchain } = slim;`
- Note: `import { langchain } from 'slimcontext'` may not work in all environments due to CJS/ESM interop. Prefer one of the patterns above.
-- Includes a one-call history helper: `compressLangChainHistory(history, options)`
+- Includes a one-call history helper: `compressLangChainHistory(history, options)`, where `options` accepts the token-threshold fields (`maxModelTokens`, `thresholdPercent`, `estimateTokens`, `minRecentMessages`) and either `strategy: 'trim'` or `strategy: 'summarize'` (the latter also requires `llm`).

---

40 changes: 28 additions & 12 deletions CHANGELOG.md
@@ -4,7 +4,26 @@ All notable changes to this project will be documented in this file.

The format is based on Keep a Changelog, and this project adheres to Semantic Versioning.

-## [2.1.0] - 2025-08-27
+## [2.1.0] - 2025-08-28

### Breaking

- Strategies are now token-threshold based instead of message-count based.
- `TrimCompressor({ messagesToKeep })` replaced by `TrimCompressor({ maxModelTokens?, thresholdPercent?, estimateTokens?, minRecentMessages? })`.
- `SummarizeCompressor({ model, maxMessages, ... })` replaced by `SummarizeCompressor({ model, maxModelTokens?, thresholdPercent?, estimateTokens?, minRecentMessages?, prompt? })`.

### Migration

- Provide your model’s context window via `maxModelTokens` (optional; defaults to 8192).
- Choose a `thresholdPercent` (0–1) at which to trigger compression (default 0.7; recommended 0.8–0.9 for aggressive usage).
- Optional: pass a custom `estimateTokens` to better approximate token usage.
- Optional: tune `minRecentMessages` (trim: default 2, summarize: default 4).
- Update adapter/example usages accordingly (README and examples have been updated).
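As a sketch of the before/after configuration (option names come from this changelog; the `approximateBudget` helper and its per-message average are hypothetical, shown only to illustrate the mapping):

```typescript
// Old (2.0.x) message-count options, as documented above:
const oldTrimOptions = { messagesToKeep: 8 };

// New (2.1.0) token-budget options:
const newTrimOptions = {
  maxModelTokens: 8192, // your model's context window (default 8192)
  thresholdPercent: 0.7, // trigger compression at 70% (default)
  minRecentMessages: 2, // trim default (summarize defaults to 4)
  // estimateTokens: (m) => customCounter(m), // optional
};

// Hypothetical helper: derive a rough token budget from an old
// message-count config, assuming ~200 tokens per message on average.
function approximateBudget(messagesToKeep: number, avgTokensPerMessage = 200) {
  return {
    maxModelTokens: 8192,
    thresholdPercent: Math.min(1, (messagesToKeep * avgTokensPerMessage) / 8192),
    minRecentMessages: 2,
  };
}
```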

### Changed

- Trim: when total estimated tokens exceed threshold, drop oldest non-system messages until under threshold, preserving system messages and the most recent messages.
- Summarize: when over threshold, summarize the oldest portion (excluding a leading system message) and insert a synthetic system summary before recent messages.

### Added

@@ -16,17 +35,10 @@
- Examples:
- `examples/LANGCHAIN_EXAMPLE.md`: adapting a LangChain model to `SlimContextChatModel`.
- `examples/LANGCHAIN_COMPRESS_HISTORY.md`: using `compressLangChainHistory` directly.
- `TokenEstimator` type for custom token estimation.
- Docs and examples updated to reflect token-based configuration.

-### Changed
-
-- README updated with a LangChain adapter section and one-call usage sample.
-
-### Notes
-
-- The adapter treats LangChain `tool` messages as `assistant` during compression.
-- `@langchain/core` is an optional peer dependency; only needed if you use the adapter.

-## [2.0.0] - 2025-08-24
+## [2.0.1] - 2025-08-24

### Breaking

@@ -41,7 +53,7 @@
- `IChatModel` -> `SlimContextChatModel`
- `ICompressor` -> `SlimContextCompressor`

-Migration notes:
+### Migration

- Import and use `SlimContextMessage` everywhere you previously used `Message` or `BaseMessage`.
- Update any custom `IChatModel` implementations to accept `SlimContextMessage[]`.
@@ -63,3 +75,7 @@ Migration notes:
### Behavior

- SummarizeCompressor alignment: after summarization, the first kept message following the summary is enforced to be a `user` message to maintain dialogue consistency. To achieve this while preserving recent context, the resulting message count may be `maxMessages - 1`, `maxMessages`, or `maxMessages + 1` depending on the split position.

### Notes

- `@langchain/core` is an optional peer dependency; only needed if you use the adapter.
55 changes: 35 additions & 20 deletions README.md
@@ -11,8 +11,8 @@ Lightweight, model-agnostic chat history compression utilities for AI assistants

## Features

-- Trim strategy: keep the first (system) message and last N messages.
-- Summarize strategy: summarize the middle portion using your own chat model.
+- Trim strategy: token-aware trimming based on your model's max tokens and a threshold.
+- Summarize strategy: token-aware summarization of older messages using your own chat model.
- Framework agnostic: plug in any model wrapper implementing a minimal `invoke()` interface.
- Optional LangChain adapter with a one-call helper for compressing histories.

@@ -22,6 +22,12 @@ Lightweight, model-agnostic chat history compression utilities for AI assistants
npm install slimcontext
```

## Migration

Upgrading from an earlier version? See the Migration notes in the changelog:

- CHANGELOG: ./CHANGELOG.md#migration

## Core Concepts

Provide a model that implements:
@@ -55,7 +61,15 @@ interface SlimContextMessage {
```ts
import { TrimCompressor, SlimContextMessage } from 'slimcontext';

-const compressor = new TrimCompressor({ messagesToKeep: 8 });
+// Configure token-aware trimming
+const compressor = new TrimCompressor({
+  // Optional: defaults shown
+  maxModelTokens: 8192, // your model's context window
+  thresholdPercent: 0.7, // start trimming after 70% of maxModelTokens
+  minRecentMessages: 2, // always keep at least last 2 messages
+  // Optional estimator; default is a len/4 heuristic
+  // estimateTokens: (m) => yourCustomTokenCounter(m),
+});

let history: SlimContextMessage[] = [
{ role: 'system', content: 'You are a helpful assistant.' },
@@ -84,7 +98,15 @@ class MyModel implements SlimContextChatModel {
}

const model = new MyModel();
-const compressor = new SummarizeCompressor({ model, maxMessages: 12 });
+const compressor = new SummarizeCompressor({
+  model,
+  // Optional: defaults shown
+  maxModelTokens: 8192,
+  thresholdPercent: 0.7, // summarize once total tokens exceed 70%
+  minRecentMessages: 4, // keep at least last 4 messages verbatim
+  // estimateTokens: (m) => yourCustomTokenCounter(m),
+  // prompt: '...custom summarization instructions...'
+});

let history: SlimContextMessage[] = [
{ role: 'system', content: 'You are a helpful assistant.' },
@@ -96,21 +118,12 @@ history = await compressor.compress(history);

Notes about summarization behavior

-- Alignment: after compression, messages will start with `[system, summary, ...]`, and the first kept message after the summary is always a `user` turn. This preserves dialogue consistency.
-- Size: to keep this alignment and preserve recency, the output length can be `maxMessages - 1`, `maxMessages`, or `maxMessages + 1`.
-- Preference: if the default split lands on an assistant, we first try shifting forward by 1 (staying within `maxMessages`). If that still isn’t a user, we shift backward by 1 (allowing `maxMessages + 1`).
+- When the estimated total tokens exceed the threshold, the oldest portion (excluding a leading system message) is summarized into a single system message inserted before the recent tail.
+- The most recent `minRecentMessages` are always preserved verbatim.
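The described output shape can be sketched as a simplified split (illustration only; the real compressor asks the model for the summary text rather than using a placeholder, and only runs when over the token threshold):

```typescript
interface Msg {
  role: 'system' | 'user' | 'assistant' | 'tool' | 'human';
  content: string;
}

// Sketch: split history into [leading system?] + [head to summarize] + [recent tail],
// then rebuild as [system?, synthetic summary, ...recent].
function summarizeShape(messages: Msg[], minRecentMessages = 4, summaryText = '[summary]'): Msg[] {
  const hasSystem = messages[0]?.role === 'system';
  const system = hasSystem ? [messages[0]] : [];
  const rest = hasSystem ? messages.slice(1) : messages;
  const recent = rest.slice(-minRecentMessages);
  const head = rest.slice(0, rest.length - recent.length);
  if (head.length === 0) return messages; // nothing to summarize
  const summary: Msg = { role: 'system', content: summaryText };
  return [...system, summary, ...recent];
}
```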

### Strategy Combination Example

-You can chain strategies depending on size thresholds:

-```ts
-if (history.length > 50) {
-  history = await summarizeCompressor.compress(history);
-} else if (history.length > 25) {
-  history = await trimCompressor.compress(history);
-}
-```
+You can chain strategies depending on token thresholds or other heuristics.
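For instance, a dispatcher can pick a strategy from the estimated token total (a sketch in which stub compressors stand in for real `SummarizeCompressor`/`TrimCompressor` instances; the cutoffs are illustrative):

```typescript
interface Msg {
  role: string;
  content: string;
}

interface Compressor {
  compress(msgs: Msg[]): Promise<Msg[]>;
}

// Rough len/4 heuristic, summed over the history.
const estimate = (msgs: Msg[]) =>
  msgs.reduce((n, m) => n + Math.ceil(m.content.length / 4) + 2, 0);

// Stub strategies for illustration; in practice these would be
// SummarizeCompressor and TrimCompressor instances.
const summarizer: Compressor = { async compress(m) { return m.slice(-4); } };
const trimmer: Compressor = { async compress(m) { return m.slice(-8); } };

async function compressByBudget(history: Msg[], maxModelTokens = 8192): Promise<Msg[]> {
  const tokens = estimate(history);
  if (tokens > maxModelTokens * 0.9) return summarizer.compress(history); // far over: summarize
  if (tokens > maxModelTokens * 0.7) return trimmer.compress(history); // moderately over: trim
  return history; // under budget: leave untouched
}
```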

## Example Integration

@@ -139,7 +152,7 @@ import { AIMessage, HumanMessage, SystemMessage } from '@langchain/core/messages
import { ChatOpenAI } from '@langchain/openai';
import { langchain } from 'slimcontext';

-const lc = new ChatOpenAI({ model: 'gpt-4o-mini', temperature: 0 });
+const lc = new ChatOpenAI({ model: 'gpt-5-mini', temperature: 0 });

const history = [
new SystemMessage('You are helpful.'),
@@ -151,7 +164,9 @@ const history = [
const compact = await langchain.compressLangChainHistory(history, {
strategy: 'summarize',
llm: lc, // BaseChatModel
-  maxMessages: 12,
+  maxModelTokens: 8192,
+  thresholdPercent: 0.8, // summarize beyond 80% of context window
+  minRecentMessages: 4,
});
```

@@ -161,8 +176,8 @@ See `examples/LANGCHAIN_COMPRESS_HISTORY.md` for a fuller copy-paste example.

### Classes

-- `TrimCompressor({ messagesToKeep })`
-- `SummarizeCompressor({ model, maxMessages, prompt? })`
+- `TrimCompressor({ maxModelTokens?, thresholdPercent?, estimateTokens?, minRecentMessages? })`
+- `SummarizeCompressor({ model, maxModelTokens?, thresholdPercent?, estimateTokens?, minRecentMessages?, prompt? })`

### Interfaces

12 changes: 8 additions & 4 deletions examples/LANGCHAIN_COMPRESS_HISTORY.md
@@ -8,7 +8,7 @@ import { ChatOpenAI } from '@langchain/openai';
import { langchain } from 'slimcontext';

// 1) Create your LangChain chat model (any BaseChatModel works)
-const llm = new ChatOpenAI({ model: 'gpt-4o-mini', temperature: 0 });
+const llm = new ChatOpenAI({ model: 'gpt-5-mini', temperature: 0 });

// 2) Build your existing LangChain-compatible history
const history = [
@@ -22,13 +22,17 @@ const history = [
const compact = await langchain.compressLangChainHistory(history, {
strategy: 'summarize',
llm, // pass your BaseChatModel
-  maxMessages: 12, // target total messages after compression (system + summary + recent)
+  maxModelTokens: 8192,
+  thresholdPercent: 0.8,
+  minRecentMessages: 4,
});

// Alternatively, use trimming without an LLM:
const trimmed = await langchain.compressLangChainHistory(history, {
strategy: 'trim',
-  messagesToKeep: 8,
+  maxModelTokens: 8192,
+  thresholdPercent: 0.8,
+  minRecentMessages: 4,
});

console.log('Original size:', history.length);
@@ -39,4 +43,4 @@ console.log('Trimmed size:', trimmed.length);
Notes

- `@langchain/core` is an optional peer dependency. Install it only if you use the adapter.
-- `maxMessages` must be at least 4 for summarize (system + summary + 2 recent).
+- Summarize strategy summarizes older content when total tokens exceed `thresholdPercent * maxModelTokens`.
9 changes: 7 additions & 2 deletions examples/LANGCHAIN_EXAMPLE.md
@@ -12,7 +12,7 @@ import {
import { ChatOpenAI } from '@langchain/openai'; // or any LangChain chat model

// Create a LangChain model (reads from env, e.g., OPENAI_API_KEY)
-const lc = new ChatOpenAI({ model: 'gpt-4o-mini', temperature: 0 });
+const lc = new ChatOpenAI({ model: 'gpt-5-mini', temperature: 0 });

class LangChainModel implements SlimContextChatModel {
async invoke(messages: SlimContextMessage[]): Promise<SlimContextModelResponse> {
@@ -37,7 +37,12 @@ class LangChainModel implements SlimContextChatModel {
}

async function compress(history: SlimContextMessage[]) {
-  const summarize = new SummarizeCompressor({ model: new LangChainModel(), maxMessages: 12 });
+  const summarize = new SummarizeCompressor({
+    model: new LangChainModel(),
+    maxModelTokens: 8192,
+    thresholdPercent: 0.75,
+    minRecentMessages: 4,
+  });
return summarize.compress(history);
}

11 changes: 8 additions & 3 deletions examples/OPENAI_EXAMPLE.md
@@ -16,7 +16,7 @@ const client = new OpenAI();
class OpenAIModel implements SlimContextChatModel {
async invoke(msgs: SlimContextMessage[]): Promise<SlimContextModelResponse> {
const response = await client.chat.completions.create({
-      model: 'gpt-4o-mini',
+      model: 'gpt-5-mini',
messages: msgs.map((m) => ({
role: m.role === 'human' ? 'user' : (m.role as 'system' | 'user' | 'assistant'),
content: m.content,
@@ -33,11 +33,16 @@ async function main() {
// ... conversation grows
];

-  const summarize = new SummarizeCompressor({ model: new OpenAIModel(), maxMessages: 10 });
+  const summarize = new SummarizeCompressor({
+    model: new OpenAIModel(),
+    maxModelTokens: 128000,
+    thresholdPercent: 0.8,
+    minRecentMessages: 4,
+  });
const compressed = await summarize.compress(history);

const completion = await client.chat.completions.create({
-    model: 'gpt-4o-mini',
+    model: 'gpt-5-mini',
messages: compressed
.filter((m) => m.role !== 'tool')
.map((m) => ({ role: m.role as 'system' | 'user' | 'assistant', content: m.content })),